From 03a65ca35757ed7baf4c02474840d7dcbda5495a Mon Sep 17 00:00:00 2001 From: Alex Martani Date: Thu, 21 Oct 2021 15:29:54 -0700 Subject: [PATCH] Fix check-added-large-files --enforce-all to correctly consider all git-lfs files. `git lfs status` only outputs status for files that are pending some git-lfs related operation. For usage with --enforce-all, we need the list of all files that are tracked, which can be achived by `git lfs ls-files`. Fixes: https://github.com/pre-commit/pre-commit-hooks/issues/560 --- pre_commit_hooks/check_added_large_files.py | 33 ++++++++++++++------- tests/check_added_large_files_test.py | 13 ++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py index 4eaf853d..e1beb4ef 100644 --- a/pre_commit_hooks/check_added_large_files.py +++ b/pre_commit_hooks/check_added_large_files.py @@ -1,24 +1,33 @@ import argparse -import json import math import os +import subprocess from typing import Optional from typing import Sequence from typing import Set from pre_commit_hooks.util import added_files -from pre_commit_hooks.util import CalledProcessError -from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit -def lfs_files() -> Set[str]: - try: - # Introduced in git-lfs 2.2.0, first working in 2.2.1 - lfs_ret = cmd_output('git', 'lfs', 'status', '--json') - except CalledProcessError: # pragma: no cover (with git-lfs) - lfs_ret = '{"files":{}}' +def filter_lfs_files(filenames: Set[str]) -> None: # pragma: no cover (lfs) + """Remove files tracked by git-lfs from the set.""" + if not filenames: + return - return set(json.loads(lfs_ret)['files']) + check_attr = subprocess.run( + ('git', 'check-attr', 'filter', '-z', '--stdin'), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + encoding='utf-8', + check=True, + input='\0'.join(filenames), + ) + stdout = zsplit(check_attr.stdout) + for i in range(0, len(stdout), 3): + filename, filter_tag = stdout[i], stdout[i + 2] + if filter_tag == 'lfs': + filenames.remove(filename) def find_large_added_files( @@ -30,7 +39,9 @@ def find_large_added_files( # Find all added files that are also in the list of files pre-commit tells # us about retv = 0 - filenames_filtered = set(filenames) - lfs_files() + filenames_filtered = set(filenames) + filter_lfs_files(filenames_filtered) + if not enforce_all: filenames_filtered &= added_files() diff --git a/tests/check_added_large_files_test.py b/tests/check_added_large_files_test.py index d38c4f67..5d2239ba 100644 --- a/tests/check_added_large_files_test.py +++ b/tests/check_added_large_files_test.py @@ -121,3 +121,16 @@ def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover cmd_output('git', 'add', '--', '.') # With --enforce-all large files on git lfs should succeed assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0 + + +@xfailif_no_gitlfs # pragma: no cover +def test_enforce_allows_gitlfs_after_commit(temp_git_dir, monkeypatch): + with temp_git_dir.as_cwd(): + monkeypatch.setenv('HOME', str(temp_git_dir)) + cmd_output('git', 'lfs', 'install') + temp_git_dir.join('f.py').write('a' * 10000) + cmd_output('git', 'lfs', 'track', 'f.py') + cmd_output('git', 'add', '--', '.') + git_commit('-am', 'foo') + # With --enforce-all large files on git lfs should succeed + assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0