Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix check-added-large-files --enforce-all to correctly consider all git-lfs files. #674

Merged
merged 1 commit into from Dec 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 22 additions & 11 deletions pre_commit_hooks/check_added_large_files.py
@@ -1,24 +1,33 @@
import argparse
import json
import math
import os
import subprocess
from typing import Optional
from typing import Sequence
from typing import Set

from pre_commit_hooks.util import added_files
from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit


def lfs_files() -> Set[str]:
try:
# Introduced in git-lfs 2.2.0, first working in 2.2.1
lfs_ret = cmd_output('git', 'lfs', 'status', '--json')
except CalledProcessError: # pragma: no cover (with git-lfs)
lfs_ret = '{"files":{}}'
def filter_lfs_files(filenames: Set[str]) -> None: # pragma: no cover (lfs)
"""Remove files tracked by git-lfs from the set."""
if not filenames:
return

return set(json.loads(lfs_ret)['files'])
check_attr = subprocess.run(
('git', 'check-attr', 'filter', '-z', '--stdin'),
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
encoding='utf-8',
check=True,
input='\0'.join(filenames),
)
stdout = zsplit(check_attr.stdout)
for i in range(0, len(stdout), 3):
filename, filter_tag = stdout[i], stdout[i + 2]
if filter_tag == 'lfs':
filenames.remove(filename)


def find_large_added_files(
Expand All @@ -30,7 +39,9 @@ def find_large_added_files(
# Find all added files that are also in the list of files pre-commit tells
# us about
retv = 0
filenames_filtered = set(filenames) - lfs_files()
filenames_filtered = set(filenames)
filter_lfs_files(filenames_filtered)

if not enforce_all:
filenames_filtered &= added_files()

Expand Down
13 changes: 13 additions & 0 deletions tests/check_added_large_files_test.py
Expand Up @@ -121,3 +121,16 @@ def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover
cmd_output('git', 'add', '--', '.')
# With --enforce-all large files on git lfs should succeed
assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0


@xfailif_no_gitlfs # pragma: no cover
def test_enforce_allows_gitlfs_after_commit(temp_git_dir, monkeypatch):
with temp_git_dir.as_cwd():
monkeypatch.setenv('HOME', str(temp_git_dir))
cmd_output('git', 'lfs', 'install')
temp_git_dir.join('f.py').write('a' * 10000)
cmd_output('git', 'lfs', 'track', 'f.py')
cmd_output('git', 'add', '--', '.')
git_commit('-am', 'foo')
# With --enforce-all large files on git lfs should succeed
assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0