New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Auto track binary files #828
Changes from 5 commits
a4e9b19
35b480c
57f8613
b8bbe4f
17a1331
5f48d6c
6d27a15
9fe6bb6
cbfdce5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1075,12 +1075,18 @@ def test_is_tracked_with_lfs_with_pattern(self): | |
# This content is 20MB (over 10MB) | ||
large_file = [100] * int(4e6) | ||
|
||
# This content is binary (contains the null character) | ||
binary_file = "\x00\x00\x00\x00" | ||
|
||
with open(f"{WORKING_REPO_DIR}/large_file.txt", "w+") as f: | ||
f.write(json.dumps(large_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/small_file.txt", "w+") as f: | ||
f.write(json.dumps(small_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/binary_file.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
os.makedirs(f"{WORKING_REPO_DIR}/dir", exist_ok=True) | ||
|
||
with open(f"{WORKING_REPO_DIR}/dir/large_file.txt", "w+") as f: | ||
|
@@ -1089,20 +1095,30 @@ def test_is_tracked_with_lfs_with_pattern(self): | |
with open(f"{WORKING_REPO_DIR}/dir/small_file.txt", "w+") as f: | ||
f.write(json.dumps(small_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/dir/binary_file.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
repo.auto_track_large_files("dir") | ||
repo.auto_track_binary_files("dir") | ||
|
||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "large_file.txt")) | ||
) | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "small_file.txt")) | ||
) | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "binary_file.txt")) | ||
) | ||
self.assertTrue( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "dir/large_file.txt")) | ||
) | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "dir/small_file.txt")) | ||
) | ||
self.assertTrue( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "dir/binary_file.txt")) | ||
) | ||
|
||
def test_auto_track_large_files(self): | ||
repo = Repository(WORKING_REPO_DIR) | ||
|
@@ -1113,35 +1129,48 @@ def test_auto_track_large_files(self): | |
# This content is 20MB (over 10MB) | ||
large_file = [100] * int(4e6) | ||
|
||
# This content is binary (contains the null character) | ||
binary_file = "\x00\x00\x00\x00" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would consider moving this to another test to keep this test focused in large files (or rename this test, although I think different tests would be better) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, what I was mentioning in the description is that this effectively doubles the time spent on these tests, which is already significant. We're currently sitting at 10+ minutes for the tests, which isn't ideal. I'm open to moving it around, but would like to emphasize that it will likely end up taking even longer. Let me know if this compromise is okay for you and I'll move it to its own separate test. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point! Let's keep as is then 😄 |
||
|
||
with open(f"{WORKING_REPO_DIR}/large_file.txt", "w+") as f: | ||
f.write(json.dumps(large_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/small_file.txt", "w+") as f: | ||
f.write(json.dumps(small_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/binary_file.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
repo.auto_track_large_files() | ||
repo.auto_track_binary_files() | ||
|
||
self.assertTrue( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "large_file.txt")) | ||
) | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "small_file.txt")) | ||
) | ||
self.assertTrue( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "binary_file.txt")) | ||
) | ||
|
||
def test_auto_track_large_files_ignored_with_gitignore(self): | ||
repo = Repository(WORKING_REPO_DIR) | ||
|
||
# This content is 20MB (over 10MB) | ||
large_file = [100] * int(4e6) | ||
|
||
# This content is binary (contains the null character) | ||
binary_file = "\x00\x00\x00\x00" | ||
|
||
# Test nested gitignores | ||
os.makedirs(f"{WORKING_REPO_DIR}/directory") | ||
|
||
with open(f"{WORKING_REPO_DIR}/.gitignore", "w+") as f: | ||
f.write("large_file.txt") | ||
f.write("large_file.txt\nbinary_file.txt") | ||
|
||
with open(f"{WORKING_REPO_DIR}/directory/.gitignore", "w+") as f: | ||
f.write("large_file_3.txt") | ||
f.write("large_file_3.txt\nbinary_file_3.txt") | ||
|
||
with open(f"{WORKING_REPO_DIR}/large_file.txt", "w+") as f: | ||
f.write(json.dumps(large_file)) | ||
|
@@ -1157,6 +1186,21 @@ def test_auto_track_large_files_ignored_with_gitignore(self): | |
|
||
repo.auto_track_large_files() | ||
|
||
with open(f"{WORKING_REPO_DIR}/binary_file.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
with open(f"{WORKING_REPO_DIR}/binary_file_2.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
with open(f"{WORKING_REPO_DIR}/directory/binary_file_3.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
with open(f"{WORKING_REPO_DIR}/directory/binary_file_4.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
repo.auto_track_binary_files() | ||
|
||
# Large files | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "large_file.txt")) | ||
) | ||
|
@@ -1175,7 +1219,26 @@ def test_auto_track_large_files_ignored_with_gitignore(self): | |
) | ||
) | ||
|
||
def test_auto_track_large_files_through_git_add(self): | ||
# Binary files | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "binary_file.txt")) | ||
) | ||
self.assertTrue( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "binary_file_2.txt")) | ||
) | ||
|
||
self.assertFalse( | ||
is_tracked_with_lfs( | ||
os.path.join(WORKING_REPO_DIR, "directory/binary_file_3.txt") | ||
) | ||
) | ||
self.assertTrue( | ||
is_tracked_with_lfs( | ||
os.path.join(WORKING_REPO_DIR, "directory/binary_file_4.txt") | ||
) | ||
) | ||
|
||
def test_auto_track_files_through_git_add(self): | ||
repo = Repository(WORKING_REPO_DIR) | ||
|
||
# This content is 5MB (under 10MB) | ||
|
@@ -1184,12 +1247,18 @@ def test_auto_track_large_files_through_git_add(self): | |
# This content is 20MB (over 10MB) | ||
large_file = [100] * int(4e6) | ||
|
||
# This content is binary (contains the null character) | ||
binary_file = "\x00\x00\x00\x00" | ||
|
||
with open(f"{WORKING_REPO_DIR}/large_file.txt", "w+") as f: | ||
f.write(json.dumps(large_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/small_file.txt", "w+") as f: | ||
f.write(json.dumps(small_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/binary_file.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
repo.git_add(auto_lfs_track=True) | ||
|
||
self.assertTrue( | ||
|
@@ -1198,8 +1267,11 @@ def test_auto_track_large_files_through_git_add(self): | |
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "small_file.txt")) | ||
) | ||
self.assertTrue( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "binary_file.txt")) | ||
) | ||
|
||
def test_auto_no_track_large_files_through_git_add(self): | ||
def test_auto_no_track_files_through_git_add(self): | ||
repo = Repository(WORKING_REPO_DIR) | ||
|
||
# This content is 5MB (under 10MB) | ||
|
@@ -1208,12 +1280,18 @@ def test_auto_no_track_large_files_through_git_add(self): | |
# This content is 20MB (over 10MB) | ||
large_file = [100] * int(4e6) | ||
|
||
# This content is binary (contains the null character) | ||
binary_file = "\x00\x00\x00\x00" | ||
|
||
with open(f"{WORKING_REPO_DIR}/large_file.txt", "w+") as f: | ||
f.write(json.dumps(large_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/small_file.txt", "w+") as f: | ||
f.write(json.dumps(small_file)) | ||
|
||
with open(f"{WORKING_REPO_DIR}/binary_file.txt", "w+") as f: | ||
f.write(binary_file) | ||
|
||
repo.git_add(auto_lfs_track=False) | ||
|
||
self.assertFalse( | ||
|
@@ -1222,6 +1300,9 @@ def test_auto_no_track_large_files_through_git_add(self): | |
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "small_file.txt")) | ||
) | ||
self.assertFalse( | ||
is_tracked_with_lfs(os.path.join(WORKING_REPO_DIR, "binary_file.txt")) | ||
) | ||
|
||
def test_auto_track_updates_removed_gitattributes(self): | ||
repo = Repository(WORKING_REPO_DIR) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other than reading the first few bytes, I think this answer gives a more accurate way of checking if the file is binary or not: https://stackoverflow.com/a/7392391/2536294
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I implemented what you propose in 9fe6bb6, let me know if that's what you had in mind. Cc @coyotte508