forked from jupyter/nbdime
/
gitfiles.py
185 lines (152 loc) · 5.85 KB
/
gitfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import io
import os
from collections import deque
from six import string_types
os.environ['GIT_PYTHON_REFRESH'] = 'quiet'
from git import (
Repo, InvalidGitRepositoryError, BadName, NoSuchPathError,
GitCommandNotFound, Diffable
)
from nbdime.vcs.git.filter_integration import apply_possible_filter
from .utils import EXPLICIT_MISSING_FILE, pushd
# Sentinel "git ref" representing the working tree (the files on disk).
# It is None, so the functions in this module test for it with `is` / `in`.
GitRefWorkingTree = None
# Sentinel "git ref" representing the index; this reuses the `Index` marker
# exposed on GitPython's `Diffable` class.
GitRefIndex = Diffable.Index
class BlobWrapper(io.StringIO):
    """In-memory text stream that additionally exposes a ``name`` attribute.

    The class-level default is the empty string; instances are expected to
    overwrite ``name`` with a label describing where the text came from.
    """

    # Default label, overridden per instance by whoever creates the stream.
    name = ''
def get_repo(path):
    """Locate the git repository that contains `path`.

    `path` is made absolute and symlink-free, then each directory from
    `path` upwards is tried in turn until one of them can be opened as a
    repository.

    Returns a ``(repo, subdirs)`` tuple, where `repo` is the Repo object and
    `subdirs` is a tuple with the directory names (outermost first) that
    lie between the directory that was opened and `path`.

    If no directory up to the filesystem root can be opened, the exception
    from the last attempt (InvalidGitRepositoryError or NoSuchPathError)
    is re-raised.
    """
    candidate = os.path.realpath(os.path.abspath(path))
    below = deque()
    while True:
        try:
            return (Repo(candidate), tuple(below))
        except (InvalidGitRepositoryError, NoSuchPathError):
            parent, tail = os.path.split(candidate)
            if not tail:
                # Nothing left to strip: we are at the root, give up.
                raise
            below.appendleft(tail)
            candidate = parent
def find_repo_root(path):
    """Return the working tree directory of the repository containing `path`.

    The repository is looked up with `get_repo`, so `path` may be any
    location inside the repository; errors from `get_repo` propagate.
    """
    return get_repo(path)[0].working_tree_dir
def traverse_tree(tree, subdirs):
    """Get the subtree reached by following a chain of subdirectory names.

    Arguments:
        tree: object with a `name` attribute and a `trees` iterable holding
            its direct subtrees (objects of the same shape).
        subdirs: iterable of subdirectory names, outermost first. Any
            iterable is accepted (tuple, list, deque, iterator, ...).

    Returns `tree` itself when `subdirs` is empty, otherwise the subtree
    found after descending one level per name.

    Raises KeyError if a name does not match any direct subtree at its
    level.
    """
    # Walk down iteratively: no slicing of `subdirs` is needed, so
    # non-sliceable iterables work and deep paths add no recursion depth.
    for sub in subdirs:
        # Find first subtree whose name matches sub:
        subtree = next((st for st in tree.trees if st.name == sub), None)
        if subtree is None:
            raise KeyError('%s is not a subtree of %s' % (sub, tree.name))
        tree = subtree
    return tree
def is_gitref(candidate):
    """Decide whether `candidate` is a gitref rather than a file/filename.

    Collisions resolve to False: a candidate that names an existing path
    (checked relative to the current directory) is never reported as a
    gitref, and neither is the EXPLICIT_MISSING_FILE marker. Anything else,
    including None, is handed to `is_valid_gitref` for the final verdict.
    """
    # An existing file or directory wins over a ref of the same name.
    if candidate is not None and os.path.exists(candidate):
        return False
    # The explicit "missing file" marker is never a ref.
    if candidate == EXPLICIT_MISSING_FILE:
        return False
    return is_valid_gitref(candidate)
def is_valid_gitref(ref, path=None):
    """Check whether `ref` is a valid gitref in `path`, per git-rev-parse.

    Arguments:
        ref: the candidate ref, resolved with `repo.commit(ref)`.
        path: a location inside the repository to check against; the
            current directory is used when it is None or empty.

    Returns True if the ref resolves to a commit. Returns False if no
    repository can be found for `path`, or if the ref name is unknown.
    """
    try:
        repo = get_repo(path or os.curdir)[0]
        repo.commit(ref)
    except (InvalidGitRepositoryError, NoSuchPathError, BadName):
        # get_repo re-raises whichever of InvalidGitRepositoryError or
        # NoSuchPathError its last attempt produced (the latter presumably
        # only when even the root of `path` does not exist, e.g. a missing
        # drive). Both mean "no repository here", so both count as invalid.
        return False
    return True
def is_path_in_repo(path):
    """Check whether `path` is part of a git repository.

    Returns True if `get_repo` finds a repository at `path` or at one of
    its parent directories, False otherwise.
    """
    try:
        get_repo(path)
    except (InvalidGitRepositoryError, NoSuchPathError):
        # get_repo re-raises whichever of these two its last attempt
        # produced; either way no repository contains `path`, so report
        # False instead of letting NoSuchPathError escape from a predicate.
        return False
    return True
def _get_diff_entry_stream(path, blob, ref_name, repo_dir):
    """Get a stream to the notebook for one side of a diff entry.

    Arguments:
        path: the entry's path for this side (may be empty/None).
        blob: the entry's blob for this side (may be None).
        ref_name: the ref this side belongs to, or one of the sentinels
            GitRefWorkingTree / GitRefIndex.
        repo_dir: directory to switch to while reading working tree files.

    Returns:
        - EXPLICIT_MISSING_FILE if `path` is empty, if the file cannot be
          opened from the working tree, or if `blob` is None when not
          diffing against the working tree.
        - None if `path` does not end with '.ipynb'.
        - For the working tree: the result of `apply_possible_filter` when
          it differs from `path`, otherwise the file on disk opened as
          UTF-8 text.
        - Otherwise a BlobWrapper holding the decoded blob content, named
          '<path> (<ref>)'.
    """
    if not path:
        return EXPLICIT_MISSING_FILE
    if not path.endswith('.ipynb'):
        return None

    if ref_name is GitRefWorkingTree:
        # Diffing against working copy, use file on disk!
        with pushd(repo_dir):
            # Any git filters must be applied before comparing against
            # the working directory.
            filtered = apply_possible_filter(path)
            # Getting `path` back unchanged means no filter was applied.
            if filtered != path:
                return filtered
            try:
                return io.open(path, encoding='utf-8')
            except IOError:
                return EXPLICIT_MISSING_FILE
        # Only reachable if the pushd context manager suppresses an
        # exception; fall back to the default result in that case.
        return EXPLICIT_MISSING_FILE

    if blob is None:
        # GitPython uses a None blob to indicate if a file was deleted or
        # added. Workaround for GitPython issue #749.
        return EXPLICIT_MISSING_FILE

    # Passing blob data_streams around caused strange issues, so the
    # content is read eagerly into a StringIO-based buffer instead. The
    # penalty should be low as long as changed_notebooks is consumed as
    # an iterator.
    text = blob.data_stream.read().decode('utf-8')
    stream = BlobWrapper(text)
    if ref_name != GitRefIndex:
        label = ref_name
    else:
        label = '<INDEX>'
    stream.name = '%s (%s)' % (path, label)
    return stream
def changed_notebooks(ref_base, ref_remote, paths=None, repo_dir=None):
    """Iterator over all notebooks that have changed between two git refs.

    References are all valid values according to git-rev-parse, or one of
    the special sentinel values GitRefWorkingTree or GitRefIndex.

    Arguments:
        ref_base: ref for the base side of the diff.
        ref_remote: ref for the remote side of the diff.
        paths: optional path (string) or iterable of paths restricting the
            diff. They are taken relative to `repo_dir` (or the current
            directory when `repo_dir` is not given).
        repo_dir: a directory inside the repository; the current directory
            is used when it is None.

    Yields:
        (base, remote) pairs, each element being what
        `_get_diff_entry_stream` returns for that side: a stream to the
        notebook, or EXPLICIT_MISSING_FILE for added/removed files. Diff
        entries where either side is not a notebook are skipped.

    This is a generator, so the repository is not accessed (and no error
    is raised) until iteration starts.
    """
    repo, popped = get_repo(repo_dir or os.curdir)
    if repo_dir is None:
        repo_dir = os.path.relpath(repo.working_tree_dir, os.curdir)
    elif popped and repo.working_tree_dir:
        # An explicit repo_dir below the repository root was given. Diff
        # entry paths are relative to the root, so working tree files must
        # be read from there, not from the subdirectory.
        repo_dir = repo.working_tree_dir

    if isinstance(paths, string_types):
        paths = (paths,)
    if paths and popped:
        # All paths need to be prepended by popped
        prefix = os.path.join(*popped)
        paths = [os.path.join(prefix, p) for p in paths]

    # Get tree/index for base
    if ref_base == GitRefIndex:
        tree_base = repo.index
    else:
        tree_base = repo.commit(ref_base).tree

    if ref_remote in (GitRefWorkingTree, GitRefIndex):
        # The sentinels are passed straight through to diff()
        diff = tree_base.diff(ref_remote, paths)
    else:
        # Get remote tree and diff against base:
        tree_remote = repo.commit(ref_remote).tree
        diff = tree_base.diff(tree_remote, paths)

    # Return the base/remote pair of Notebook file streams
    for entry in diff:
        fa = _get_diff_entry_stream(
            entry.a_path, entry.a_blob, ref_base, repo_dir)
        if fa is None:
            continue
        fb = _get_diff_entry_stream(
            entry.b_path, entry.b_blob, ref_remote, repo_dir)
        if fb is None:
            # The base side may already be an open stream; release it
            # before dropping the entry so the handle is not leaked.
            close = getattr(fa, 'close', None)
            if close is not None:
                close()
            continue
        yield (fa, fb)