devtools: Make github-merge compute SHA512 from git, instead of worktree
This changes tree_sha512sum() to request the objects for hashing from git instead of from the working tree. The change should make the process more deterministic (it hashes what will be pushed) and hopefully avoid the frequent miscomputed SHA512s that happen now.
This commit is contained in:
parent 8040ae6fc5
commit a327e8ea30
1 changed file with 37 additions and 8 deletions
@@ -78,24 +78,53 @@ def get_symlink_files():
         ret.append(f.decode('utf-8').split("\t")[1])
     return ret

-def tree_sha512sum():
-    files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
+def tree_sha512sum(commit='HEAD'):
+    # request metadata for entire tree, recursively
+    files = []
+    blob_by_name = {}
+    for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
+        name_sep = line.index(b'\t')
+        metadata = line[:name_sep].split() # perms, 'blob', blobid
+        assert(metadata[1] == b'blob')
+        name = line[name_sep+1:]
+        files.append(name)
+        blob_by_name[name] = metadata[2]
+
+    files.sort()
+    # open connection to git-cat-file in batch mode to request data for all blobs
+    # this is much faster than launching it per file
+    p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
     overall = hashlib.sha512()
     for f in files:
+        blob = blob_by_name[f]
+        # request blob
+        p.stdin.write(blob + b'\n')
+        p.stdin.flush()
+        # read header: blob, "blob", size
+        reply = p.stdout.readline().split()
+        assert(reply[0] == blob and reply[1] == b'blob')
+        size = int(reply[2])
+        # hash the blob data
         intern = hashlib.sha512()
-        fi = open(f, 'rb')
-        while True:
-            piece = fi.read(65536)
-            if piece:
+        ptr = 0
+        while ptr < size:
+            bs = min(65536, size - ptr)
+            piece = p.stdout.read(bs)
+            if len(piece) == bs:
                 intern.update(piece)
             else:
-                break
-        fi.close()
+                raise IOError('Premature EOF reading git cat-file output')
+            ptr += bs
         dig = intern.hexdigest()
+        assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
+        # update overall hash with file hash
         overall.update(dig.encode("utf-8"))
         overall.update(" ".encode("utf-8"))
         overall.update(f)
         overall.update("\n".encode("utf-8"))
+    p.stdin.close()
+    if p.wait():
+        raise IOError('Non-zero return value executing git cat-file')
     return overall.hexdigest()
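
For reference, below is a minimal standalone sketch (not part of the commit) of the two git plumbing calls the new tree_sha512sum() drives: git ls-tree --full-tree -r to enumerate the blobs of a commit, and a single git cat-file --batch process to stream their contents. The helper names (blob_ids, hash_one_blob) are illustrative only, and each blob is read in one call rather than in 64 KiB chunks as the script does.

# sketch_tree_hash.py -- illustrative only; assumes a git checkout in the
# current directory and 'git' on PATH. Helper names are not from the script.
import hashlib
import subprocess

GIT = 'git'

def blob_ids(commit='HEAD'):
    # 'git ls-tree --full-tree -r' prints "<mode> blob <id>\t<path>" per file
    out = subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit])
    blobs = {}
    for line in out.splitlines():
        meta, name = line.split(b'\t', 1)
        mode, otype, oid = meta.split()
        assert otype == b'blob'
        blobs[name] = oid
    return blobs

def hash_one_blob(proc, oid):
    # 'git cat-file --batch' replies "<id> <type> <size>\n", then <size>
    # bytes of content, then a terminating "\n"
    proc.stdin.write(oid + b'\n')
    proc.stdin.flush()
    reply_oid, otype, size = proc.stdout.readline().split()
    assert reply_oid == oid and otype == b'blob'
    digest = hashlib.sha512(proc.stdout.read(int(size))).hexdigest()
    assert proc.stdout.read(1) == b'\n'
    return digest

if __name__ == '__main__':
    blobs = blob_ids()
    p = subprocess.Popen([GIT, 'cat-file', '--batch'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    for name in sorted(blobs):
        print(hash_one_blob(p, blobs[name]), name.decode('utf-8', 'replace'))
    p.stdin.close()
    p.wait()

The committed code additionally folds the per-file digests into one overall SHA512, updating it with "<hex digest> <name>\n" for each file, so the result commits to both the contents and the paths of the tree.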