downloading header from S3 correctly tracks progress

This commit is contained in:
Lex Berezhny 2019-02-18 16:44:46 -05:00
parent 03396c1993
commit 461c3c8408

View file

@ -147,40 +147,31 @@ class HeadersComponent(Component):
}
async def fetch_headers_from_s3(self):
def collector(d, h_file):
h_file.write(d)
local_size = float(h_file.tell())
final_size = float(final_size_after_download)
self._headers_progress_percent = math.ceil(local_size / final_size * 100)
local_header_size = self.local_header_file_size()
resume_header = {"Range": f"bytes={local_header_size}-"}
async with aiohttp.request('get', HEADERS_URL, headers=resume_header) as response:
got_406 = response.status == 406 # our file is bigger
final_size_after_download = response.content_length + local_header_size
if got_406:
if response.status == 406 or response.content_length < HEADER_SIZE: # our file is bigger
log.warning("s3 is more out of date than we are")
# should have something to download and a final length divisible by the header size
elif final_size_after_download and not final_size_after_download % HEADER_SIZE:
s3_height = (final_size_after_download / HEADER_SIZE) - 1
local_height = self.local_header_file_height()
if s3_height > local_height:
data = await response.read()
if local_header_size:
log.info("Resuming download of %i bytes from s3", response.content_length)
with open(self.headers_file, "a+b") as headers_file:
collector(data, headers_file)
else:
with open(self.headers_file, "wb") as headers_file:
collector(data, headers_file)
log.info("fetched headers from s3 (s3 height: %i), now verifying integrity after download.",
s3_height)
self._check_header_file_integrity()
else:
log.warning("s3 is more out of date than we are")
else:
log.error("invalid size for headers from s3")
return
if response.content_length % HEADER_SIZE != 0:
log.warning("s3 appears to have corrupted header")
return
final_size_after_download = response.content_length + local_header_size
write_mode = "wb"
if local_header_size > 0:
log.info("Resuming download of %i bytes from s3", response.content_length)
write_mode = "a+b"
with open(self.headers_file, write_mode) as fd:
while True:
chunk = await response.content.read(512)
if not chunk:
break
fd.write(chunk)
self._headers_progress_percent = math.ceil(
float(fd.tell()) / float(final_size_after_download) * 100
)
log.info("fetched headers from s3, now verifying integrity after download.")
self._check_header_file_integrity()
def local_header_file_height(self):
return max((self.local_header_file_size() / HEADER_SIZE) - 1, 0)