downloading header from S3 correctly tracks progress

This commit is contained in:
Lex Berezhny 2019-02-18 16:44:46 -05:00
parent 03396c1993
commit 461c3c8408

View file

@@ -147,40 +147,31 @@ class HeadersComponent(Component):
} }
async def fetch_headers_from_s3(self): async def fetch_headers_from_s3(self):
def collector(d, h_file):
h_file.write(d)
local_size = float(h_file.tell())
final_size = float(final_size_after_download)
self._headers_progress_percent = math.ceil(local_size / final_size * 100)
local_header_size = self.local_header_file_size() local_header_size = self.local_header_file_size()
resume_header = {"Range": f"bytes={local_header_size}-"} resume_header = {"Range": f"bytes={local_header_size}-"}
async with aiohttp.request('get', HEADERS_URL, headers=resume_header) as response: async with aiohttp.request('get', HEADERS_URL, headers=resume_header) as response:
got_406 = response.status == 406 # our file is bigger if response.status == 406 or response.content_length < HEADER_SIZE: # our file is bigger
log.warning("s3 is more out of date than we are")
return
if response.content_length % HEADER_SIZE != 0:
log.warning("s3 appears to have corrupted header")
return
final_size_after_download = response.content_length + local_header_size final_size_after_download = response.content_length + local_header_size
if got_406: write_mode = "wb"
log.warning("s3 is more out of date than we are") if local_header_size > 0:
# should have something to download and a final length divisible by the header size
elif final_size_after_download and not final_size_after_download % HEADER_SIZE:
s3_height = (final_size_after_download / HEADER_SIZE) - 1
local_height = self.local_header_file_height()
if s3_height > local_height:
data = await response.read()
if local_header_size:
log.info("Resuming download of %i bytes from s3", response.content_length) log.info("Resuming download of %i bytes from s3", response.content_length)
with open(self.headers_file, "a+b") as headers_file: write_mode = "a+b"
collector(data, headers_file) with open(self.headers_file, write_mode) as fd:
else: while True:
with open(self.headers_file, "wb") as headers_file: chunk = await response.content.read(512)
collector(data, headers_file) if not chunk:
log.info("fetched headers from s3 (s3 height: %i), now verifying integrity after download.", break
s3_height) fd.write(chunk)
self._headers_progress_percent = math.ceil(
float(fd.tell()) / float(final_size_after_download) * 100
)
log.info("fetched headers from s3, now verifying integrity after download.")
self._check_header_file_integrity() self._check_header_file_integrity()
else:
log.warning("s3 is more out of date than we are")
else:
log.error("invalid size for headers from s3")
def local_header_file_height(self): def local_header_file_height(self):
return max((self.local_header_file_size() / HEADER_SIZE) - 1, 0) return max((self.local_header_file_size() / HEADER_SIZE) - 1, 0)