new archivebox update speed improvements

This commit is contained in:
Nick Sweeting 2024-02-22 04:49:09 -08:00
parent 2d32f05a62
commit 6a4e568d1b
8 changed files with 36 additions and 13 deletions

View file

@ -77,6 +77,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
status = 'succeeded'
timer = TimedProgress(timeout, prefix=' ')
result = None
try:
result = run(cmd, cwd=str(out_dir), timeout=timeout)
@ -84,7 +85,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
# "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
output_tail = [
line.strip()
for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
if line.strip()
]
hints = (
@ -94,12 +95,13 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
# Check for common failure cases
if (result.returncode > 0) or not (out_dir / output).is_file():
raise ArchiveError('SingleFile was not able to archive the page', hints)
raise ArchiveError(f'SingleFile was not able to archive the page (status={result.returncode})', hints)
chmod_file(output, cwd=str(out_dir))
except (Exception, OSError) as err:
status = 'failed'
# TODO: Make this prettier. Escaping the double quotes inside the JSON browser args is necessary for the command to be runnable.
cmd[2] = browser_args.replace('"', "\\\"")
err.hints = (result.stdout + result.stderr).decode().split('\n')
output = err
finally:
timer.end()