mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 07:04:27 -04:00
more granular errors
This commit is contained in:
parent
263f71862a
commit
fa8a7bde80
1 changed file with 16 additions and 8 deletions
24
archive.py
24
archive.py
|
@@ -157,12 +157,12 @@ def fetch_wget(out_dir, link, overwrite=False):
|
||||||
if not os.path.exists('{}/{}'.format(out_dir, domain)) or overwrite:
|
if not os.path.exists('{}/{}'.format(out_dir, domain)) or overwrite:
|
||||||
print(' - Downloading Full Site')
|
print(' - Downloading Full Site')
|
||||||
CMD = [
|
CMD = [
|
||||||
*'wget --no-clobber --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
|
*'wget --mirror --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
|
||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
output = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # dom.html
|
result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # dom.html
|
||||||
if output.returncode:
|
if result.returncode:
|
||||||
print(output.stderr.read())
|
print(' ', result.stderr)
|
||||||
raise Exception('Failed to wget download')
|
raise Exception('Failed to wget download')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||||
|
@@ -177,9 +177,13 @@ def fetch_pdf(out_dir, link, overwrite=False):
|
||||||
chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ')
|
chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ')
|
||||||
try:
|
try:
|
||||||
result = run([CHROME_BINARY, *chrome_args, link['url']], stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # output.pdf
|
result = run([CHROME_BINARY, *chrome_args, link['url']], stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # output.pdf
|
||||||
if run(['chmod', ARCHIVE_PERMISSIONS, 'output.pdf'], stdout=DEVNULL, stderr=DEVNULL, timeout=5).returncode:
|
if result.returncode:
|
||||||
print(result.stderr.read())
|
print(' ', result.stderr)
|
||||||
raise Exception('Failed to print PDF')
|
raise Exception('Failed to print PDF')
|
||||||
|
chmod_result = run(['chmod', ARCHIVE_PERMISSIONS, 'output.pdf'], stdout=DEVNULL, stderr=DEVNULL, timeout=5)
|
||||||
|
if chmod_result.returncode:
|
||||||
|
print(' ', chmod_result.stderr)
|
||||||
|
raise Exception('Failed to chmod PDF')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||||
else:
|
else:
|
||||||
|
@@ -193,9 +197,13 @@ def fetch_screenshot(out_dir, link, overwrite=False):
|
||||||
chrome_args = '--headless --disable-gpu --screenshot'.split(' ')
|
chrome_args = '--headless --disable-gpu --screenshot'.split(' ')
|
||||||
try:
|
try:
|
||||||
result = run([CHROME_BINARY, *chrome_args, '--window-size={}'.format(RESOLUTION), link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # sreenshot.png
|
result = run([CHROME_BINARY, *chrome_args, '--window-size={}'.format(RESOLUTION), link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # sreenshot.png
|
||||||
if run(['chmod', ARCHIVE_PERMISSIONS, 'screenshot.png'], stdout=DEVNULL, stderr=DEVNULL, timeout=5).returncode:
|
if result.returncode:
|
||||||
print(result.stderr.read())
|
print(result.stderr)
|
||||||
raise Exception('Failed to take screenshot')
|
raise Exception('Failed to take screenshot')
|
||||||
|
chmod_result = run(['chmod', ARCHIVE_PERMISSIONS, 'screenshot.png'], stdout=DEVNULL, stderr=DEVNULL, timeout=5)
|
||||||
|
if chmod_result.returncode:
|
||||||
|
print(result.stderr)
|
||||||
|
raise Exception('Failed to chmod screenshot')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue