mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 15:14:31 -04:00
handle failing screenshots and PDFs better
This commit is contained in:
parent
3957bab0e9
commit
b00498dc42
1 changed file with 6 additions and 3 deletions
|
@ -141,7 +141,8 @@ def parse_bookmarks_export(html_file):
|
||||||
|
|
||||||
def fetch_wget(out_dir, link, overwrite=False):
|
def fetch_wget(out_dir, link, overwrite=False):
|
||||||
# download full site
|
# download full site
|
||||||
if not os.path.exists('{}/{}'.format(out_dir, link['base_url'].split('/', 1)[0])) or overwrite:
|
domain = link['base_url'].split('/', 1)[0]
|
||||||
|
if not os.path.exists('{}/{}'.format(out_dir, domain)) or overwrite:
|
||||||
print(' - Downloading Full Site')
|
print(' - Downloading Full Site')
|
||||||
CMD = [
|
CMD = [
|
||||||
*'wget --no-clobber --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
|
*'wget --no-clobber --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
|
||||||
|
@ -161,7 +162,8 @@ def fetch_pdf(out_dir, link, overwrite=False):
|
||||||
chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ')
|
chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ')
|
||||||
try:
|
try:
|
||||||
run([CHROME_BINARY, *chrome_args, link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # output.pdf
|
run([CHROME_BINARY, *chrome_args, link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # output.pdf
|
||||||
run(['chmod', ARCHIVE_PERMISSIONS, 'output.pdf'], timeout=5)
|
if run(['chmod', ARCHIVE_PERMISSIONS, 'output.pdf'], stdout=DEVNULL, stderr=DEVNULL, timeout=5).returncode:
|
||||||
|
raise Exception('Failed to print PDF')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||||
else:
|
else:
|
||||||
|
@ -174,7 +176,8 @@ def fetch_screenshot(out_dir, link, overwrite=False):
|
||||||
chrome_args = '--headless --disable-gpu --screenshot'.split(' ')
|
chrome_args = '--headless --disable-gpu --screenshot'.split(' ')
|
||||||
try:
|
try:
|
||||||
run([CHROME_BINARY, *chrome_args, '--window-size={}'.format(RESOLUTION), link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # sreenshot.png
|
run([CHROME_BINARY, *chrome_args, '--window-size={}'.format(RESOLUTION), link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # sreenshot.png
|
||||||
run(['chmod', ARCHIVE_PERMISSIONS, 'screenshot.png'], timeout=5)
|
if run(['chmod', ARCHIVE_PERMISSIONS, 'screenshot.png'], stdout=DEVNULL, stderr=DEVNULL, timeout=5).returncode:
|
||||||
|
raise Exception('Failed to take screenshot')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue