Merge branch 'dev' into link-removal2

This commit is contained in:
Nick Sweeting 2021-02-01 02:46:57 -05:00
commit 3eaf580fc0
16 changed files with 226 additions and 129 deletions

View file

@@ -413,6 +413,8 @@ class Link:
"""predict the expected output paths that should be present after archiving"""
from ..extractors.wget import wget_output_path
# TODO: banish this awful duplication from the codebase and import these
# from their respective extractor files
canonical = {
'index_path': 'index.html',
'favicon_path': 'favicon.ico',
@@ -428,6 +430,7 @@ class Link:
'archive_org_path': 'https://web.archive.org/web/{}'.format(self.base_url),
'git_path': 'git/',
'media_path': 'media/',
'headers_path': 'headers.json',
}
if self.is_static:
# static binary files like PDF and images are handled slightly differently.