mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
add common code extensions to default blacklist
This commit is contained in:
parent
3658153cf8
commit
718d39e242
2 changed files with 8 additions and 2 deletions
|
@ -61,6 +61,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Re-archive URLs from scratch, overwriting any existing files"
|
help="Re-archive URLs from scratch, overwriting any existing files"
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--init', #'-i',
|
||||||
|
action='store_true',
|
||||||
|
help="Init/upgrade the curent data directory before adding",
|
||||||
|
)
|
||||||
command = parser.parse_args(args or ())
|
command = parser.parse_args(args or ())
|
||||||
urls = command.urls
|
urls = command.urls
|
||||||
stdin_urls = accept_stdin(stdin)
|
stdin_urls = accept_stdin(stdin)
|
||||||
|
@ -76,6 +81,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
||||||
update_all=command.update_all,
|
update_all=command.update_all,
|
||||||
index_only=command.index_only,
|
index_only=command.index_only,
|
||||||
overwrite=command.overwrite,
|
overwrite=command.overwrite,
|
||||||
|
init=command.init,
|
||||||
out_dir=pwd or OUTPUT_DIR,
|
out_dir=pwd or OUTPUT_DIR,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
||||||
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
|
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
|
||||||
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
|
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
|
||||||
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
|
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
|
||||||
'URL_BLACKLIST': {'type': str, 'default': None},
|
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2)(\?.*)?$'}, # to avoid downloading code assets as their own pages
|
||||||
},
|
},
|
||||||
|
|
||||||
'SERVER_CONFIG': {
|
'SERVER_CONFIG': {
|
||||||
|
@ -231,7 +231,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
||||||
'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
|
'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
|
||||||
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
|
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
|
||||||
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
|
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
|
||||||
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE)},
|
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE | re.UNICODE | re.MULTILINE)},
|
||||||
|
|
||||||
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
|
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
|
||||||
'VERSION': {'default': lambda c: open(os.path.join(c['PYTHON_DIR'], VERSION_FILENAME), 'r').read().strip()},
|
'VERSION': {'default': lambda c: open(os.path.join(c['PYTHON_DIR'], VERSION_FILENAME), 'r').read().strip()},
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue