mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
Add _EXTRA_ARGS
for various extractors (#1360)
This commit is contained in:
commit
ca2c484a8e
10 changed files with 115 additions and 42 deletions
|
@ -241,7 +241,11 @@ def chrome_args(**options) -> List[str]:
|
|||
|
||||
# Chrome CLI flag documentation: https://peter.sh/experiments/chromium-command-line-switches/
|
||||
|
||||
from .config import CHROME_OPTIONS, CHROME_VERSION
|
||||
from .config import (
|
||||
CHROME_OPTIONS,
|
||||
CHROME_VERSION,
|
||||
CHROME_EXTRA_ARGS,
|
||||
)
|
||||
|
||||
options = {**CHROME_OPTIONS, **options}
|
||||
|
||||
|
@ -250,6 +254,8 @@ def chrome_args(**options) -> List[str]:
|
|||
|
||||
cmd_args = [options['CHROME_BINARY']]
|
||||
|
||||
cmd_args += CHROME_EXTRA_ARGS
|
||||
|
||||
if options['CHROME_HEADLESS']:
|
||||
chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
|
||||
if chrome_major_version >= 111:
|
||||
|
@ -293,8 +299,9 @@ def chrome_args(**options) -> List[str]:
|
|||
|
||||
if options['CHROME_USER_DATA_DIR']:
|
||||
cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))
|
||||
|
||||
return cmd_args
|
||||
|
||||
|
||||
return dedupe(cmd_args)
|
||||
|
||||
def chrome_cleanup():
|
||||
"""
|
||||
|
@ -331,6 +338,20 @@ def ansi_to_html(text):
|
|||
return COLOR_REGEX.sub(single_sub, text)
|
||||
|
||||
|
||||
@enforce_types
def dedupe(options: List[str]) -> List[str]:
    """
    Collapse options that share the same name, where an option's name is
    the text before its first '=' (or the whole string if it has no '=').

    When several options share a name, the one given last wins; it is
    emitted at the position where that name first appeared.
    """
    # Later entries overwrite earlier ones stored under the same key, while
    # the dict keeps each key at the position of its first insertion.
    latest_by_name = {opt.partition('=')[0]: opt for opt in options}
    return [*latest_by_name.values()]
|
||||
|
||||
|
||||
class AttributeDict(dict):
|
||||
"""Helper to allow accessing dict values via Example.key or Example['key']"""
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue