Add _EXTRA_ARGS for various extractors (#1360)

This commit is contained in:
Nick Sweeting 2024-03-14 01:55:09 -07:00 committed by GitHub
commit ca2c484a8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 115 additions and 42 deletions

View file

@ -241,7 +241,11 @@ def chrome_args(**options) -> List[str]:
# Chrome CLI flag documentation: https://peter.sh/experiments/chromium-command-line-switches/
from .config import CHROME_OPTIONS, CHROME_VERSION
from .config import (
CHROME_OPTIONS,
CHROME_VERSION,
CHROME_EXTRA_ARGS,
)
options = {**CHROME_OPTIONS, **options}
@ -250,6 +254,8 @@ def chrome_args(**options) -> List[str]:
cmd_args = [options['CHROME_BINARY']]
cmd_args += CHROME_EXTRA_ARGS
if options['CHROME_HEADLESS']:
chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
if chrome_major_version >= 111:
@ -293,8 +299,9 @@ def chrome_args(**options) -> List[str]:
if options['CHROME_USER_DATA_DIR']:
cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))
return cmd_args
return dedupe(cmd_args)
def chrome_cleanup():
"""
@ -331,6 +338,20 @@ def ansi_to_html(text):
return COLOR_REGEX.sub(single_sub, text)
@enforce_types
def dedupe(options: List[str]) -> List[str]:
"""
Deduplicates the given options. Options that come later clobber earlier
conflicting options.
"""
deduped = {}
for option in options:
deduped[option.split('=')[0]] = option
return list(deduped.values())
class AttributeDict(dict):
"""Helper to allow accessing dict values via Example.key or Example['key']"""