mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 07:04:27 -04:00
add chrome headless option and improve default data dir finding
This commit is contained in:
parent
8630c0fdaa
commit
1c1bc76ac1
2 changed files with 39 additions and 9 deletions
|
@ -31,6 +31,7 @@ from config import (
|
||||||
COOKIES_FILE,
|
COOKIES_FILE,
|
||||||
WGET_USER_AGENT,
|
WGET_USER_AGENT,
|
||||||
CHROME_USER_DATA_DIR,
|
CHROME_USER_DATA_DIR,
|
||||||
|
CHROME_HEADLESS,
|
||||||
CHROME_SANDBOX,
|
CHROME_SANDBOX,
|
||||||
TIMEOUT,
|
TIMEOUT,
|
||||||
MEDIA_TIMEOUT,
|
MEDIA_TIMEOUT,
|
||||||
|
@ -613,14 +614,42 @@ def fetch_git(link_dir, link, timeout=TIMEOUT):
|
||||||
'output': output,
|
'output': output,
|
||||||
}
|
}
|
||||||
|
|
||||||
def chrome_headless(binary=CHROME_BINARY, user_data_dir=CHROME_USER_DATA_DIR):
|
def chrome_headless(binary=CHROME_BINARY, user_data_dir=CHROME_USER_DATA_DIR, headless=CHROME_HEADLESS, sandbox=CHROME_SANDBOX):
|
||||||
args = [binary, '--headless']
|
global USER_DATA_DIR
|
||||||
if not CHROME_SANDBOX:
|
user_data_dir = user_data_dir or USER_DATA_DIR
|
||||||
|
cmd_args = [binary]
|
||||||
|
|
||||||
|
if headless:
|
||||||
|
cmd_args += ('--headless',)
|
||||||
|
|
||||||
|
if not sandbox:
|
||||||
# don't use GPU or sandbox when running inside docker container
|
# don't use GPU or sandbox when running inside docker container
|
||||||
args += ['--no-sandbox', '--disable-gpu']
|
cmd_args += ('--no-sandbox', '--disable-gpu')
|
||||||
default_profile = os.path.expanduser('~/Library/Application Support/Google/Chrome')
|
|
||||||
|
|
||||||
|
# Find chrome user data directory
|
||||||
|
default_profile_paths = (
|
||||||
|
'~/.config/chromium',
|
||||||
|
'~/.config/google-chrome',
|
||||||
|
'~/.config/google-chrome-beta',
|
||||||
|
'~/.config/google-chrome-unstable',
|
||||||
|
'~/Library/Application Support/Chromium',
|
||||||
|
'~/Library/Application Support/Google/Chrome',
|
||||||
|
'~/Library/Application Support/Google/Chrome Canary',
|
||||||
|
'~/AppData/Local/Chromium/User Data',
|
||||||
|
'~/AppData/Local/Google/Chrome/User Data',
|
||||||
|
'~/AppData/Local/Google/Chrome SxS/User Data',
|
||||||
|
)
|
||||||
if user_data_dir:
|
if user_data_dir:
|
||||||
args.append('--user-data-dir={}'.format(user_data_dir))
|
cmd_args.append('--user-data-dir={}'.format(user_data_dir))
|
||||||
elif os.path.exists(default_profile):
|
else:
|
||||||
args.append('--user-data-dir={}'.format(default_profile))
|
for path in default_profile_paths:
|
||||||
return args
|
full_path = os.path.expanduser(path)
|
||||||
|
if os.path.exists(full_path):
|
||||||
|
USER_DATA_DIR = full_path
|
||||||
|
cmd_args.append('--user-data-dir={}'.format(full_path))
|
||||||
|
break
|
||||||
|
return cmd_args
|
||||||
|
|
||||||
|
|
||||||
|
USER_DATA_DIR = CHROME_USER_DATA_DIR
|
||||||
|
|
|
@ -37,6 +37,7 @@ GIT_DOMAINS = os.getenv('GIT_DOMAINS', 'github.com,bitbuck
|
||||||
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
|
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
|
||||||
COOKIES_FILE = os.getenv('COOKIES_FILE', None)
|
COOKIES_FILE = os.getenv('COOKIES_FILE', None)
|
||||||
CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
|
CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
|
||||||
|
CHROME_HEADLESS = os.getenv('CHROME_HEADLESS', 'True' ).lower() == 'true'
|
||||||
|
|
||||||
CURL_BINARY = os.getenv('CURL_BINARY', 'curl')
|
CURL_BINARY = os.getenv('CURL_BINARY', 'curl')
|
||||||
GIT_BINARY = os.getenv('GIT_BINARY', 'git')
|
GIT_BINARY = os.getenv('GIT_BINARY', 'git')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue