diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py
index be8202c1..ed030e98 100644
--- a/archivebox/archive_methods.py
+++ b/archivebox/archive_methods.py
@@ -30,6 +30,7 @@ from config import (
     TIMEOUT,
     ANSI,
     ARCHIVE_DIR,
+    GIT_DOMAINS,
 )
 from util import (
     check_dependencies,
@@ -504,7 +505,7 @@ def fetch_favicon(link_dir, link, timeout=TIMEOUT):
 def fetch_git(link_dir, link, timeout=TIMEOUT):
     """download full site using git"""
 
-    if not (link['domain'] == 'github.com'
+    if not (link['domain'] in GIT_DOMAINS
             or link['url'].endswith('.git')
             or link['type'] == 'git'):
         return
diff --git a/archivebox/config.py b/archivebox/config.py
index 9da05c66..ef7e4933 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -33,6 +33,9 @@ WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox')
 CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
 TIMEOUT = int(os.getenv('TIMEOUT', '60'))
 FOOTER_INFO = os.getenv('FOOTER_INFO', 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.',)
+# Comma-separated domains that fetch_git matches against link['domain'].
+# Entries are trimmed and blanks dropped, so 'a.com, b.com' works and GIT_DOMAINS='' matches no domain.
+GIT_DOMAINS = [domain.strip() for domain in os.getenv('GIT_DOMAINS', 'github.com,bitbucket.org,gitlab.com').split(',') if domain.strip()]
 
 ### Paths
 REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))