Merge branch 'dev' into version-banner

Nick Sweeting, 2023-12-19 09:57:08 -08:00 (committed by GitHub)
commit 14f10a0461
20 changed files with 2055 additions and 1063 deletions

archivebox/config.py

@@ -54,6 +54,7 @@ from .config_stubs import (
### Pre-Fetch Minimal System Config
TIMEZONE = 'UTC'
SYSTEM_USER = getpass.getuser() or os.getlogin()
try:
@@ -82,7 +83,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'IN_QEMU': {'type': bool, 'default': False},
'PUID': {'type': int, 'default': os.getuid()},
'PGID': {'type': int, 'default': os.getgid()},
# TODO: 'SHOW_HINTS': {'type': bool, 'default': True},
},
'GENERAL_CONFIG': {
@@ -377,7 +377,7 @@ ALLOWED_IN_OUTPUT_DIR = {
'static_index.json',
}
def get_version(config):
def get_version(config) -> str:
try:
return importlib.metadata.version(__package__ or 'archivebox')
except importlib.metadata.PackageNotFoundError:
@@ -392,58 +392,76 @@ def get_version(config):
raise Exception('Failed to detect installed archivebox version!')
def get_commit_hash(config):
def get_commit_hash(config) -> Optional[str]:
try:
git_dir = config['PACKAGE_DIR'] / '../.git'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
commit_hash = git_dir.joinpath(ref).read_text().strip()
return commit_hash
except Exception:
pass
try:
return list((config['PACKAGE_DIR'] / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
except Exception:
return None
pass
return None
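Illustration (not part of this diff) of the first fallback above, assuming a hypothetical checkout whose .git/HEAD contains 'ref: refs/heads/dev':

from pathlib import Path

git_dir = Path('.git')                                       # stands in for config['PACKAGE_DIR'] / '../.git'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]  # -> 'refs/heads/dev'
commit_hash = git_dir.joinpath(ref).read_text().strip()      # -> the full 40-char commit sha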
def get_version_releases(config):
def get_build_time(config) -> str:
if config['IN_DOCKER']:
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
return docker_build_end_time
src_last_modified_unix_timestamp = (config['PACKAGE_DIR'] / 'config.py').stat().st_mtime
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
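For context (illustrative, not part of this diff): '%s' is a platform strftime extension (glibc/BSD) that appends the unix epoch, so the non-Docker fallback above yields strings like the BUILD_TIME in the banner example further down:

from datetime import datetime

# hypothetical config.py mtime; the rendered time assumes a US-Pacific local timezone:
datetime.fromtimestamp(1702860365).strftime('%Y-%m-%d %H:%M:%S %s')
# -> '2023-12-17 16:46:05 1702860365'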
def get_versions_available_on_github(config):
"""
returns a dictionary containing the GitHub release data for
returns a dictionary containing the ArchiveBox GitHub release info for
the recommended upgrade version and the currently installed version
"""
# we only want to perform the (relatively expensive) check for new versions
# when it's most relevant, e.g. when the user runs a long-running command
subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
if subcommand_run_by_user not in long_running_commands:
return None
github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
response = requests.get(github_releases_api)
if response.status_code != 200:
stderr('Failed to get release data from GitHub', color='lightyellow', config=config)
stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
return None
all_releases = response.json()
releases = response.json()
installed_version = config['VERSION']
installed_version_parts = parse_tag_name(installed_version)
installed_version = parse_version_string(config['VERSION'])
# find current version or nearest older version (to link to)
current_version = None
for release in releases:
release_parts = parse_tag_name(release["tag_name"])
if release_parts <= installed_version_parts:
for idx, release in enumerate(all_releases):
release_version = parse_version_string(release["tag_name"])
if release_version <= installed_version:
current_version = release
break
current_version = current_version if current_version else releases[-1]
current_version = current_version or all_releases[-1]
# recommended version is whatever comes after current_version in the release list
# (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
try:
recommended_version = all_releases[idx+1]
except IndexError:
recommended_version = None
# find upgrade version
upgrade_version = None
smallest_version_diff = parse_tag_name(releases[0]["tag_name"])[1]
for release in releases:
release_parts = parse_tag_name(release["tag_name"])
major_version_diff = release_parts[1] - installed_version_parts[1]
if major_version_diff < smallest_version_diff:
smallest_version_diff = major_version_diff
if smallest_version_diff < 1:
break
upgrade_version = release
upgrade_version = upgrade_version if upgrade_version else releases[0]
return {"upgrade_version": upgrade_version, "current_version": current_version}
return {"recommended_version": recommended_version, "current_version": current_version}
def can_upgrade(config):
if config['VERSION_RELEASES']:
upgrade_version = parse_tag_name(config['VERSION_RELEASES']['upgrade_version']['tag_name'])
current_version = parse_tag_name(config['VERSION_RELEASES']['current_version']['tag_name'])
return upgrade_version > current_version
if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
return recommended_version > current_version
return False
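Putting the two new helpers together (illustrative sketch, not part of this diff; the tag names are hypothetical): get_versions_available_on_github() returns None or a dict of two GitHub release objects, and can_upgrade() reduces to a tuple comparison:

versions_available = {
    'recommended_version': {'tag_name': 'v0.7.2'},  # hypothetical next release in the list
    'current_version':     {'tag_name': 'v0.7.1'},  # hypothetical release matching the install
}
# can_upgrade() then effectively evaluates:
parse_version_string('v0.7.2') > parse_version_string('v0.7.1')  # (0, 7, 2) > (0, 7, 1) -> True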
@@ -473,11 +491,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
'VERSION': {'default': lambda c: get_version(c)},
'VERSION_RELEASES': {'default': lambda c: get_version_releases(c)},
'CAN_UPGRADE': {'default': lambda c: can_upgrade(c)},
'VERSION': {'default': lambda c: get_version(c).split('+', 1)[0]},
'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},
'BUILD_TIME': {'default': lambda c: get_build_time(c)},
'VERSIONS_AVAILABLE': {'default': lambda c: get_versions_available_on_github(c)},
'CAN_UPGRADE': {'default': lambda c: can_upgrade(c)},
'PYTHON_BINARY': {'default': lambda c: sys.executable},
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
'PYTHON_VERSION': {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},
@@ -487,7 +508,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'SQLITE_BINARY': {'default': lambda c: inspect.getfile(sqlite3)},
'SQLITE_VERSION': {'default': lambda c: sqlite3.version},
#'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'}, # set at runtime below, interesting but unused for now
#'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'}, # set at runtime below, interesting if changed later but unused for now because it's always expected to be wal
#'SQLITE_OPTIONS': {'default': lambda c: ['JSON1']}, # set at runtime below
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
@@ -744,14 +765,11 @@ def load_config(defaults: ConfigDefaultDict,
return extended_config
# def write_config(config: ConfigDict):
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
def parse_tag_name(v):
"""parses a version tag string formatted like 'vx.x.x'"""
def parse_version_string(version: str) -> Tuple[int, int, int]:
"""parses a version tag string formatted like 'vx.x.x' into (major, minor, patch) ints"""
base = v.split('+')[0].split('v')[-1] # remove 'v' prefix and '+editable' suffix
base = version.split('+')[0].split('v')[-1] # remove 'v' prefix and '+editable' suffix
return tuple(int(part) for part in base.split('.'))
return tuple(int(part) for part in base.split('.'))[:3]
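Quick sanity check of the parser (illustrative, not part of this diff):

parse_version_string('v0.7.1+editable')  # -> (0, 7, 1): 'v' prefix and '+editable' suffix are stripped
parse_version_string('0.7.2')            # -> (0, 7, 2): bare version strings work too
(0, 7, 2) > (0, 7, 1)                    # -> True: int tuples compare element-wise, so ordering is semantic, not lexicographic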
# Logging Helpers
@@ -840,6 +858,7 @@ def find_chrome_binary() -> Optional[str]:
# Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
# make sure data dir finding precedence order always matches binary finding order
default_executable_paths = (
# '~/Library/Caches/ms-playwright/chromium-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium',
'chromium-browser',
'chromium',
'/Applications/Chromium.app/Contents/MacOS/Chromium',
@@ -1166,14 +1185,25 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
def check_system_config(config: ConfigDict=CONFIG) -> None:
### Check system environment
if config['USER'] == 'root':
if config['USER'] == 'root' or str(config['PUID']) == "0":
stderr('[!] ArchiveBox should never be run as root!', color='red')
stderr(' For more information, see the security overview documentation:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
if config['IN_DOCKER']:
attempted_command = ' '.join(sys.argv[:3])
stderr('')
stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
stderr(f' docker compose run archivebox {attempted_command}')
stderr(f' docker compose exec --user=archivebox archivebox {attempted_command}')
stderr(' or')
stderr(f' docker run -it -v ... -p ... archivebox/archivebox {attempted_command}')
stderr(f' docker exec -it --user=archivebox <container id> /bin/bash')
raise SystemExit(2)
### Check Python environment
if sys.version_info[:3] < (3, 6, 0):
if sys.version_info[:3] < (3, 7, 0):
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
raise SystemExit(2)
@@ -1249,7 +1279,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
stderr(' Youtube-dl will fail to archive all media if set to less than ~20 seconds.')
stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
stderr(' (Setting it somewhere over 60 seconds is recommended)')
stderr()
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
@@ -1337,8 +1367,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
if check_db:
# Enable WAL mode in sqlite3

archivebox/core/admin.py

@@ -48,22 +48,25 @@ class TagInline(admin.TabularInline):
from django.contrib.admin.helpers import ActionForm
from django.contrib.admin.widgets import AutocompleteSelectMultiple
class AutocompleteTags:
model = Tag
search_fields = ['name']
# WIP: commented out because broken by Django 3.1.2 -> 4.0 migration
# class AutocompleteTags:
# model = Tag
# search_fields = ['name']
# name = 'tags'
class AutocompleteTagsAdminStub:
name = 'admin'
# class AutocompleteTagsAdminStub:
# name = 'admin'
class SnapshotActionForm(ActionForm):
tags = forms.ModelMultipleChoiceField(
queryset=Tag.objects.all(),
required=False,
widget=AutocompleteSelectMultiple(
AutocompleteTags(),
AutocompleteTagsAdminStub(),
),
# WIP: commented out because broken by Django 3.1.2 -> 4.0 migration
# widget=AutocompleteSelectMultiple(
# # AutocompleteTags(),
# # AutocompleteTagsAdminStub(),
# ),
)
# TODO: allow selecting actions for specific extractors? is this useful?

archivebox/core/apps.py

@@ -3,4 +3,4 @@ from django.apps import AppConfig
class CoreConfig(AppConfig):
name = 'core'
default_auto_field = 'django.db.models.UUIDField'
# default_auto_field = 'django.db.models.UUIDField'

archivebox/core/settings.py

@@ -268,6 +268,8 @@ AUTH_PASSWORD_VALIDATORS = [
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
]
# WIP: commented out because broken by Django 3.1.2 -> 4.0 migration
# DEFAULT_AUTO_FIELD = 'django.db.models.UUIDField'
################################################################################
### Shell Settings

archivebox/extractors/__init__.py

@@ -184,7 +184,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
link.url,
command,
ts
) + "\n"))
) + "\n" + str(e) + "\n"))
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
# print(' ', stats)

archivebox/logging_util.py

@@ -393,7 +393,11 @@ def log_link_archiving_finished(link: "Link", link_dir: str, is_new: bool, stats
else:
_LAST_RUN_STATS.succeeded += 1
size = get_dir_size(link_dir)
try:
size = get_dir_size(link_dir)
except FileNotFoundError:
size = (0, None, '0')
end_ts = datetime.now(timezone.utc)
duration = str(end_ts - start_ts).split('.')[0]
print(' {black}{} files ({}) in {}s {reset}'.format(size[2], printable_filesize(size[0]), duration, **ANSI))
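For context (illustrative, not part of this diff): get_dir_size() returns a (num_bytes, num_dirs, num_files) tuple, which the new fallback mirrors so the summary line above still formats cleanly:

size = (0, None, '0')  # placeholder (num_bytes, num_dirs, num_files) when the snapshot dir is missing
# -> the summary reports zero files/bytes instead of raising FileNotFoundError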

archivebox/main.py

@@ -93,6 +93,8 @@ from .config import (
SQL_INDEX_FILENAME,
ALLOWED_IN_OUTPUT_DIR,
SEARCH_BACKEND_ENGINE,
LDAP,
get_version,
check_dependencies,
check_data_folder,
write_config_file,
@@ -100,6 +102,7 @@ from .config import (
VERSION_RELEASES,
CAN_UPGRADE,
COMMIT_HASH,
BUILD_TIME,
CODE_LOCATIONS,
EXTERNAL_LOCATIONS,
DATA_LOCATIONS,
@@ -220,31 +223,39 @@ def version(quiet: bool=False,
if not quiet:
# 0.7.1
# ArchiveBox v0.7.1 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
# DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
# IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
# FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
p = platform.uname()
print(
'ArchiveBox v{}'.format(VERSION),
*((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
sys.implementation.name.title(),
p.system,
platform.platform(),
p.machine,
'ArchiveBox v{}'.format(get_version(CONFIG)),
*((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
f'BUILD_TIME={BUILD_TIME}',
)
print(
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={IN_QEMU}',
f'ARCH={p.machine}',
f'OS={p.system}',
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}',
)
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
print(
f'DEBUG={DEBUG}',
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={IN_QEMU}',
f'IS_TTY={IS_TTY}',
f'TZ={TIMEZONE}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
f'FS_USER={PUID}:{PGID}',
f'FS_PERMS={OUTPUT_PERMISSIONS}',
)
print(
f'DEBUG={DEBUG}',
f'IS_TTY={IS_TTY}',
f'TZ={TIMEZONE}',
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
f'LDAP={LDAP}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
print()
@@ -273,7 +284,7 @@ def version(quiet: bool=False,
print(printable_folder_status(name, path))
else:
print()
print('{white}[i] Data locations:{reset}'.format(**ANSI))
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
print()
check_dependencies()
@@ -1010,9 +1021,9 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
if not NODE_VERSION:
stderr('[X] You must first install node using your system package manager', color='red')
stderr('[X] You must first install node & npm using your system package manager', color='red')
hint([
'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
'https://github.com/nodesource/distributions#table-of-contents',
'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
])
raise SystemExit(1)

archivebox/static Symbolic link

@@ -0,0 +1 @@
templates/static
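Illustration (not part of this diff): the new entry makes archivebox/static a relative symlink pointing at templates/static, equivalent to:

import os

os.symlink('templates/static', 'archivebox/static')  # assumes the current working dir is the repo root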