mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
working archivebox update CLI cmd
This commit is contained in:
parent
a0edf218e8
commit
c9a05c9d94
7 changed files with 61 additions and 114 deletions
|
@ -21,6 +21,8 @@ from django.urls import reverse_lazy
|
||||||
|
|
||||||
from django_stubs_ext.db.models import TypedModelMeta
|
from django_stubs_ext.db.models import TypedModelMeta
|
||||||
|
|
||||||
|
from archivebox.index.json import to_json
|
||||||
|
|
||||||
from .abid import (
|
from .abid import (
|
||||||
ABID,
|
ABID,
|
||||||
ABID_LEN,
|
ABID_LEN,
|
||||||
|
@ -438,7 +440,7 @@ class ModelWithOutputDir(ABIDModel):
|
||||||
|
|
||||||
def write_indexes(self):
|
def write_indexes(self):
|
||||||
"""Write the Snapshot json, html, and merkle indexes to its output dir"""
|
"""Write the Snapshot json, html, and merkle indexes to its output dir"""
|
||||||
print(f'{self}.write_indexes()')
|
print(f'{type(self).__name__}[{self.ABID}].write_indexes()')
|
||||||
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
self.migrate_output_dir()
|
self.migrate_output_dir()
|
||||||
self.save_merkle_index()
|
self.save_merkle_index()
|
||||||
|
@ -447,14 +449,14 @@ class ModelWithOutputDir(ABIDModel):
|
||||||
|
|
||||||
def migrate_output_dir(self):
|
def migrate_output_dir(self):
|
||||||
"""Move the output files to the new folder structure if needed"""
|
"""Move the output files to the new folder structure if needed"""
|
||||||
print(f'{self}.migrate_output_dir()')
|
print(f'{type(self).__name__}[{self.ABID}].migrate_output_dir()')
|
||||||
self.migrate_from_0_7_2()
|
self.migrate_from_0_7_2()
|
||||||
self.migrate_from_0_8_6()
|
self.migrate_from_0_8_6()
|
||||||
# ... future migrations here
|
# ... future migrations here
|
||||||
|
|
||||||
def migrate_from_0_7_2(self) -> None:
|
def migrate_from_0_7_2(self) -> None:
|
||||||
"""Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
|
"""Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
|
||||||
print(f'{self}.migrate_from_0_7_2()')
|
print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_7_2()')
|
||||||
# move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
|
# move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
|
||||||
# update self.output_path = /data/archive/snapshots/<abid>
|
# update self.output_path = /data/archive/snapshots/<abid>
|
||||||
pass
|
pass
|
||||||
|
@ -462,27 +464,28 @@ class ModelWithOutputDir(ABIDModel):
|
||||||
def migrate_from_0_8_6(self) -> None:
|
def migrate_from_0_8_6(self) -> None:
|
||||||
"""Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
|
"""Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
|
||||||
# ... future migration code here ...
|
# ... future migration code here ...
|
||||||
print(f'{self}.migrate_from_0_8_6()')
|
print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_8_6()')
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def save_merkle_index(self, **kwargs) -> None:
|
def save_merkle_index(self, **kwargs) -> None:
|
||||||
"""Write the ./.index.merkle file to the output dir"""
|
"""Write the ./.index.merkle file to the output dir"""
|
||||||
# write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
|
# write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
|
||||||
print(f'{self}.save_merkle_index()')
|
print(f'{type(self).__name__}[{self.ABID}].save_merkle_index()')
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def save_html_index(self, **kwargs) -> None:
|
def save_html_index(self, **kwargs) -> None:
|
||||||
# write self.as_html() to self.output_dir / 'index.html'
|
# write self.as_html() to self.output_dir / 'index.html'
|
||||||
print(f'{self}.save_html_index()')
|
print(f'{type(self).__name__}[{self.ABID}].save_html_index()')
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def save_json_index(self, **kwargs) -> None:
|
def save_json_index(self, **kwargs) -> None:
|
||||||
print(f'{self}.save_json_index()')
|
print(f'{type(self).__name__}[{self.ABID}].save_json_index()')
|
||||||
# write self.as_json() to self.output_dir / 'index.json'
|
# write self.as_json() to self.output_dir / 'index.json'
|
||||||
|
(self.OUTPUT_DIR / 'index.json').write_text(to_json(self.as_json()))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def save_symlinks_index(self) -> None:
|
def save_symlinks_index(self) -> None:
|
||||||
print(f'{self}.save_symlinks_index()')
|
print(f'{type(self).__name__}[{self.ABID}].save_symlinks_index()')
|
||||||
# ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
|
# ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
|
||||||
# ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
|
# ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
|
||||||
# ln -s self.output_dir data/archive/1453452234234.21445
|
# ln -s self.output_dir data/archive/1453452234234.21445
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
__package__ = 'archivebox.cli'
|
__package__ = 'archivebox.cli'
|
||||||
__command__ = 'archivebox install'
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
@ -128,6 +127,8 @@ def install(binproviders: Optional[List[str]]=None, binaries: Optional[List[str]
|
||||||
# if we are only installing a single binary, raise the exception so the user can see what went wrong
|
# if we are only installing a single binary, raise the exception so the user can see what went wrong
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
from archivebox.config.django import setup_django
|
||||||
|
setup_django()
|
||||||
|
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
User = get_user_model()
|
User = get_user_model()
|
||||||
|
|
|
@ -1,60 +1,46 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
__package__ = 'archivebox.cli'
|
__package__ = 'archivebox.cli'
|
||||||
__command__ = 'archivebox server'
|
|
||||||
|
|
||||||
import sys
|
from typing import Iterable
|
||||||
import argparse
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional, List, IO
|
|
||||||
|
|
||||||
from archivebox.misc.util import docstring
|
import rich_click as click
|
||||||
from archivebox.config import DATA_DIR
|
from rich import print
|
||||||
|
|
||||||
|
from archivebox.misc.util import docstring, enforce_types
|
||||||
from archivebox.config.common import SERVER_CONFIG
|
from archivebox.config.common import SERVER_CONFIG
|
||||||
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
|
|
||||||
|
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
# @enforce_types
|
def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
|
||||||
def server(runserver_args: Optional[List[str]]=None,
|
reload: bool=False,
|
||||||
reload: bool=False,
|
init: bool=False,
|
||||||
debug: bool=False,
|
debug: bool=False,
|
||||||
init: bool=False,
|
daemonize: bool=False,
|
||||||
quick_init: bool=False,
|
nothreading: bool=False) -> None:
|
||||||
createsuperuser: bool=False,
|
|
||||||
daemonize: bool=False,
|
|
||||||
out_dir: Path=DATA_DIR) -> None:
|
|
||||||
"""Run the ArchiveBox HTTP server"""
|
"""Run the ArchiveBox HTTP server"""
|
||||||
|
|
||||||
from rich import print
|
runserver_args = list(runserver_args)
|
||||||
|
|
||||||
runserver_args = runserver_args or []
|
|
||||||
|
|
||||||
if init:
|
if init:
|
||||||
run_subcommand('init', stdin=None, pwd=out_dir)
|
from archivebox.cli.archivebox_init import init as archivebox_init
|
||||||
print()
|
archivebox_init(quick=True)
|
||||||
elif quick_init:
|
|
||||||
run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir)
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
if createsuperuser:
|
from archivebox.misc.checks import check_data_folder
|
||||||
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
check_data_folder()
|
check_data_folder()
|
||||||
|
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
|
||||||
|
from archivebox.config.common import SHELL_CONFIG
|
||||||
|
|
||||||
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
|
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
|
||||||
print()
|
print()
|
||||||
# print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
|
|
||||||
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
|
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
|
||||||
print(' [green]archivebox manage createsuperuser[/green]')
|
print(' [green]archivebox manage createsuperuser[/green]')
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
host = '127.0.0.1'
|
host = '127.0.0.1'
|
||||||
port = '8000'
|
port = '8000'
|
||||||
|
|
||||||
|
@ -78,80 +64,28 @@ def server(runserver_args: Optional[List[str]]=None,
|
||||||
if SHELL_CONFIG.DEBUG:
|
if SHELL_CONFIG.DEBUG:
|
||||||
if not reload:
|
if not reload:
|
||||||
runserver_args.append('--noreload') # '--insecure'
|
runserver_args.append('--noreload') # '--insecure'
|
||||||
|
if nothreading:
|
||||||
|
runserver_args.append('--nothreading')
|
||||||
call_command("runserver", *runserver_args)
|
call_command("runserver", *runserver_args)
|
||||||
else:
|
else:
|
||||||
from workers.supervisord_util import start_server_workers
|
from workers.supervisord_util import start_server_workers
|
||||||
|
|
||||||
print()
|
print()
|
||||||
start_server_workers(host=host, port=port, daemonize=False)
|
start_server_workers(host=host, port=port, daemonize=daemonize)
|
||||||
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
|
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
|
||||||
|
@click.argument('runserver_args', nargs=-1)
|
||||||
|
@click.option('--reload', is_flag=True, help='Enable auto-reloading when code or templates change')
|
||||||
|
@click.option('--debug', is_flag=True, help='Enable DEBUG=True mode with more verbose errors')
|
||||||
|
@click.option('--nothreading', is_flag=True, help='Force runserver to run in single-threaded mode')
|
||||||
|
@click.option('--init', is_flag=True, help='Run a full archivebox init/upgrade before starting the server')
|
||||||
|
@click.option('--daemonize', is_flag=True, help='Run the server in the background as a daemon')
|
||||||
@docstring(server.__doc__)
|
@docstring(server.__doc__)
|
||||||
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
|
def main(**kwargs):
|
||||||
parser = argparse.ArgumentParser(
|
server(**kwargs)
|
||||||
prog=__command__,
|
|
||||||
description=server.__doc__,
|
|
||||||
add_help=True,
|
|
||||||
formatter_class=SmartFormatter,
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'runserver_args',
|
|
||||||
nargs='*',
|
|
||||||
type=str,
|
|
||||||
default=[SERVER_CONFIG.BIND_ADDR],
|
|
||||||
help='Arguments to pass to Django runserver'
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--reload',
|
|
||||||
action='store_true',
|
|
||||||
help='Enable auto-reloading when code or templates change',
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--debug',
|
|
||||||
action='store_true',
|
|
||||||
help='Enable DEBUG=True mode with more verbose errors',
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--nothreading',
|
|
||||||
action='store_true',
|
|
||||||
help='Force runserver to run in single-threaded mode',
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--init',
|
|
||||||
action='store_true',
|
|
||||||
help='Run a full archivebox init/upgrade before starting the server',
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--quick-init', '-i',
|
|
||||||
action='store_true',
|
|
||||||
help='Run quick archivebox init/upgrade before starting the server',
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--createsuperuser',
|
|
||||||
action='store_true',
|
|
||||||
help='Run archivebox manage createsuperuser before starting the server',
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--daemonize',
|
|
||||||
action='store_true',
|
|
||||||
help='Run the server in the background as a daemon',
|
|
||||||
)
|
|
||||||
command = parser.parse_args(args or ())
|
|
||||||
reject_stdin(__command__, stdin)
|
|
||||||
|
|
||||||
server(
|
|
||||||
runserver_args=command.runserver_args + (['--nothreading'] if command.nothreading else []),
|
|
||||||
reload=command.reload,
|
|
||||||
debug=command.debug,
|
|
||||||
init=command.init,
|
|
||||||
quick_init=command.quick_init,
|
|
||||||
createsuperuser=command.createsuperuser,
|
|
||||||
daemonize=command.daemonize,
|
|
||||||
out_dir=Path(pwd) if pwd else DATA_DIR,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(args=sys.argv[1:], stdin=sys.stdin)
|
main()
|
||||||
|
|
|
@ -23,7 +23,7 @@ def get_real_name(key: str) -> str:
|
||||||
for section in CONFIGS.values():
|
for section in CONFIGS.values():
|
||||||
try:
|
try:
|
||||||
return section.aliases[key]
|
return section.aliases[key]
|
||||||
except KeyError:
|
except (KeyError, AttributeError):
|
||||||
pass
|
pass
|
||||||
return key
|
return key
|
||||||
|
|
||||||
|
@ -159,6 +159,9 @@ def write_config_file(config: Dict[str, str]) -> benedict:
|
||||||
section = section_for_key(key)
|
section = section_for_key(key)
|
||||||
assert section is not None
|
assert section is not None
|
||||||
|
|
||||||
|
if not hasattr(section, 'toml_section_header'):
|
||||||
|
raise ValueError(f'{key} is read-only (defined in {type(section).__module__}.{type(section).__name__}). Refusing to set.')
|
||||||
|
|
||||||
section_name = section.toml_section_header
|
section_name = section.toml_section_header
|
||||||
|
|
||||||
if section_name in config_file:
|
if section_name in config_file:
|
||||||
|
|
|
@ -192,7 +192,7 @@ class Snapshot(ModelWithOutputDir, ModelWithStateMachine, ABIDModel):
|
||||||
objects = SnapshotManager()
|
objects = SnapshotManager()
|
||||||
|
|
||||||
def save(self, *args, **kwargs):
|
def save(self, *args, **kwargs):
|
||||||
print(f'{self}.save()')
|
print(f'Snapshot[{self.ABID}].save()')
|
||||||
if self.pk:
|
if self.pk:
|
||||||
existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
|
existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
|
||||||
if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
|
if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
|
||||||
|
@ -634,7 +634,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithStateMachine, ABIDModel):
|
||||||
return repr(self)
|
return repr(self)
|
||||||
|
|
||||||
def save(self, *args, write_indexes: bool=False, **kwargs):
|
def save(self, *args, write_indexes: bool=False, **kwargs):
|
||||||
print(f'{self}.save()')
|
print(f'ArchiveResult[{self.ABID}].save()')
|
||||||
# if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
|
# if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
|
||||||
# raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
|
# raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
|
||||||
if self.pk:
|
if self.pk:
|
||||||
|
|
|
@ -23,13 +23,19 @@ timezone.utc = datetime.timezone.utc
|
||||||
# Install rich for pretty tracebacks in console logs
|
# Install rich for pretty tracebacks in console logs
|
||||||
# https://rich.readthedocs.io/en/stable/traceback.html#traceback-handler
|
# https://rich.readthedocs.io/en/stable/traceback.html#traceback-handler
|
||||||
|
|
||||||
from rich.traceback import install
|
from rich.traceback import install # noqa
|
||||||
|
|
||||||
TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isatty() else 200
|
TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isatty() else 200
|
||||||
# os.environ.setdefault('COLUMNS', str(TERM_WIDTH))
|
# os.environ.setdefault('COLUMNS', str(TERM_WIDTH))
|
||||||
install(show_locals=True, word_wrap=False, locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH)
|
install(show_locals=True, word_wrap=False, locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH)
|
||||||
|
|
||||||
|
|
||||||
|
# Hide site-packages/sonic/client.py:115: SyntaxWarning
|
||||||
|
# https://github.com/xmonader/python-sonic-client/pull/18
|
||||||
|
import warnings # noqa
|
||||||
|
warnings.filterwarnings("ignore", category=SyntaxWarning, module='sonic')
|
||||||
|
|
||||||
|
# Make daphne log requests quieter and easier to read
|
||||||
from daphne import access # noqa
|
from daphne import access # noqa
|
||||||
|
|
||||||
class ModifiedAccessLogGenerator(access.AccessLogGenerator):
|
class ModifiedAccessLogGenerator(access.AccessLogGenerator):
|
||||||
|
@ -53,7 +59,7 @@ class ModifiedAccessLogGenerator(access.AccessLogGenerator):
|
||||||
|
|
||||||
# clean up the log format to mostly match the same format as django.conf.settings.LOGGING rich formats
|
# clean up the log format to mostly match the same format as django.conf.settings.LOGGING rich formats
|
||||||
self.stream.write(
|
self.stream.write(
|
||||||
"[%s] HTTP %s (%s) %s\n"
|
"%s HTTP %s %s %s\n"
|
||||||
% (
|
% (
|
||||||
date.strftime("%Y-%m-%d %H:%M:%S"),
|
date.strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
request,
|
request,
|
||||||
|
|
|
@ -490,13 +490,13 @@ class ActorType(Generic[ModelType]):
|
||||||
# abx.pm.hook.on_actor_shutdown(actor=self, last_obj=last_obj, last_error=last_error)
|
# abx.pm.hook.on_actor_shutdown(actor=self, last_obj=last_obj, last_error=last_error)
|
||||||
|
|
||||||
def on_tick_start(self, obj_to_process: ModelType) -> None:
|
def on_tick_start(self, obj_to_process: ModelType) -> None:
|
||||||
print(f'🏃♂️ {self}.on_tick_start() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
|
# print(f'🏃♂️ {self}.on_tick_start() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
|
||||||
# abx.pm.hook.on_actor_tick_start(actor=self, obj_to_process=obj)
|
# abx.pm.hook.on_actor_tick_start(actor=self, obj_to_process=obj)
|
||||||
# self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ')
|
# self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ')
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def on_tick_end(self, obj_to_process: ModelType) -> None:
|
def on_tick_end(self, obj_to_process: ModelType) -> None:
|
||||||
print(f'🏃♂️ {self}.on_tick_end() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
|
# print(f'🏃♂️ {self}.on_tick_end() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
|
||||||
# abx.pm.hook.on_actor_tick_end(actor=self, obj_to_process=obj_to_process)
|
# abx.pm.hook.on_actor_tick_end(actor=self, obj_to_process=obj_to_process)
|
||||||
# self.timer.end()
|
# self.timer.end()
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue