diff --git a/archivebox/cli/archivebox_schedule.py b/archivebox/cli/archivebox_schedule.py index d2f85c84..bc800e53 100644 --- a/archivebox/cli/archivebox_schedule.py +++ b/archivebox/cli/archivebox_schedule.py @@ -1,38 +1,43 @@ #!/usr/bin/env python3 __package__ = 'archivebox.cli' -__command__ = 'archivebox schedule' import sys -import argparse from pathlib import Path -from typing import Optional, List, IO -from archivebox.misc.util import docstring -from archivebox.config import DATA_DIR -from archivebox.misc.logging_util import SmartFormatter, reject_stdin -from archivebox.config.common import ARCHIVING_CONFIG +import rich_click as click + +from archivebox.misc.util import enforce_types, docstring +from archivebox.config import DATA_DIR, CONSTANTS +from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG +from archivebox.misc.logging_util import stderr +from archivebox.config.permissions import USER -# @enforce_types +CRON_COMMENT = 'ArchiveBox' + + +@enforce_types def schedule(add: bool=False, - show: bool=False, - clear: bool=False, - foreground: bool=False, - run_all: bool=False, - quiet: bool=False, - every: Optional[str]=None, - tag: str='', - depth: int=0, - overwrite: bool=False, - update: bool=not ARCHIVING_CONFIG.ONLY_NEW, - import_path: Optional[str]=None, - out_dir: Path=DATA_DIR): + show: bool=False, + clear: bool=False, + foreground: bool=False, + run_all: bool=False, + quiet: bool=False, + every: str | None=None, + tag: str='', + depth: int | str=0, + overwrite: bool=False, + update: bool=not ARCHIVING_CONFIG.ONLY_NEW, + import_path: str | None=None, + out_dir: Path=DATA_DIR) -> None: """Set ArchiveBox to regularly import URLs at specific times using cron""" + + depth = int(depth) - check_data_folder() + from crontab import CronTab, CronSlices + from archivebox.misc.system import dedupe_cron_jobs from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY - from archivebox.config.permissions import USER Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True) @@ -65,7 +70,6 @@ def schedule(add: bool=False, '>>', quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'), '2>&1', - ] new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT) @@ -83,10 +87,11 @@ def schedule(add: bool=False, raise SystemExit(1) cron = dedupe_cron_jobs(cron) + print(cron) cron.write() total_runs = sum(j.frequency_per_year() for j in cron) - existing_jobs = list(cron.find_comment(CRON_COMMENT)) + existing_jobs = list(cron.find_command('archivebox')) print() print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI)) @@ -108,10 +113,6 @@ def schedule(add: bool=False, stderr(' archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml') raise SystemExit(0) - cron = CronTab(user=True) - cron = dedupe_cron_jobs(cron) - existing_jobs = list(cron.find_comment(CRON_COMMENT)) - if foreground or run_all: if not existing_jobs: stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI)) @@ -141,108 +142,25 @@ def schedule(add: bool=False, print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI)) raise SystemExit(1) - # if CAN_UPGRADE: - # hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n") - - +@click.command() +@click.option('--quiet', '-q', is_flag=True, help="Don't warn about storage space") +@click.option('--add', is_flag=True, help='Add a new scheduled ArchiveBox update job to cron') +@click.option('--every', type=str, help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")') +@click.option('--tag', '-t', default='', help='Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3') +@click.option('--depth', type=click.Choice(['0', '1']), default='0', help='Depth to archive to [0] or 1') +@click.option('--overwrite', is_flag=True, help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots') +@click.option('--update', is_flag=True, help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults') +@click.option('--clear', is_flag=True, help='Stop all ArchiveBox scheduled runs (remove cron jobs)') +@click.option('--show', is_flag=True, help='Print a list of currently active ArchiveBox cron jobs') +@click.option('--foreground', '-f', is_flag=True, help='Launch ArchiveBox scheduler as a long-running foreground task instead of using cron') +@click.option('--run-all', is_flag=True, help='Run all the scheduled jobs once immediately, independent of their configured schedules') +@click.argument('import_path', required=False) @docstring(schedule.__doc__) -def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: - parser = argparse.ArgumentParser( - prog=__command__, - description=schedule.__doc__, - add_help=True, - formatter_class=SmartFormatter, - ) - parser.add_argument( - '--quiet', '-q', - action='store_true', - help=("Don't warn about storage space."), - ) - group = parser.add_mutually_exclusive_group() - group.add_argument( - '--add', # '-a', - action='store_true', - help='Add a new scheduled ArchiveBox update job to cron', - ) - parser.add_argument( - '--every', # '-e', - type=str, - default=None, - help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")', - ) - parser.add_argument( - '--tag', '-t', - type=str, - default='', - help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3", - ) - parser.add_argument( - '--depth', # '-d', - type=int, - choices=[0, 1], - default=0, - help='Depth to archive to [0] or 1, see "add" command help for more info', - ) - parser.add_argument( - '--overwrite', - action='store_true', - help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots', - ) - parser.add_argument( - '--update', - action='store_true', - help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults', - ) - group.add_argument( - '--clear', # '-c' - action='store_true', - help=("Stop all ArchiveBox scheduled runs (remove cron jobs)"), - ) - group.add_argument( - '--show', # '-s' - action='store_true', - help=("Print a list of currently active ArchiveBox cron jobs"), - ) - group.add_argument( - '--foreground', '-f', - action='store_true', - help=("Launch ArchiveBox scheduler as a long-running foreground task " - "instead of using cron."), - ) - group.add_argument( - '--run-all', # '-a', - action='store_true', - help=("Run all the scheduled jobs once immediately, independent of " - "their configured schedules, can be used together with --foreground"), - ) - parser.add_argument( - 'import_path', - nargs='?', - type=str, - default=None, - help=("Check this path and import any new links on every run " - "(can be either local file or remote URL)"), - ) - command = parser.parse_args(args or ()) - reject_stdin(__command__, stdin) - - schedule( - add=command.add, - show=command.show, - clear=command.clear, - foreground=command.foreground, - run_all=command.run_all, - quiet=command.quiet, - every=command.every, - tag=command.tag, - depth=command.depth, - overwrite=command.overwrite, - update=command.update, - import_path=command.import_path, - out_dir=Path(pwd) if pwd else DATA_DIR, - ) +def main(**kwargs): + """Set ArchiveBox to regularly import URLs at specific times using cron""" + schedule(**kwargs) if __name__ == '__main__': - main(args=sys.argv[1:], stdin=sys.stdin) + main()