move everything out of legacy folder

This commit is contained in:
Nick Sweeting 2019-04-27 17:26:24 -04:00
parent 553f312125
commit 1b8abc0961
74 changed files with 3162 additions and 2629 deletions

View file

@ -4,34 +4,17 @@ __package__ = 'archivebox.cli'
__command__ = 'archivebox schedule'
__description__ = 'Set ArchiveBox to regularly import URLs at specific times using cron'
import os
import sys
import argparse
from datetime import datetime
from crontab import CronTab, CronSlices
from typing import Optional, List, IO
from ..main import schedule
from ..util import reject_stdin
from ..config import OUTPUT_DIR
from ..legacy.util import reject_stdin
from ..legacy.config import (
OUTPUT_DIR,
LOGS_DIR,
ARCHIVEBOX_BINARY,
USER,
ANSI,
stderr,
check_data_folder,
)
CRON_COMMENT = 'archivebox_schedule'
def main(args=None):
check_data_folder()
args = sys.argv[1:] if args is None else args
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
parser = argparse.ArgumentParser(
prog=__command__,
description=__description__,
@ -57,7 +40,7 @@ def main(args=None):
group.add_argument(
'--clear', # '-c'
action='store_true',
help=("Stop all ArchiveBox scheduled runs, clear it completely from cron"),
help=("Stop all ArchiveBox scheduled runs (remove cron jobs)"),
)
group.add_argument(
'--show', # '-s'
@ -67,13 +50,14 @@ def main(args=None):
group.add_argument(
'--foreground', '-f',
action='store_true',
help=("Launch ArchiveBox as a long-running foreground task "
help=("Launch ArchiveBox scheduler as a long-running foreground task "
"instead of using cron."),
)
group.add_argument(
'--run-all', # '-a',
action='store_true',
help='Run all the scheduled jobs once immediately, independent of their configured schedules',
help=("Run all the scheduled jobs once immediately, independent of "
"their configured schedules, can be used together with --foreground"),
)
parser.add_argument(
'import_path',
@ -83,115 +67,21 @@ def main(args=None):
help=("Check this path and import any new links on every run "
"(can be either local file or remote URL)"),
)
command = parser.parse_args(args)
reject_stdin(__command__)
command = parser.parse_args(args or ())
reject_stdin(__command__, stdin)
os.makedirs(LOGS_DIR, exist_ok=True)
cron = CronTab(user=True)
cron = dedupe_jobs(cron)
existing_jobs = list(cron.find_comment(CRON_COMMENT))
if command.foreground or command.run_all:
if command.import_path or (not existing_jobs):
stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**ANSI))
stderr(' archivebox schedule --every=hour https://example.com/some/rss/feed.xml')
raise SystemExit(1)
print('{green}[*] Running {} ArchiveBox jobs in foreground task scheduler...{reset}'.format(len(existing_jobs), **ANSI))
if command.run_all:
try:
for job in existing_jobs:
sys.stdout.write(f' > {job.command}')
sys.stdout.flush()
job.run()
sys.stdout.write(f'\r{job.command}\n')
except KeyboardInterrupt:
print('\n{green}[√] Stopped.{reset}'.format(**ANSI))
raise SystemExit(1)
if command.foreground:
try:
for result in cron.run_scheduler():
print(result)
except KeyboardInterrupt:
print('\n{green}[√] Stopped.{reset}'.format(**ANSI))
raise SystemExit(1)
elif command.show:
if existing_jobs:
print('\n'.join(str(cmd) for cmd in existing_jobs))
else:
stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **ANSI))
stderr(' To schedule a new job, run:')
stderr(' archivebox schedule --every=[timeperiod] https://example.com/some/rss/feed.xml')
raise SystemExit(0)
elif command.clear:
print(cron.remove_all(comment=CRON_COMMENT))
cron.write()
raise SystemExit(0)
elif command.every:
quoted = lambda s: f'"{s}"' if s and ' ' in s else s
cmd = [
'cd',
quoted(OUTPUT_DIR),
'&&',
quoted(ARCHIVEBOX_BINARY),
*(('add', f'"{command.import_path}"',) if command.import_path else ('update',)),
'2>&1',
'>',
quoted(os.path.join(LOGS_DIR, 'archivebox.log')),
]
new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
if command.every in ('minute', 'hour', 'day', 'week', 'month', 'year'):
set_every = getattr(new_job.every(), command.every)
set_every()
elif CronSlices.is_valid(command.every):
new_job.setall(command.every)
else:
stderr('{red}[X] Got invalid timeperiod for cron task.{reset}'.format(**ANSI))
stderr(' It must be one of minute/hour/day/week/month')
stderr(' or a quoted cron-format schedule like:')
stderr(' archivebox init --every=day https://example.com/some/rss/feed.xml')
stderr(' archivebox init --every="0/5 * * * *" https://example.com/some/rss/feed.xml')
raise SystemExit(1)
cron = dedupe_jobs(cron)
cron.write()
total_runs = sum(j.frequency_per_year() for j in cron)
existing_jobs = list(cron.find_comment(CRON_COMMENT))
print()
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **ANSI))
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
if total_runs > 60 and not command.quiet:
stderr()
stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **ANSI))
stderr(f' Congrats on being an enthusiastic internet archiver! 👌')
stderr()
stderr(' Make sure you have enough storage space available to hold all the data.')
stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
raise SystemExit(0)
def dedupe_jobs(cron: CronTab) -> CronTab:
deduped = set()
for job in list(cron):
unique_tuple = (str(job.slices), job.command)
if unique_tuple not in deduped:
deduped.add(unique_tuple)
cron.remove(job)
for schedule, command in deduped:
job = cron.new(command=command, comment=CRON_COMMENT)
job.setall(schedule)
job.enable()
return cron
schedule(
add=command.add,
show=command.show,
clear=command.clear,
foreground=command.foreground,
run_all=command.run_all,
quiet=command.quiet,
every=command.every,
import_path=command.import_path,
out_dir=pwd or OUTPUT_DIR,
)
if __name__ == '__main__':
main()
main(args=sys.argv[1:], stdin=sys.stdin)