add pipenv, schedule cmd, logs dir, and lots more

Nick Sweeting 2019-04-18 21:09:54 -04:00
parent 4f869f235f
commit 39a0ab3013
20 changed files with 820 additions and 188 deletions

Pipfile (new file, +22 lines)

@@ -0,0 +1,22 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]
ipdb = "*"
flake8 = "*"
mypy = "*"
django-stubs = "*"
setuptools = "*"

[packages]
dataclasses = "*"
base32-crockford = "*"
django = "*"
youtube-dl = "*"
python-crontab = "*"
croniter = "*"

[requires]
python_version = ">=3.6"

Pipfile.lock (generated, new file, +314 lines)

@@ -0,0 +1,314 @@
{
"_meta": {
"hash": {
"sha256": "7f25fb9c97e469fdb787e755c5756e2be4b0b649e3c5ad8feb17200b32d3bb36"
},
"pipfile-spec": 6,
"requires": {
"python_version": ">=3.6"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"base32-crockford": {
"hashes": [
"sha256:115f5bd32ae32b724035cb02eb65069a8824ea08c08851eb80c8b9f63443a969",
"sha256:295ef5ffbf6ed96b6e739ffd36be98fa7e90a206dd18c39acefb15777eedfe6e"
],
"index": "pypi",
"version": "==0.3.0"
},
"croniter": {
"hashes": [
"sha256:625949cbd38a0b2325295591940dfa5fa0dfca41d03150ae0284a924e0be10f0",
"sha256:66b6a9c6b2d1a85d4af51453b2328be775a173e688b69eb3a96a7ec752ba77a3"
],
"index": "pypi",
"version": "==0.3.29"
},
"dataclasses": {
"hashes": [
"sha256:454a69d788c7fda44efd71e259be79577822f5e3f53f029a22d08004e951dc9f",
"sha256:6988bd2b895eef432d562370bb707d540f32f7360ab13da45340101bc2307d84"
],
"index": "pypi",
"version": "==0.6"
},
"django": {
"hashes": [
"sha256:7c3543e4fb070d14e10926189a7fcf42ba919263b7473dceaefce34d54e8a119",
"sha256:a2814bffd1f007805b19194eb0b9a331933b82bd5da1c3ba3d7b7ba16e06dc4b"
],
"index": "pypi",
"version": "==2.2"
},
"python-crontab": {
"hashes": [
"sha256:91ce4b245ee5e5c117aa0b21b485bc43f2d80df854a36e922b707643f50d7923"
],
"index": "pypi",
"version": "==2.3.6"
},
"python-dateutil": {
"hashes": [
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
"sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e"
],
"version": "==2.8.0"
},
"pytz": {
"hashes": [
"sha256:303879e36b721603cc54604edcac9d20401bdbe31e1e4fdee5b9f98d5d31dfda",
"sha256:d747dd3d23d77ef44c6a3526e274af6efeb0a6f1afd5a69ba4d5be4098c8e141"
],
"version": "==2019.1"
},
"six": {
"hashes": [
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
],
"version": "==1.12.0"
},
"sqlparse": {
"hashes": [
"sha256:40afe6b8d4b1117e7dff5504d7a8ce07d9a1b15aeeade8a2d10f130a834f8177",
"sha256:7c3dca29c022744e95b547e867cee89f4fce4373f3549ccd8797d8eb52cdb873"
],
"version": "==0.3.0"
},
"youtube-dl": {
"hashes": [
"sha256:0d25459093870bf560bccafe9015e59402d7de1b2c956593623ba4c2840153e5",
"sha256:ea0824ae9a166059ec754c267480198a074bd899c20b2ba497809bac099cde2e"
],
"index": "pypi",
"version": "==2019.4.17"
}
},
"develop": {
"appnope": {
"hashes": [
"sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0",
"sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"
],
"markers": "sys_platform == 'darwin'",
"version": "==0.1.0"
},
"backcall": {
"hashes": [
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
],
"version": "==0.1.0"
},
"decorator": {
"hashes": [
"sha256:86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de",
"sha256:f069f3a01830ca754ba5258fde2278454a0b5b79e0d7f5c13b3b97e57d4acff6"
],
"version": "==4.4.0"
},
"django-stubs": {
"hashes": [
"sha256:9c06a4b28fc8c18f6abee4f199f8ee29cb5cfcecf349e912ded31cb3526ea2b6",
"sha256:9ef230843a24b5d74f2ebd4c60f9bea09c21911bc119d0325e8bb47e2f495e70"
],
"index": "pypi",
"version": "==0.12.1"
},
"entrypoints": {
"hashes": [
"sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19",
"sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"
],
"version": "==0.3"
},
"flake8": {
"hashes": [
"sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661",
"sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8"
],
"index": "pypi",
"version": "==3.7.7"
},
"ipdb": {
"hashes": [
"sha256:dce2112557edfe759742ca2d0fee35c59c97b0cc7a05398b791079d78f1519ce"
],
"index": "pypi",
"version": "==0.12"
},
"ipython": {
"hashes": [
"sha256:b038baa489c38f6d853a3cfc4c635b0cda66f2864d136fe8f40c1a6e334e2a6b",
"sha256:f5102c1cd67e399ec8ea66bcebe6e3968ea25a8977e53f012963e5affeb1fe38"
],
"markers": "python_version >= '3.4'",
"version": "==7.4.0"
},
"ipython-genutils": {
"hashes": [
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
],
"version": "==0.2.0"
},
"jedi": {
"hashes": [
"sha256:2bb0603e3506f708e792c7f4ad8fc2a7a9d9c2d292a358fbbd58da531695595b",
"sha256:2c6bcd9545c7d6440951b12b44d373479bf18123a401a52025cf98563fbd826c"
],
"version": "==0.13.3"
},
"mccabe": {
"hashes": [
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
],
"version": "==0.6.1"
},
"mypy": {
"hashes": [
"sha256:2afe51527b1f6cdc4a5f34fc90473109b22bf7f21086ba3e9451857cf11489e6",
"sha256:56a16df3e0abb145d8accd5dbb70eba6c4bd26e2f89042b491faa78c9635d1e2",
"sha256:5764f10d27b2e93c84f70af5778941b8f4aa1379b2430f85c827e0f5464e8714",
"sha256:5bbc86374f04a3aa817622f98e40375ccb28c4836f36b66706cf3c6ccce86eda",
"sha256:6a9343089f6377e71e20ca734cd8e7ac25d36478a9df580efabfe9059819bf82",
"sha256:6c9851bc4a23dc1d854d3f5dfd5f20a016f8da86bcdbb42687879bb5f86434b0",
"sha256:b8e85956af3fcf043d6f87c91cbe8705073fc67029ba6e22d3468bfee42c4823",
"sha256:b9a0af8fae490306bc112229000aa0c2ccc837b49d29a5c42e088c132a2334dd",
"sha256:bbf643528e2a55df2c1587008d6e3bda5c0445f1240dfa85129af22ae16d7a9a",
"sha256:c46ab3438bd21511db0f2c612d89d8344154c0c9494afc7fbc932de514cf8d15",
"sha256:f7a83d6bd805855ef83ec605eb01ab4fa42bcef254b13631e451cbb44914a9b0"
],
"index": "pypi",
"version": "==0.701"
},
"mypy-extensions": {
"hashes": [
"sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812",
"sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
],
"version": "==0.4.1"
},
"parso": {
"hashes": [
"sha256:17cc2d7a945eb42c3569d4564cdf49bde221bc2b552af3eca9c1aad517dcdd33",
"sha256:2e9574cb12e7112a87253e14e2c380ce312060269d04bd018478a3c92ea9a376"
],
"version": "==0.4.0"
},
"pexpect": {
"hashes": [
"sha256:2094eefdfcf37a1fdbfb9aa090862c1a4878e5c7e0e7e7088bdb511c558e5cd1",
"sha256:9e2c1fd0e6ee3a49b28f95d4b33bc389c89b20af6a1255906e90ff1262ce62eb"
],
"markers": "sys_platform != 'win32'",
"version": "==4.7.0"
},
"pickleshare": {
"hashes": [
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
],
"version": "==0.7.5"
},
"prompt-toolkit": {
"hashes": [
"sha256:11adf3389a996a6d45cc277580d0d53e8a5afd281d0c9ec71b28e6f121463780",
"sha256:2519ad1d8038fd5fc8e770362237ad0364d16a7650fb5724af6997ed5515e3c1",
"sha256:977c6583ae813a37dc1c2e1b715892461fcbdaa57f6fc62f33a528c4886c8f55"
],
"version": "==2.0.9"
},
"ptyprocess": {
"hashes": [
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"version": "==0.6.0"
},
"pycodestyle": {
"hashes": [
"sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56",
"sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c"
],
"version": "==2.5.0"
},
"pyflakes": {
"hashes": [
"sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0",
"sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2"
],
"version": "==2.1.1"
},
"pygments": {
"hashes": [
"sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
"sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
],
"version": "==2.3.1"
},
"six": {
"hashes": [
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
],
"version": "==1.12.0"
},
"traitlets": {
"hashes": [
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
"sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
],
"version": "==4.3.2"
},
"typed-ast": {
"hashes": [
"sha256:04894d268ba6eab7e093d43107869ad49e7b5ef40d1a94243ea49b352061b200",
"sha256:16616ece19daddc586e499a3d2f560302c11f122b9c692bc216e821ae32aa0d0",
"sha256:252fdae740964b2d3cdfb3f84dcb4d6247a48a6abe2579e8029ab3be3cdc026c",
"sha256:2af80a373af123d0b9f44941a46df67ef0ff7a60f95872412a145f4500a7fc99",
"sha256:2c88d0a913229a06282b285f42a31e063c3bf9071ff65c5ea4c12acb6977c6a7",
"sha256:2ea99c029ebd4b5a308d915cc7fb95b8e1201d60b065450d5d26deb65d3f2bc1",
"sha256:3d2e3ab175fc097d2a51c7a0d3fda442f35ebcc93bb1d7bd9b95ad893e44c04d",
"sha256:4766dd695548a15ee766927bf883fb90c6ac8321be5a60c141f18628fb7f8da8",
"sha256:56b6978798502ef66625a2e0f80cf923da64e328da8bbe16c1ff928c70c873de",
"sha256:5cddb6f8bce14325b2863f9d5ac5c51e07b71b462361fd815d1d7706d3a9d682",
"sha256:644ee788222d81555af543b70a1098f2025db38eaa99226f3a75a6854924d4db",
"sha256:64cf762049fc4775efe6b27161467e76d0ba145862802a65eefc8879086fc6f8",
"sha256:68c362848d9fb71d3c3e5f43c09974a0ae319144634e7a47db62f0f2a54a7fa7",
"sha256:6c1f3c6f6635e611d58e467bf4371883568f0de9ccc4606f17048142dec14a1f",
"sha256:b213d4a02eec4ddf622f4d2fbc539f062af3788d1f332f028a2e19c42da53f15",
"sha256:bb27d4e7805a7de0e35bd0cb1411bc85f807968b2b0539597a49a23b00a622ae",
"sha256:c9d414512eaa417aadae7758bc118868cd2396b0e6138c1dd4fda96679c079d3",
"sha256:f0937165d1e25477b01081c4763d2d9cdc3b18af69cb259dd4f640c9b900fe5e",
"sha256:fb96a6e2c11059ecf84e6741a319f93f683e440e341d4489c9b161eca251cf2a",
"sha256:fc71d2d6ae56a091a8d94f33ec9d0f2001d1cb1db423d8b4355debfe9ce689b7"
],
"version": "==1.3.4"
},
"typing-extensions": {
"hashes": [
"sha256:07b2c978670896022a43c4b915df8958bec4a6b84add7f2c87b2b728bda3ba64",
"sha256:f3f0e67e1d42de47b5c67c32c9b26641642e9170fe7e292991793705cd5fef7c",
"sha256:fb2cd053238d33a8ec939190f30cfd736c00653a85a2919415cecf7dc3d9da71"
],
"version": "==3.7.2"
},
"wcwidth": {
"hashes": [
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
"sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c"
],
"version": "==0.1.7"
}
}
}

View file

@@ -1,30 +1,59 @@
 __package__ = 'archivebox.cli'
 
 import os
+from typing import Dict
 from importlib import import_module
 
 CLI_DIR = os.path.dirname(os.path.abspath(__file__))
 
-required_attrs = ('__package__', '__command__', '__description__', 'main')
+# these common commands will appear sorted before any others for ease-of-use
+display_first = ('help', 'version', 'init', 'list', 'update', 'add', 'remove')
 
-order = ('help', 'version', 'init', 'list', 'update', 'add', 'remove')
+# every imported command module must have these properties in order to be valid
+required_attrs = ('__package__', '__command__', 'main')
+
+# basic checks to make sure imported files are valid subcommands
+is_cli_module = lambda fname: fname.startswith('archivebox_') and fname.endswith('.py')
+is_valid_cli_module = lambda module, subcommand: (
+    all(hasattr(module, attr) for attr in required_attrs)
+    and module.__command__.split(' ')[-1] == subcommand
+)
 
-def list_subcommands():
+def list_subcommands() -> Dict[str, str]:
+    """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
     COMMANDS = []
     for filename in os.listdir(CLI_DIR):
-        if filename.startswith('archivebox_') and filename.endswith('.py'):
+        if is_cli_module(filename):
             subcommand = filename.replace('archivebox_', '').replace('.py', '')
             module = import_module('.archivebox_{}'.format(subcommand), __package__)
+            assert is_valid_cli_module(module, subcommand)
+            COMMANDS.append((subcommand, module.__description__))  # type: ignore
+            globals()[subcommand] = module.main
+            module.main.__doc__ = module.__description__
 
-            assert all(hasattr(module, attr) for attr in required_attrs)
-            assert module.__command__.split(' ')[-1] == subcommand
-            COMMANDS.append((subcommand, module.__description__))
+    display_order = lambda cmd: (
+        display_first.index(cmd[0])
+        if cmd[0] in display_first else
+        100 + len(cmd[0])
+    )
 
-    return dict(sorted(COMMANDS, key=lambda cmd: order.index(cmd[0]) if cmd[0] in order else 10 + len(cmd[0])))
+    return dict(sorted(COMMANDS, key=display_order))
 
-def run_subcommand(subcommand: str, args=None):
+def run_subcommand(subcommand: str, args=None) -> None:
+    """run a given ArchiveBox subcommand with the given list of args"""
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
-    return module.main(args)  # type: ignore
+    module.main(args)  # type: ignore
+
+SUBCOMMANDS = list_subcommands()
+
+__all__ = (
+    'SUBCOMMANDS',
+    'list_subcommands',
+    'run_subcommand',
+    *SUBCOMMANDS.keys(),
+)
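For reference, a small standalone sketch (not part of the commit) of how the new display_order key sorts subcommands: the common commands listed in display_first come first in their listed order, and everything else follows sorted by name length.

display_first = ('help', 'version', 'init', 'list', 'update', 'add', 'remove')

def display_order(cmd):
    # cmd is a (subcommand_name, description) tuple, as in COMMANDS above
    name = cmd[0]
    return display_first.index(name) if name in display_first else 100 + len(name)

commands = [('schedule', '...'), ('add', '...'), ('shell', '...'), ('help', '...')]
print([name for name, _ in sorted(commands, key=display_order)])
# -> ['help', 'add', 'shell', 'schedule']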

View file

@@ -82,5 +82,6 @@ def main(args=None, stdin=None):
         only_new=command.only_new,
     )
 
 if __name__ == '__main__':
     main()

View file

@@ -4,7 +4,6 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox init'
 __description__ = 'Initialize a new ArchiveBox collection in the current directory'
 
-import os
 import sys
 import argparse

View file

@@ -0,0 +1,194 @@
#!/usr/bin/env python3

__package__ = 'archivebox.cli'
__command__ = 'archivebox schedule'
__description__ = 'Set ArchiveBox to run regularly at a specific time'

import os
import sys
import argparse

from datetime import datetime
from crontab import CronTab, CronSlices

from ..legacy.util import reject_stdin
from ..legacy.config import (
    OUTPUT_DIR,
    LOGS_DIR,
    ARCHIVEBOX_BINARY,
    USER,
    ANSI,
    stderr,
)


CRON_COMMENT = 'archivebox_schedule'


def main(args=None):
    args = sys.argv[1:] if args is None else args

    parser = argparse.ArgumentParser(
        prog=__command__,
        description=__description__,
        add_help=True,
    )
    parser.add_argument(
        '--quiet', '-q',
        action='store_true',
        help=("Don't warn about storage space."),
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--add', # '-a',
        action='store_true',
        help='Add a new scheduled ArchiveBox update job to cron',
    )
    parser.add_argument(
        '--every', # '-e',
        type=str,
        default='daily',
        help='Run ArchiveBox once every [timeperiod] (hour/day/week/month/year or cron format e.g. "0 0 * * *")',
    )
    group.add_argument(
        '--clear', # '-c'
        action='store_true',
        help=("Stop all ArchiveBox scheduled runs, clear it completely from cron"),
    )
    group.add_argument(
        '--show', # '-s'
        action='store_true',
        help=("Print a list of currently active ArchiveBox cron jobs"),
    )
    group.add_argument(
        '--foreground', '-f',
        action='store_true',
        help=("Launch ArchiveBox as a long-running foreground task "
              "instead of using cron."),
    )
    group.add_argument(
        '--run-all', # '-a',
        action='store_true',
        help='Run all the scheduled jobs once immediately, independent of their configured schedules',
    )
    parser.add_argument(
        'import_path',
        nargs='?',
        type=str,
        default=None,
        help=("Check this path and import any new links on every run "
              "(can be either local file or remote URL)"),
    )
    command = parser.parse_args(args)
    reject_stdin(__command__)

    os.makedirs(LOGS_DIR, exist_ok=True)

    cron = CronTab(user=True)
    cron = dedupe_jobs(cron)
    existing_jobs = list(cron.find_comment(CRON_COMMENT))

    if command.foreground or command.run_all:
        if command.import_path or (not existing_jobs):
            stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**ANSI))
            stderr(' archivebox schedule --every=hour https://example.com/some/rss/feed.xml')
            raise SystemExit(1)
        print('{green}[*] Running {} ArchiveBox jobs in foreground task scheduler...{reset}'.format(len(existing_jobs), **ANSI))
        if command.run_all:
            try:
                for job in existing_jobs:
                    sys.stdout.write(f' > {job.command}')
                    sys.stdout.flush()
                    job.run()
                    sys.stdout.write(f'\r{job.command}\n')
            except KeyboardInterrupt:
                print('\n{green}[√] Stopped.{reset}'.format(**ANSI))
                raise SystemExit(1)
        if command.foreground:
            try:
                for result in cron.run_scheduler():
                    print(result)
            except KeyboardInterrupt:
                print('\n{green}[√] Stopped.{reset}'.format(**ANSI))
                raise SystemExit(1)

    elif command.show:
        if existing_jobs:
            print('\n'.join(str(cmd) for cmd in existing_jobs))
        else:
            stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **ANSI))
            stderr(' To schedule a new job, run:')
            stderr(' archivebox schedule --every=[timeperiod] https://example.com/some/rss/feed.xml')
        raise SystemExit(0)

    elif command.clear:
        print(cron.remove_all(comment=CRON_COMMENT))
        cron.write()
        raise SystemExit(0)

    elif command.every:
        quoted = lambda s: f'"{s}"' if s and ' ' in s else s
        cmd = [
            'cd',
            quoted(OUTPUT_DIR),
            '&&',
            quoted(ARCHIVEBOX_BINARY),
            *(('add', f'"{command.import_path}"',) if command.import_path else ('update',)),
            '2>&1',
            '>',
            quoted(os.path.join(LOGS_DIR, 'archivebox.log')),
        ]
        new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)

        if command.every in ('minute', 'hour', 'day', 'week', 'month', 'year'):
            set_every = getattr(new_job.every(), command.every)
            set_every()
        elif CronSlices.is_valid(command.every):
            new_job.setall(command.every)
        else:
            stderr('{red}[X] Got invalid timeperiod for cron task.{reset}'.format(**ANSI))
            stderr(' It must be one of minute/hour/day/week/month')
            stderr(' or a quoted cron-format schedule like:')
            stderr(' archivebox init --every=day https://example.com/some/rss/feed.xml')
            stderr(' archivebox init --every="0/5 * * * *" https://example.com/some/rss/feed.xml')
            raise SystemExit(1)

        cron = dedupe_jobs(cron)
        cron.write()

        total_runs = sum(j.frequency_per_year() for j in cron)
        existing_jobs = list(cron.find_comment(CRON_COMMENT))

        print()
        print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **ANSI))
        print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
        if total_runs > 60 and not command.quiet:
            stderr()
            stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **ANSI))
            stderr(f' Congrats on being an enthusiastic internet archiver! 👌')
            stderr()
            stderr(' Make sure you have enough storage space available to hold all the data.')
            stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
        raise SystemExit(0)


def dedupe_jobs(cron: CronTab) -> CronTab:
    deduped = set()
    for job in list(cron):
        unique_tuple = (str(job.slices), job.command)
        if unique_tuple not in deduped:
            deduped.add(unique_tuple)
        cron.remove(job)

    for schedule, command in deduped:
        job = cron.new(command=command, comment=CRON_COMMENT)
        job.setall(schedule)
        job.enable()

    return cron


if __name__ == '__main__':
    main()
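For reference, a minimal sketch (not part of this commit) of the python-crontab calls the schedule command builds on. The command string here is a hypothetical stand-in, not the exact one assembled above from OUTPUT_DIR, ARCHIVEBOX_BINARY, and LOGS_DIR.

from crontab import CronTab, CronSlices

CRON_COMMENT = 'archivebox_schedule'

cron = CronTab(user=True)                       # the current user's crontab
job = cron.new(command='archivebox update', comment=CRON_COMMENT)

schedule = '0 0 * * *'                          # e.g. passed via --every="0 0 * * *"
assert CronSlices.is_valid(schedule)            # same validation the command performs
job.setall(schedule)

cron.write()                                    # persist the new entry

# jobs are later found (and deduped) by their comment tag:
print([str(j) for j in cron.find_comment(CRON_COMMENT)])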

View file

@@ -7,7 +7,7 @@ __description__ = 'Run the ArchiveBox HTTP server'
 import sys
 import argparse
 
-from ..legacy.config import setup_django
+from ..legacy.config import setup_django, OUTPUT_DIR
 from ..legacy.util import reject_stdin
 
@@ -29,7 +29,7 @@ def main(args=None):
     command = parser.parse_args(args)
     reject_stdin(__command__)
 
-    setup_django()
+    setup_django(OUTPUT_DIR)
     from django.core.management import call_command
     call_command("runserver", *command.runserver_args)

View file

@@ -7,7 +7,7 @@ __description__ = 'Enter an interactive ArchiveBox Django shell'
 import sys
 import argparse
 
-from ..legacy.config import setup_django
+from ..legacy.config import setup_django, OUTPUT_DIR
 from ..legacy.util import reject_stdin
 
@@ -22,7 +22,7 @@ def main(args=None):
     parser.parse_args(args)
     reject_stdin(__command__)
 
-    setup_django()
+    setup_django(OUTPUT_DIR)
    from django.core.management import call_command
    call_command("shell_plus")

View file

@@ -5,10 +5,8 @@ import os
 SECRET_KEY = '---------------- not a valid secret key ! ----------------'
 DEBUG = True
 
-OUTPUT_DIR = os.path.abspath(os.curdir)
-DATABASE_DIR_NAME = 'database'
-DATABASE_FILE_NAME = 'database.sqlite3'
-DATABASE_FILE = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME, DATABASE_FILE_NAME)
+OUTPUT_DIR = os.path.abspath(os.getenv('OUTPUT_DIR', os.curdir))
+DATABASE_FILE = os.path.join(OUTPUT_DIR, 'index.sqlite3')
 
 INSTALLED_APPS = [
 
@@ -38,7 +36,7 @@ ROOT_URLCONF = 'core.urls'
 TEMPLATES = [
     {
         'BACKEND': 'django.template.backends.django.DjangoTemplates',
-        'DIRS': ['templates'],
+        'DIRS': ['themes'],
         'APP_DIRS': True,
         'OPTIONS': {
            'context_processors': [
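A quick sketch (not part of the commit) of what the new settings lines mean in practice: the collection folder is read from the OUTPUT_DIR environment variable, which setup_django() now exports before Django loads, and the SQLite index lives directly inside that folder as index.sqlite3.

import os

os.environ['OUTPUT_DIR'] = '/data/archivebox'   # hypothetical collection folder

OUTPUT_DIR = os.path.abspath(os.getenv('OUTPUT_DIR', os.curdir))
DATABASE_FILE = os.path.join(OUTPUT_DIR, 'index.sqlite3')
print(DATABASE_FILE)   # -> /data/archivebox/index.sqlite3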

View file

@@ -1,15 +0,0 @@
import os
import sys
PYTHON_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(PYTHON_DIR)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "core.settings")
import django
django.setup()
from django.conf import settings
DATABASE_FILE = settings.DATABASE_FILE

View file

@@ -60,7 +60,6 @@ WGET_BINARY = os.getenv('WGET_BINARY', 'wget')
 YOUTUBEDL_BINARY = os.getenv('YOUTUBEDL_BINARY', 'youtube-dl')
 CHROME_BINARY = os.getenv('CHROME_BINARY', None)
 
 # ******************************************************************************
 
 ### Terminal Configuration
 
@@ -84,6 +83,7 @@ def stderr(*args):
     sys.stderr.write(' '.join(str(a) for a in args) + '\n')
 
 USER = getpass.getuser() or os.getlogin()
+ARCHIVEBOX_BINARY = sys.argv[0]
 
 REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..'))
 if OUTPUT_DIR:
 
@@ -91,14 +91,15 @@ if OUTPUT_DIR:
 else:
     OUTPUT_DIR = os.path.abspath(os.curdir)
 
+SQL_INDEX_FILENAME = 'index.sqlite3'
+JSON_INDEX_FILENAME = 'index.json'
+HTML_INDEX_FILENAME = 'index.html'
 ARCHIVE_DIR_NAME = 'archive'
 SOURCES_DIR_NAME = 'sources'
-DATABASE_DIR_NAME = 'database'
-DATABASE_FILE_NAME = 'database.sqlite3'
+LOGS_DIR_NAME = 'logs'
 ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
 SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
-DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
-DATABASE_FILE = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME, DATABASE_FILE_NAME)
+LOGS_DIR = os.path.join(OUTPUT_DIR, LOGS_DIR_NAME)
 PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
 LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
 
@@ -126,9 +127,10 @@ if USER == 'root':
     raise SystemExit(1)
 
 ### Check Python environment
-python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
-if python_vers < 3.6:
-    stderr('{}[X] Python version is not new enough: {} (>3.6 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
+PYTHON_BINARY = sys.executable
+PYTHON_VERSION = '{}.{}'.format(sys.version_info.major, sys.version_info.minor)
+if float(PYTHON_VERSION) < 3.6:
+    stderr('{}[X] Python version is not new enough: {} (>3.6 is required){}'.format(ANSI['red'], PYTHON_VERSION, ANSI['reset']))
     stderr(' See https://github.com/pirate/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
     raise SystemExit(1)
 
@@ -150,6 +152,7 @@ if sys.stdout.encoding.upper() not in ('UTF-8', 'UTF8'):
 def bin_version(binary: str) -> Optional[str]:
     """check the presence and return valid version line of a specified binary"""
+    global HAS_INVALID_DEPENDENCIES
     binary = os.path.expanduser(binary)
     try:
 
@@ -223,12 +226,17 @@ def find_chrome_data_dir() -> Optional[str]:
     return None
 
-def setup_django():
+def setup_django(out_dir: str=OUTPUT_DIR, check_db=False):
     import django
     sys.path.append(PYTHON_DIR)
+    os.environ.setdefault('OUTPUT_DIR', out_dir)
     os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
     django.setup()
+    if check_db:
+        assert os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)), (
+            f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {out_dir}')
 
 # ******************************************************************************
 # ************************ Environment & Dependencies **************************
 # ******************************************************************************
 
@@ -338,16 +346,16 @@ try:
             'enabled': True,
             'is_valid': os.path.exists(SOURCES_DIR),
         },
+        'LOGS_DIR': {
+            'path': os.path.abspath(LOGS_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(LOGS_DIR),
+        },
         'ARCHIVE_DIR': {
             'path': os.path.abspath(ARCHIVE_DIR),
             'enabled': True,
             'is_valid': os.path.exists(ARCHIVE_DIR),
         },
-        'DATABASE_DIR': {
-            'path': os.path.abspath(DATABASE_DIR),
-            'enabled': True,
-            'is_valid': os.path.exists(DATABASE_FILE),
-        },
         'CHROME_USER_DATA_DIR': {
             'path': CHROME_USER_DATA_DIR and os.path.abspath(CHROME_USER_DATA_DIR),
             'enabled': USE_CHROME and CHROME_USER_DATA_DIR,
 
@@ -361,6 +369,12 @@ try:
     }
 
     DEPENDENCIES = {
+        'PYTHON_BINARY': {
+            'path': PYTHON_BINARY,
+            'version': PYTHON_VERSION,
+            'enabled': True,
+            'is_valid': bool(DJANGO_VERSION),
+        },
         'DJANGO_BINARY': {
             'path': DJANGO_BINARY,
             'version': DJANGO_VERSION,
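A hedged usage sketch (not part of the commit, and assuming the module imports as archivebox.legacy.config) of the new setup_django() signature: callers pass the collection folder explicitly, and index readers can additionally ask it to verify that index.sqlite3 already exists there.

from archivebox.legacy.config import OUTPUT_DIR, setup_django

setup_django(OUTPUT_DIR)                   # e.g. what the `server` and `shell` subcommands now do
setup_django(OUTPUT_DIR, check_db=True)    # e.g. what the SQL index reader/writer now does

from django.conf import settings
print(settings.DATABASE_FILE)              # resolves to <OUTPUT_DIR>/index.sqlite3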

View file

@@ -1,13 +1,17 @@
+__package__ = 'archivebox.legacy'
+
 import os
 import json
 
 from typing import List, Tuple, Optional, Iterable
 from collections import OrderedDict
+from contextlib import contextmanager
 
 from .schema import Link, ArchiveResult
 from .config import (
-    DATABASE_DIR,
-    DATABASE_FILE_NAME,
+    SQL_INDEX_FILENAME,
+    JSON_INDEX_FILENAME,
+    HTML_INDEX_FILENAME,
     OUTPUT_DIR,
     TIMEOUT,
     URL_BLACKLIST_PTN,
 
@@ -35,14 +39,13 @@ from .util import (
 from .parse import parse_links
 from .logs import (
     log_indexing_process_started,
+    log_indexing_process_finished,
     log_indexing_started,
     log_indexing_finished,
     log_parsing_started,
     log_parsing_finished,
 )
 
 ### Link filtering and checking
 
 @enforce_types
 
@@ -117,7 +120,7 @@ def validate_links(links: Iterable[Link]) -> Iterable[Link]:
     links = uniquefied_links(links)  # merge/dedupe duplicate timestamps & urls
 
     if not links:
-        stderr('{red}[X] No links found in index.json{reset}'.format(**ANSI))
+        stderr('{red}[X] No links found in index.{reset}'.format(**ANSI))
         stderr(' To add a link to your archive, run:')
         stderr(" archivebox add 'https://example.com'")
         stderr()
 
@@ -204,49 +207,54 @@ def lowest_uniq_timestamp(used_timestamps: OrderedDict, timestamp: str) -> str:
 ### Main Links Index
 
+@contextmanager
+@enforce_types
+def timed_index_update(out_path: str):
+    log_indexing_started(out_path)
+    timer = TimedProgress(TIMEOUT * 2, prefix=' ')
+    try:
+        yield
+    finally:
+        timer.end()
+
+    assert os.path.exists(out_path), f'Failed to write index file: {out_path}'
+    log_indexing_finished(out_path)
+
 @enforce_types
 def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
     """create index.html file for a given list of links"""
 
-    log_indexing_process_started()
+    log_indexing_process_started(len(links))
 
-    log_indexing_started(DATABASE_DIR, DATABASE_FILE_NAME)
-    timer = TimedProgress(TIMEOUT * 2, prefix=' ')
-    try:
-        write_sql_main_index(links)
-    finally:
-        timer.end()
-    log_indexing_finished(DATABASE_DIR, DATABASE_FILE_NAME)
+    with timed_index_update(os.path.join(out_dir, SQL_INDEX_FILENAME)):
+        write_sql_main_index(links, out_dir=out_dir)
 
-    log_indexing_started(out_dir, 'index.json')
-    timer = TimedProgress(TIMEOUT * 2, prefix=' ')
-    try:
-        write_json_main_index(links, out_dir=out_dir)
-    finally:
-        timer.end()
-    log_indexing_finished(out_dir, 'index.json')
+    with timed_index_update(os.path.join(out_dir, JSON_INDEX_FILENAME)):
+        write_json_main_index(links, out_dir=out_dir)
 
-    log_indexing_started(out_dir, 'index.html')
-    timer = TimedProgress(TIMEOUT * 2, prefix=' ')
-    try:
-        write_html_main_index(links, out_dir=out_dir, finished=finished)
-    finally:
-        timer.end()
-    log_indexing_finished(out_dir, 'index.html')
+    with timed_index_update(os.path.join(out_dir, HTML_INDEX_FILENAME)):
+        write_html_main_index(links, out_dir=out_dir, finished=finished)
+
+    log_indexing_process_finished()
 
 @enforce_types
-def load_main_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) -> Tuple[List[Link], List[Link]]:
+def load_main_index(out_dir: str=OUTPUT_DIR) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
 
-    existing_links: List[Link] = []
-    if out_dir:
-        existing_links = list(parse_json_main_index(out_dir))
-        existing_sql_links = list(parse_sql_main_index())
-        assert set(l.url for l in existing_links) == set(l['url'] for l in existing_sql_links)
+    all_links: List[Link] = []
+    all_links = list(parse_json_main_index(out_dir))
+    links_from_sql = list(parse_sql_main_index())
+    assert set(l.url for l in all_links) == set(l['url'] for l in links_from_sql)
+
+    return all_links
+
+@enforce_types
+def import_new_links(existing_links: List[Link], import_path: str) -> Tuple[List[Link], List[Link]]:
 
     new_links: List[Link] = []
-    if import_path:
-        # parse and validate the import file
-        log_parsing_started(import_path)
-        raw_links, parser_name = parse_links(import_path)
+
+    # parse and validate the import file
+    log_parsing_started(import_path)
+    raw_links, parser_name = parse_links(import_path)
 
@@ -255,7 +263,7 @@ def load_main_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) ->
     # merge existing links in out_dir and new links
     all_links = list(validate_links(existing_links + new_links))
 
-    if import_path and parser_name:
+    if parser_name:
         num_parsed = len(raw_links)
         num_new_links = len(all_links) - len(existing_links)
         log_parsing_finished(num_parsed, num_new_links, parser_name)
 
@@ -323,9 +331,3 @@ def load_link_details(link: Link, out_dir: Optional[str]=None) -> Link:
         return merge_links(existing_link, link)
 
     return link
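For illustration, a simplified standalone sketch (not ArchiveBox code) of the pattern introduced by timed_index_update(): each index writer runs inside one context manager that logs the target path, lets the caller write the file, then verifies the file actually exists. The real version also wraps the yield in a TimedProgress timer.

import os
from contextlib import contextmanager

@contextmanager
def timed_index_update(out_path: str):
    print(f' > {out_path}')              # stand-in for log_indexing_started()
    yield                                # caller writes the index file inside the with-block
    assert os.path.exists(out_path), f'Failed to write index file: {out_path}'
    print(f' √ {out_path}')              # stand-in for log_indexing_finished()

with timed_index_update('/tmp/index.json'):
    with open('/tmp/index.json', 'w') as f:
        f.write('{"links": []}')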

View file

@@ -6,7 +6,7 @@ from dataclasses import dataclass
 from typing import Optional, List
 
 from .schema import Link, ArchiveResult
-from .config import ANSI, OUTPUT_DIR
+from .config import ANSI, OUTPUT_DIR, IS_TTY
 
 @dataclass
 
@@ -42,7 +42,7 @@ def pretty_path(path: str) -> str:
 def log_parsing_started(source_file: str):
     start_ts = datetime.now()
     _LAST_RUN_STATS.parse_start_ts = start_ts
-    print('{green}[*] [{}] Parsing new links from output/sources/{}...{reset}'.format(
+    print('\n{green}[*] [{}] Parsing new links from output/sources/{}...{reset}'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         source_file.rsplit('/', 1)[-1],
         **ANSI,
 
@@ -56,22 +56,26 @@ def log_parsing_finished(num_parsed: int, num_new_links: int, parser_name: str):
 ### Indexing Stage
 
-def log_indexing_process_started():
+def log_indexing_process_started(num_links: int):
     start_ts = datetime.now()
     _LAST_RUN_STATS.index_start_ts = start_ts
     print()
-    print('{green}[*] [{}] Saving main index files...{reset}'.format(
+    print('{green}[*] [{}] Updating {} links in main index...{reset}'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
+        num_links,
         **ANSI,
     ))
 
-def log_indexing_started(out_dir: str, out_file: str):
-    sys.stdout.write(' > {}/{}'.format(pretty_path(out_dir), out_file))
-
-def log_indexing_finished(out_dir: str, out_file: str):
+def log_indexing_process_finished():
     end_ts = datetime.now()
     _LAST_RUN_STATS.index_end_ts = end_ts
-    print('\r{}/{}'.format(out_dir, out_file))
+
+def log_indexing_started(out_path: str):
+    if IS_TTY:
+        sys.stdout.write(f' > {out_path}')
+
+def log_indexing_finished(out_path: str):
+    print(f'\r{out_path}')
 
 ### Archiving Stage
 
@@ -108,7 +112,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
     print(' To view your archive, open:')
     print(' {}/index.html'.format(OUTPUT_DIR))
     print(' Continue archiving where you left off by running:')
-    print(' archivebox {}'.format(timestamp))
+    print(' archivebox update --resume={}'.format(timestamp))
 
 def log_archiving_finished(num_links: int):
     end_ts = datetime.now()
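A small sketch (not part of the commit) of the TTY-aware progress output used by the new log_indexing_started/finished pair: on an interactive terminal the pending path is written without a newline, then overwritten in place with a carriage return once the write completes. Here IS_TTY is assumed to come from sys.stdout.isatty(), mirroring the flag imported from config.

import sys

IS_TTY = sys.stdout.isatty()   # assumption: stand-in for the IS_TTY flag from config

out_path = '/tmp/index.json'   # hypothetical index path
if IS_TTY:
    sys.stdout.write(f' > {out_path}')
    sys.stdout.flush()
print(f'\r{out_path}')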

View file

@@ -9,6 +9,7 @@ from .util import enforce_types, TimedProgress
 from .index import (
     links_after_timestamp,
     load_main_index,
+    import_new_links,
     write_main_index,
 )
 from .archive_methods import archive_link
 
@@ -19,8 +20,9 @@ from .config import (
     OUTPUT_DIR,
     SOURCES_DIR,
     ARCHIVE_DIR,
-    DATABASE_DIR,
-    DATABASE_FILE,
+    LOGS_DIR,
+    JSON_INDEX_FILENAME,
+    SQL_INDEX_FILENAME,
     check_dependencies,
     check_data_folder,
     setup_django,
 
@@ -36,60 +38,85 @@ from .logs import (
 )
 
+ALLOWED_IN_OUTPUT_DIR = {
+    '.DS_Store',
+    '.venv',
+    'venv',
+    'virtualenv',
+    '.virtualenv',
+    'sources',
+    'archive',
+    'logs',
+    'static',
+}
+
 @enforce_types
 def init():
     os.makedirs(OUTPUT_DIR, exist_ok=True)
 
-    harmless_files = {'.DS_Store', '.venv', 'venv', 'virtualenv', '.virtualenv', 'sources', 'archive', 'database', 'logs', 'static'}
-    is_empty = not len(set(os.listdir(OUTPUT_DIR)) - harmless_files)
-    existing_index = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json'))
+    is_empty = not len(set(os.listdir(OUTPUT_DIR)) - ALLOWED_IN_OUTPUT_DIR)
+    existing_index = os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
 
     if is_empty:
-        stderr('{green}[+] Initializing new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI))
-        write_main_index([], out_dir=OUTPUT_DIR, finished=True)
+        print('{green}[+] Initializing new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI))
+        print('{green}----------------------------------------------------------------{reset}'.format(**ANSI))
    else:
        if existing_index:
-            stderr('{green}[√] You already have an ArchiveBox collection in the current folder.{reset}'.format(**ANSI))
-            stderr(f' {OUTPUT_DIR}')
-            stderr(f' > index.html')
-            stderr(f' > index.json')
+            print('{green}[√] You already have an ArchiveBox collection in the current folder.{reset}'.format(**ANSI))
+            print('{green}----------------------------------------------------------------{reset}'.format(**ANSI))
+            print(f' {OUTPUT_DIR}')
        else:
            stderr(
-                ("{red}[X] This folder already has files in it. You must run init inside a completely empty directory.{reset}"
+                ("{red}[X] This folder appears to have non-ArchiveBox files in it. You must run 'archivebox init' inside a completely empty directory.{reset}"
                 "\n\n"
                 " {lightred}Hint:{reset} To import a data folder created by an older version of ArchiveBox, \n"
-                " just cd into the folder and run the archivebox command to pick up where you left off.\n\n"
+                " just cd into the folder and run 'archivebox update' to pick up where you left off.\n\n"
                 " (Always make sure your data folder is backed up first before updating ArchiveBox)"
                ).format(OUTPUT_DIR, **ANSI)
            )
            raise SystemExit(1)
 
     os.makedirs(SOURCES_DIR, exist_ok=True)
-    stderr(f' > sources/')
+    print(f' > {SOURCES_DIR}')
 
     os.makedirs(ARCHIVE_DIR, exist_ok=True)
-    stderr(f' > archive/')
+    print(f' > {ARCHIVE_DIR}')
 
-    os.makedirs(DATABASE_DIR, exist_ok=True)
-    setup_django()
+    os.makedirs(LOGS_DIR, exist_ok=True)
+    print(f' > {LOGS_DIR}')
+
+    print('\n{green}[+] Running Django migrations...{reset}'.format(**ANSI))
+    setup_django(OUTPUT_DIR, check_db=False)
     from django.core.management import call_command
-    from django.contrib.auth.models import User
-    stderr(f' > database/')
+    from django.conf import settings
+    assert settings.DATABASE_FILE == os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME)
+    print(f' {settings.DATABASE_FILE}')
 
-    stderr('\n{green}[+] Running Django migrations...{reset}'.format(**ANSI))
     call_command("makemigrations", interactive=False)
     call_command("migrate", interactive=False)
 
-    if not User.objects.filter(is_superuser=True).exists():
-        stderr('{green}[+] Creating admin user account...{reset}'.format(**ANSI))
-        call_command("createsuperuser", interactive=True)
+    assert os.path.exists(settings.DATABASE_FILE)
 
-    stderr('\n{green}------------------------------------------------------------{reset}'.format(**ANSI))
-    stderr('{green}[√] Done. ArchiveBox collection is set up in current folder.{reset}'.format(**ANSI))
-    stderr(' To add new links, you can run:')
-    stderr(" archivebox add 'https://example.com'")
-    stderr()
-    stderr(' For more usage and examples, run:')
-    stderr(' archivebox help')
+    # from django.contrib.auth.models import User
+    # if IS_TTY and not User.objects.filter(is_superuser=True).exists():
+    #     print('{green}[+] Creating admin user account...{reset}'.format(**ANSI))
+    #     call_command("createsuperuser", interactive=True)
+
+    if existing_index:
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        write_main_index(links=list(all_links), out_dir=OUTPUT_DIR)
+    else:
+        write_main_index([], out_dir=OUTPUT_DIR)
+
+    print('\n{green}----------------------------------------------------------------{reset}'.format(**ANSI))
+    print('{green}[√] Done. ArchiveBox collection is set up in the current folder.{reset}'.format(**ANSI))
+    print(' To add new links, you can run:')
+    print(" archivebox add 'https://example.com'")
+    print()
+    print(' For more usage and examples, run:')
+    print(' archivebox help')
 
@@ -102,7 +129,11 @@ def update_archive_data(import_path: Optional[str]=None, resume: Optional[float]
 
     # Step 1: Load list of links from the existing index
     # merge in and dedupe new links from import_path
-    all_links, new_links = load_main_index(out_dir=OUTPUT_DIR, import_path=import_path)
+    all_links: List[Link] = []
+    new_links: List[Link] = []
+    all_links = load_main_index(out_dir=OUTPUT_DIR)
+    if import_path:
+        all_links, new_links = import_new_links(all_links, import_path)
 
     # Step 2: Write updated index with deduped old and new links back to disk
     write_main_index(links=list(all_links), out_dir=OUTPUT_DIR)
 
@@ -127,7 +158,7 @@ def update_archive_data(import_path: Optional[str]=None, resume: Optional[float]
     log_archiving_finished(len(links))
 
     # Step 4: Re-write links index with updated titles, icons, and resources
-    all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+    all_links = load_main_index(out_dir=OUTPUT_DIR)
     write_main_index(links=list(all_links), out_dir=OUTPUT_DIR, finished=True)
     return all_links
 
@@ -152,7 +183,7 @@ def link_matches_filter(link: Link, filter_patterns: List[str], filter_type: str
 def list_archive_data(filter_patterns: Optional[List[str]]=None, filter_type: str='exact',
                       after: Optional[float]=None, before: Optional[float]=None) -> Iterable[Link]:
 
-    all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+    all_links = load_main_index(out_dir=OUTPUT_DIR)
 
     for link in all_links:
         if after is not None and float(link.timestamp) < after:
 
@@ -198,7 +229,7 @@ def remove_archive_links(filter_patterns: List[str], filter_type: str='exact',
     timer = TimedProgress(360, prefix=' ')
     try:
         to_keep = []
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         for link in all_links:
             should_remove = (
                 (after is not None and float(link.timestamp) < after)
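A hedged sketch (not part of the commit, assuming the package imports as archivebox.legacy) of the new update flow after import_new_links() was split out of load_main_index(): load the existing collection first, then merge an optional import source. The feed URL below is a hypothetical example.

from archivebox.legacy.index import load_main_index, import_new_links, write_main_index
from archivebox.legacy.config import OUTPUT_DIR

all_links = load_main_index(out_dir=OUTPUT_DIR)          # existing JSON index, cross-checked against SQL

import_path = 'https://example.com/some/rss/feed.xml'    # hypothetical feed to pull in
all_links, new_links = import_new_links(all_links, import_path)

write_main_index(links=list(all_links), out_dir=OUTPUT_DIR)
print(f'{len(new_links)} new links added')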

View file

@@ -13,6 +13,7 @@ from ..config import (
     GIT_SHA,
     FOOTER_INFO,
     ARCHIVE_DIR_NAME,
+    HTML_INDEX_FILENAME,
 )
 from ..util import (
     enforce_types,
 
@@ -44,7 +45,7 @@ def write_html_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished:
     copy_and_overwrite(join(TEMPLATES_DIR, 'static'), join(out_dir, 'static'))
 
     rendered_html = main_index_template(links, finished=finished)
-    atomic_write(rendered_html, join(out_dir, 'index.html'))
+    atomic_write(rendered_html, join(out_dir, HTML_INDEX_FILENAME))
 
 @enforce_types
 
@@ -100,7 +101,7 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     out_dir = out_dir or link.link_dir
 
     rendered_html = link_details_template(link)
-    atomic_write(rendered_html, join(out_dir, 'index.html'))
+    atomic_write(rendered_html, join(out_dir, HTML_INDEX_FILENAME))
 
 @enforce_types

View file

@@ -1,6 +1,7 @@
 __package__ = 'archivebox.legacy.storage'
 
 import os
+import sys
 import json
 
 from datetime import datetime
 
@@ -10,12 +11,33 @@ from ..schema import Link, ArchiveResult
 from ..config import (
     VERSION,
     OUTPUT_DIR,
+    FOOTER_INFO,
+    GIT_SHA,
+    DEPENDENCIES,
+    JSON_INDEX_FILENAME,
 )
 from ..util import (
     enforce_types,
     atomic_write,
 )
 
+MAIN_INDEX_HEADER = {
+    'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
+    'schema': 'archivebox.legacy.storage.json',
+    'copyright_info': FOOTER_INFO,
+    'meta': {
+        'project': 'ArchiveBox',
+        'cmd': sys.argv,
+        'version': VERSION,
+        'git_sha': GIT_SHA,
+        'website': 'https://ArchiveBox.io',
+        'docs': 'https://github.com/pirate/ArchiveBox/wiki',
+        'source': 'https://github.com/pirate/ArchiveBox',
+        'issues': 'https://github.com/pirate/ArchiveBox/issues',
+        'dependencies': DEPENDENCIES,
+    },
+}
+
 ### Main Links Index
 
@@ -23,7 +45,7 @@ from ..util import (
 def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
     """parse a archive index json file and return the list of links"""
 
-    index_path = os.path.join(out_dir, 'index.json')
+    index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
     if os.path.exists(index_path):
         with open(index_path, 'r', encoding='utf-8') as f:
             links = json.load(f)['links']
 
@@ -46,18 +68,13 @@ def write_json_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
     if links and links[0].sources:
         assert isinstance(links[0].sources[0], str)
 
-    path = os.path.join(out_dir, 'index.json')
-
-    index_json = {
-        'info': 'ArchiveBox Index',
-        'source': 'https://github.com/pirate/ArchiveBox',
-        'docs': 'https://github.com/pirate/ArchiveBox/wiki',
-        'version': VERSION,
+    main_index_json = {
+        **MAIN_INDEX_HEADER,
         'num_links': len(links),
         'updated': datetime.now(),
         'links': links,
     }
-    atomic_write(index_json, path)
+    atomic_write(main_index_json, os.path.join(out_dir, JSON_INDEX_FILENAME))
 
 ### Link Details Index
 
@@ -67,7 +84,7 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     """write a json file with some info about the link"""
 
     out_dir = out_dir or link.link_dir
-    path = os.path.join(out_dir, 'index.json')
+    path = os.path.join(out_dir, JSON_INDEX_FILENAME)
 
     atomic_write(link._asdict(extended=True), path)
 
@@ -75,7 +92,7 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
 @enforce_types
 def parse_json_link_details(out_dir: str) -> Optional[Link]:
     """load the json link index from a given directory"""
-    existing_index = os.path.join(out_dir, 'index.json')
+    existing_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
     if os.path.exists(existing_index):
         with open(existing_index, 'r', encoding='utf-8') as f:
             link_json = json.load(f)
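Roughly, the main index.json written after this change has the following shape (shown here as a Python dict, not part of the commit): the static MAIN_INDEX_HEADER fields plus the per-write fields appended by write_json_main_index(). Placeholder values stand in for FOOTER_INFO, VERSION, and the other config-derived fields.

main_index_json = {
    'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
    'schema': 'archivebox.legacy.storage.json',
    'copyright_info': '...',        # FOOTER_INFO from config
    'meta': {
        'project': 'ArchiveBox',
        'version': '...',           # VERSION, git_sha, cmd, dependencies, etc.
    },
    'num_links': 0,
    'updated': '2019-04-18T21:09:54',
    'links': [],
}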

View file

@@ -4,14 +4,14 @@ from typing import List, Iterator
 from ..schema import Link
 from ..util import enforce_types
-from ..config import setup_django
+from ..config import setup_django, OUTPUT_DIR
 
 ### Main Links Index
 
 @enforce_types
-def parse_sql_main_index() -> Iterator[Link]:
-    setup_django()
+def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
+    setup_django(out_dir, check_db=True)
     from core.models import Page
 
     return (
 
@@ -20,8 +20,8 @@ def parse_sql_main_index() -> Iterator[Link]:
     )
 
 @enforce_types
-def write_sql_main_index(links: List[Link]) -> None:
-    setup_django()
+def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
+    setup_django(out_dir, check_db=True)
     from core.models import Page
 
     for link in links:
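A hedged round-trip sketch (not part of the commit, assuming the module lives at archivebox.legacy.storage.sql) of the new out_dir-aware signatures: both functions call setup_django(out_dir, check_db=True) internally, so the collection's index.sqlite3 must already exist.

from archivebox.legacy.storage.sql import write_sql_main_index, parse_sql_main_index
from archivebox.legacy.config import OUTPUT_DIR

links = []                                        # e.g. List[Link] loaded from the JSON index
write_sql_main_index(links, out_dir=OUTPUT_DIR)   # upserts Page rows for each link
rows = list(parse_sql_main_index(OUTPUT_DIR))     # yields dict-like rows keyed at least by 'url'
print(len(rows))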

View file

@@ -27,6 +27,11 @@ os.environ.update(TEST_CONFIG)
 from .legacy.main import init
 from .legacy.index import load_main_index
+from .legacy.config import (
+    SQL_INDEX_FILENAME,
+    JSON_INDEX_FILENAME,
+    HTML_INDEX_FILENAME,
+)
 
 from .cli import (
     archivebox_init,
 
@@ -55,12 +60,12 @@ and example14.badb
 <or>htt://example15.badc</that>
 '''
 
+stdout = sys.stdout
+stderr = sys.stderr
+
 @contextmanager
 def output_hidden(show_failing=True):
-    stdout = sys.stdout
-    stderr = sys.stderr
     if not HIDE_CLI_OUTPUT:
         yield
         return
 
@@ -100,6 +105,11 @@ class TestInit(unittest.TestCase):
         with output_hidden():
             archivebox_init.main([])
 
+        assert os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
+        assert os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
+        assert os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))
+        assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
+
     def test_conflicting_init(self):
         with open(os.path.join(OUTPUT_DIR, 'test_conflict.txt'), 'w+') as f:
             f.write('test')
 
@@ -108,9 +118,25 @@ class TestInit(unittest.TestCase):
             with output_hidden(show_failing=False):
                 archivebox_init.main([])
             assert False, 'Init should have exited with an exception'
+        except SystemExit:
+            pass
+
+        assert not os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
+        assert not os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
+        assert not os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))
+
+        try:
+            load_main_index(out_dir=OUTPUT_DIR)
+            assert False, 'load_main_index should raise an exception when no index is present'
         except:
             pass
 
+    def test_no_dirty_state(self):
+        with output_hidden():
+            init()
+        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        with output_hidden():
+            init()
+
 class TestAdd(unittest.TestCase):
     def setUp(self):
 
@@ -125,7 +151,7 @@ class TestAdd(unittest.TestCase):
         with output_hidden():
             archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
 
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         assert len(all_links) == 30
 
     def test_add_arg_file(self):
 
@@ -136,7 +162,7 @@ class TestAdd(unittest.TestCase):
         with output_hidden():
             archivebox_add.main([test_file])
 
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         assert len(all_links) == 12
         os.remove(test_file)
 
@@ -144,7 +170,7 @@ class TestAdd(unittest.TestCase):
         with output_hidden():
             archivebox_add.main([], stdin=test_urls)
 
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         assert len(all_links) == 12
 
@@ -155,29 +181,29 @@ class TestRemove(unittest.TestCase):
             init()
             archivebox_add.main([], stdin=test_urls)
 
-    def tearDown(self):
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+    # def tearDown(self):
+    #     shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
 
     def test_remove_exact(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
 
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         assert len(all_links) == 11
 
     def test_remove_regex(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', 'http(s)?:\/\/(.+\.)?(example\d\.com)'])
 
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         assert len(all_links) == 4
 
     def test_remove_domain(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
 
-        all_links, _ = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
         assert len(all_links) == 10
 
     def test_remove_none(self):
 
@@ -190,4 +216,7 @@ class TestRemove(unittest.TestCase):
 if __name__ == '__main__':
+    if '--verbose' in sys.argv or '-v' in sys.argv:
+        HIDE_CLI_OUTPUT = False
+
     unittest.main()

View file

@@ -1,17 +0,0 @@
dataclasses
django
base32-crockford
setuptools
ipdb
mypy
django-stubs
flake8
#wpull
#pywb
#pyppeteer
#GitPython
#youtube-dl
#archivenow
#requests

View file

@@ -31,7 +31,7 @@ setuptools.setup(
         'Bug Tracker': 'https://github.com/pirate/ArchiveBox/issues',
         'Roadmap': 'https://github.com/pirate/ArchiveBox/wiki/Roadmap',
         'Changelog': 'https://github.com/pirate/ArchiveBox/wiki/Changelog',
-        'Donations': 'https://github.com/pirate/ArchiveBox/wiki/Donations',
+        'Patreon': 'https://github.com/pirate/ArchiveBox/wiki/Donations',
     },
     packages=setuptools.find_packages(),
     python_requires='>=3.6',
 
@@ -40,6 +40,15 @@ setuptools.setup(
         "base32-crockford==0.3.0",
         "django==2.2",
         "django-extensions==2.1.6",
+        "youtube-dl",
+
+        # Some/all of these will likely be added in the future:
+        # wpull
+        # pywb
+        # pyppeteer
+        # archivenow
+        # requests
     ],
     entry_points={
         'console_scripts': [