From 68b4c01c6b9dec3e37c20a387bd499d8344e18de Mon Sep 17 00:00:00 2001 From: Nick Sweeting <git@nicksweeting.com> Date: Tue, 2 Apr 2019 18:53:21 -0400 Subject: [PATCH] working archivebox command inside django legacy folder --- VERSION | 2 +- archivebox/__init__.py | 5 - archivebox/__main__.py | 21 ++ archivebox/archivebox/VERSION | 1 - archivebox/archivebox/__init__.py | 0 archivebox/archivebox/settings.py | 123 ---------- .../core/management/commands/archivebox.py | 5 +- archivebox/core/settings.py | 78 ++++++ archivebox/{archivebox => core}/urls.py | 0 archivebox/{archivebox => core}/wsgi.py | 0 archivebox/env.py | 11 + archivebox/legacy/__init__.py | 5 + archivebox/{core => legacy}/archive.py | 67 +++-- .../{core => legacy}/archive_methods.py | 10 +- archivebox/{core => legacy}/config.py | 17 +- archivebox/{core => legacy}/index.py | 12 +- archivebox/{core => legacy}/links.py | 6 +- archivebox/{core => legacy}/logs.py | 4 +- archivebox/{core => legacy}/parse.py | 4 +- archivebox/{core => legacy}/purge.py | 4 +- archivebox/{core => legacy}/schema.py | 0 archivebox/{ => legacy}/templates/index.html | 0 .../{ => legacy}/templates/index_row.html | 0 .../{ => legacy}/templates/link_index.html | 0 .../{ => legacy/templates}/static/archive.png | Bin .../templates}/static/bootstrap.min.css | 0 .../templates}/static/external.png | Bin .../static/jquery.dataTables.min.css | 0 .../static/jquery.dataTables.min.js | 0 .../templates}/static/jquery.min.js | 0 .../templates}/static/sort_asc.png | Bin .../templates}/static/sort_both.png | Bin .../templates}/static/sort_desc.png | Bin .../{ => legacy/templates}/static/spinner.gif | Bin archivebox/{core => legacy}/util.py | 15 +- archivebox/manage.py | 2 +- archivebox/tests/firefox_export.html | 34 --- archivebox/tests/pinboard_export.html | 12 - archivebox/tests/pinboard_export.json | 8 - archivebox/tests/pinboard_export.rss | 46 ---- archivebox/tests/pinboard_export.xml | 5 - archivebox/tests/pinboard_export_2.json | 2 - archivebox/tests/pocket_export.html | 38 --- archivebox/tests/rss_export.xml | 228 ------------------ archivebox/tests/tests.py | 92 ------- bin/README.md | 18 -- bin/archivebox | 16 +- bin/archivebox-purge | 1 - setup.py | 3 +- 49 files changed, 222 insertions(+), 673 deletions(-) create mode 100755 archivebox/__main__.py delete mode 120000 archivebox/archivebox/VERSION delete mode 100644 archivebox/archivebox/__init__.py delete mode 100644 archivebox/archivebox/settings.py create mode 100644 archivebox/core/settings.py rename archivebox/{archivebox => core}/urls.py (100%) rename archivebox/{archivebox => core}/wsgi.py (100%) create mode 100644 archivebox/env.py create mode 100644 archivebox/legacy/__init__.py rename archivebox/{core => legacy}/archive.py (85%) rename archivebox/{core => legacy}/archive_methods.py (99%) rename archivebox/{core => legacy}/config.py (95%) rename archivebox/{core => legacy}/index.py (97%) rename archivebox/{core => legacy}/links.py (96%) rename archivebox/{core => legacy}/logs.py (98%) rename archivebox/{core => legacy}/parse.py (99%) rename archivebox/{core => legacy}/purge.py (93%) rename archivebox/{core => legacy}/schema.py (100%) rename archivebox/{ => legacy}/templates/index.html (100%) rename archivebox/{ => legacy}/templates/index_row.html (100%) rename archivebox/{ => legacy}/templates/link_index.html (100%) rename archivebox/{ => legacy/templates}/static/archive.png (100%) rename archivebox/{ => legacy/templates}/static/bootstrap.min.css (100%) rename archivebox/{ => legacy/templates}/static/external.png (100%) rename archivebox/{ => legacy/templates}/static/jquery.dataTables.min.css (100%) rename archivebox/{ => legacy/templates}/static/jquery.dataTables.min.js (100%) rename archivebox/{ => legacy/templates}/static/jquery.min.js (100%) rename archivebox/{ => legacy/templates}/static/sort_asc.png (100%) rename archivebox/{ => legacy/templates}/static/sort_both.png (100%) rename archivebox/{ => legacy/templates}/static/sort_desc.png (100%) rename archivebox/{ => legacy/templates}/static/spinner.gif (100%) rename archivebox/{core => legacy}/util.py (98%) delete mode 100644 archivebox/tests/firefox_export.html delete mode 100644 archivebox/tests/pinboard_export.html delete mode 100644 archivebox/tests/pinboard_export.json delete mode 100644 archivebox/tests/pinboard_export.rss delete mode 100644 archivebox/tests/pinboard_export.xml delete mode 100644 archivebox/tests/pinboard_export_2.json delete mode 100644 archivebox/tests/pocket_export.html delete mode 100644 archivebox/tests/rss_export.xml delete mode 100755 archivebox/tests/tests.py delete mode 100644 bin/README.md mode change 120000 => 100755 bin/archivebox delete mode 120000 bin/archivebox-purge diff --git a/VERSION b/VERSION index 0d91a54c..1d0ba9ea 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.0 +0.4.0 diff --git a/archivebox/__init__.py b/archivebox/__init__.py index ab53f570..e69de29b 100644 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -1,5 +0,0 @@ - - -#__name__ = 'archivebox' -#__package__ = 'archivebox' - diff --git a/archivebox/__main__.py b/archivebox/__main__.py new file mode 100755 index 00000000..8e75ec40 --- /dev/null +++ b/archivebox/__main__.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +""" +Main ArchiveBox command line application entrypoint. +""" + +__package__ = 'archivebox' + +import os +import sys + +PYTHON_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(PYTHON_DIR) + +from .env import * +from .legacy.archive import main + + +if __name__ == '__main__': + main(sys.argv) + diff --git a/archivebox/archivebox/VERSION b/archivebox/archivebox/VERSION deleted file mode 120000 index 6ff19de4..00000000 --- a/archivebox/archivebox/VERSION +++ /dev/null @@ -1 +0,0 @@ -../VERSION \ No newline at end of file diff --git a/archivebox/archivebox/__init__.py b/archivebox/archivebox/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/archivebox/settings.py b/archivebox/archivebox/settings.py deleted file mode 100644 index e027de02..00000000 --- a/archivebox/archivebox/settings.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Django settings for archivebox project. - -Generated by 'django-admin startproject' using Django 2.1.7. - -For more information on this file, see -https://docs.djangoproject.com/en/2.1/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/2.1/ref/settings/ -""" - -import os - -# Build paths inside the project like this: os.path.join(COLLECTION_DIR, ...) -REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) -COLLECTION_DIR = os.path.abspath(os.curdir) - -print(REPO_DIR) -print(COLLECTION_DIR) -raise SystemExit(0) - - -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/ - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'm-ma!-z^0b5w4%**le#ig!7-d@h($t02q*96h*-ua+$lm9bvao' - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True - -ALLOWED_HOSTS = [] - - -# Application definition - -INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - - 'core', -] - -MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -] - -ROOT_URLCONF = 'archivebox.urls' - -ACTIVE_THEME = 'default' -TEMPLATES_DIR = os.path.join(REPO_DIR, 'themes', ACTIVE_THEME) -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [TEMPLATES_DIR], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - }, - }, -] - -WSGI_APPLICATION = 'archivebox.wsgi.application' - - -# Database -# https://docs.djangoproject.com/en/2.1/ref/settings/#databases - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(COLLECTION_DIR, 'database.sqlite3'), - } -} - - -# Password validation -# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', - }, -] - - -# Internationalization -# https://docs.djangoproject.com/en/2.1/topics/i18n/ -LANGUAGE_CODE = 'en-us' -TIME_ZONE = 'UTC' -USE_I18N = True -USE_L10N = True -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/2.1/howto/static-files/ -STATIC_URL = '/static/' diff --git a/archivebox/core/management/commands/archivebox.py b/archivebox/core/management/commands/archivebox.py index 1764e4e2..c3c236e5 100644 --- a/archivebox/core/management/commands/archivebox.py +++ b/archivebox/core/management/commands/archivebox.py @@ -1,10 +1,11 @@ from django.core.management.base import BaseCommand -from core.archive import main +from legacy.archive import main + class Command(BaseCommand): help = 'ArchiveBox test.bee' def handle(self, *args, **kwargs): - main() + main(*args) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py new file mode 100644 index 00000000..0f209b4c --- /dev/null +++ b/archivebox/core/settings.py @@ -0,0 +1,78 @@ +import os + +from legacy.config import ( + REPO_DIR, + OUTPUT_DIR, + TEMPLATES_DIR, + DATABASE_DIR, +) + + +SECRET_KEY = '---------------- not a valid secret key ! ----------------' +DEBUG = True + + +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + + 'core', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'core.urls' +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [TEMPLATES_DIR], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'core.wsgi.application' + + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(DATABASE_DIR, 'database.sqlite3'), + } +} + +AUTH_PASSWORD_VALIDATORS = [ + {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'}, + {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'}, + {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'}, + {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'}, +] + + +LANGUAGE_CODE = 'en-us' +TIME_ZONE = 'UTC' +USE_I18N = True +USE_L10N = True +USE_TZ = True + + +STATIC_URL = '/static/' diff --git a/archivebox/archivebox/urls.py b/archivebox/core/urls.py similarity index 100% rename from archivebox/archivebox/urls.py rename to archivebox/core/urls.py diff --git a/archivebox/archivebox/wsgi.py b/archivebox/core/wsgi.py similarity index 100% rename from archivebox/archivebox/wsgi.py rename to archivebox/core/wsgi.py diff --git a/archivebox/env.py b/archivebox/env.py new file mode 100644 index 00000000..3a40fab5 --- /dev/null +++ b/archivebox/env.py @@ -0,0 +1,11 @@ +import os +import sys + + +PYTHON_DIR = os.path.dirname(os.path.abspath(__file__)) + +sys.path.append(PYTHON_DIR) +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "core.settings") + +import django +django.setup() diff --git a/archivebox/legacy/__init__.py b/archivebox/legacy/__init__.py new file mode 100644 index 00000000..ab53f570 --- /dev/null +++ b/archivebox/legacy/__init__.py @@ -0,0 +1,5 @@ + + +#__name__ = 'archivebox' +#__package__ = 'archivebox' + diff --git a/archivebox/core/archive.py b/archivebox/legacy/archive.py similarity index 85% rename from archivebox/core/archive.py rename to archivebox/legacy/archive.py index e74b2644..82788c47 100755 --- a/archivebox/core/archive.py +++ b/archivebox/legacy/archive.py @@ -8,7 +8,7 @@ but you can also run it directly using `python3 archive.py` Usage & Documentation: https://github.com/pirate/ArchiveBox/Wiki """ -__package__ = 'archivebox' +__package__ = 'legacy' import os import sys @@ -16,37 +16,50 @@ import shutil from typing import List, Optional -from core.schema import Link -from core.links import links_after_timestamp -from core.index import write_links_index, load_links_index -from core.archive_methods import archive_link -from core.config import ( +from .schema import Link +from .links import links_after_timestamp +from .index import write_links_index, load_links_index +from .archive_methods import archive_link +from .config import ( ONLY_NEW, - OUTPUT_DIR, VERSION, ANSI, - CURL_VERSION, - GIT_VERSION, - WGET_VERSION, - YOUTUBEDL_VERSION, - CHROME_VERSION, + + REPO_DIR, + PYTHON_DIR, + LEGACY_DIR, + TEMPLATES_DIR, + OUTPUT_DIR, + SOURCES_DIR, + ARCHIVE_DIR, + DATABASE_DIR, + USE_CURL, USE_WGET, USE_CHROME, + FETCH_GIT, + FETCH_MEDIA, + + DJANGO_BINARY, CURL_BINARY, GIT_BINARY, WGET_BINARY, YOUTUBEDL_BINARY, CHROME_BINARY, - FETCH_GIT, - FETCH_MEDIA, + + DJANGO_VERSION, + CURL_VERSION, + GIT_VERSION, + WGET_VERSION, + YOUTUBEDL_VERSION, + CHROME_VERSION, ) -from core.util import ( +from .util import ( enforce_types, handle_stdin_import, handle_file_import, ) -from core.logs import ( +from .logs import ( log_archiving_started, log_archiving_paused, log_archiving_finished, @@ -74,9 +87,26 @@ def print_help(): print(" archivebox add --depth=1 https://example.com/feed.rss") print(" archivebox update --resume=15109948213.123") + def print_version(): print('ArchiveBox v{}'.format(__VERSION__)) print() + print('[i] Folder locations:') + print(' REPO_DIR: ', REPO_DIR) + print(' PYTHON_DIR: ', PYTHON_DIR) + print(' LEGACY_DIR: ', LEGACY_DIR) + print(' TEMPLATES_DIR: ', TEMPLATES_DIR) + print() + print(' OUTPUT_DIR: ', OUTPUT_DIR) + print(' SOURCES_DIR: ', SOURCES_DIR) + print(' ARCHIVE_DIR: ', ARCHIVE_DIR) + print(' DATABASE_DIR: ', DATABASE_DIR) + print() + print( + '[√] Django:'.ljust(14), + 'python3 {} --version\n'.format(DJANGO_BINARY), + ' '*13, DJANGO_VERSION, '\n', + ) print( '[{}] CURL:'.format('√' if USE_CURL else 'X').ljust(14), '{} --version\n'.format(shutil.which(CURL_BINARY)), @@ -132,8 +162,11 @@ def main(args=None) -> None: if not os.path.exists(OUTPUT_DIR): print('{green}[+] Created a new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI)) os.makedirs(OUTPUT_DIR) + os.makedirs(SOURCES_DIR) + os.makedirs(ARCHIVE_DIR) + os.makedirs(DATABASE_DIR) else: - not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store'}) + not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store', '.venv', 'venv', 'virtualenv', '.virtualenv'}) index_exists = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json')) if not_empty and not index_exists: print( diff --git a/archivebox/core/archive_methods.py b/archivebox/legacy/archive_methods.py similarity index 99% rename from archivebox/core/archive_methods.py rename to archivebox/legacy/archive_methods.py index add5a069..d30d008d 100644 --- a/archivebox/core/archive_methods.py +++ b/archivebox/legacy/archive_methods.py @@ -4,13 +4,13 @@ from typing import Dict, List, Tuple, Optional from collections import defaultdict from datetime import datetime -from core.schema import Link, ArchiveResult, ArchiveOutput -from core.index import ( +from .schema import Link, ArchiveResult, ArchiveOutput +from .index import ( write_link_index, patch_links_index, load_json_link_index, ) -from core.config import ( +from .config import ( CURL_BINARY, GIT_BINARY, WGET_BINARY, @@ -40,7 +40,7 @@ from core.config import ( YOUTUBEDL_VERSION, WGET_AUTO_COMPRESSION, ) -from core.util import ( +from .util import ( enforce_types, domain, extension, @@ -54,7 +54,7 @@ from core.util import ( chrome_args, run, PIPE, DEVNULL, ) -from core.logs import ( +from .logs import ( log_link_archiving_started, log_link_archiving_finished, log_archive_method_started, diff --git a/archivebox/core/config.py b/archivebox/legacy/config.py similarity index 95% rename from archivebox/core/config.py rename to archivebox/legacy/config.py index f9f5ea57..413bed68 100644 --- a/archivebox/core/config.py +++ b/archivebox/legacy/config.py @@ -1,6 +1,7 @@ import os import re import sys +import django import shutil from typing import Optional @@ -58,7 +59,6 @@ YOUTUBEDL_BINARY = os.getenv('YOUTUBEDL_BINARY', 'youtube-dl') CHROME_BINARY = os.getenv('CHROME_BINARY', None) - # ****************************************************************************** ### Terminal Configuration @@ -79,7 +79,7 @@ if not USE_COLOR: ANSI = {k: '' for k in ANSI.keys()} -REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) +REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..')) if OUTPUT_DIR: OUTPUT_DIR = os.path.abspath(OUTPUT_DIR) else: @@ -87,11 +87,14 @@ else: ARCHIVE_DIR_NAME = 'archive' SOURCES_DIR_NAME = 'sources' +DATABASE_DIR_NAME = 'database' ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME) SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME) +DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME) PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox') -TEMPLATES_DIR = os.path.join(PYTHON_DIR, 'templates') +LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy') +TEMPLATES_DIR = os.path.join(LEGACY_DIR, 'templates') if COOKIES_FILE: COOKIES_FILE = os.path.abspath(COOKIES_FILE) @@ -100,8 +103,8 @@ URL_BLACKLIST_PTN = re.compile(URL_BLACKLIST, re.IGNORECASE) if URL_BLACKLIST el ########################### Environment & Dependencies ######################### -VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip() -GIT_SHA = VERSION.split('+')[1] +VERSION = open(os.path.join(REPO_DIR, 'VERSION'), 'r').read().strip() +GIT_SHA = VERSION.split('+')[-1] or 'unknown' ### Check Python environment python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor)) @@ -196,6 +199,10 @@ def find_chrome_data_dir() -> Optional[str]: # ****************************************************************************** try: + ### Get Django version + DJANGO_BINARY = django.__file__.replace('__init__.py', 'bin/django-admin.py') + DJANGO_VERSION = '{}.{}.{} {} ({})'.format(*django.VERSION) + ### Make sure curl is installed if USE_CURL: USE_CURL = FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG diff --git a/archivebox/core/index.py b/archivebox/legacy/index.py similarity index 97% rename from archivebox/core/index.py rename to archivebox/legacy/index.py index 516e4304..98d9e3df 100644 --- a/archivebox/core/index.py +++ b/archivebox/legacy/index.py @@ -5,8 +5,8 @@ from datetime import datetime from string import Template from typing import List, Tuple, Iterator, Optional, Mapping -from core.schema import Link, ArchiveResult -from core.config import ( +from .schema import Link, ArchiveResult +from .config import ( OUTPUT_DIR, TEMPLATES_DIR, VERSION, @@ -14,7 +14,7 @@ from core.config import ( FOOTER_INFO, TIMEOUT, ) -from core.util import ( +from .util import ( ts_to_date, merge_links, urlencode, @@ -27,9 +27,9 @@ from core.util import ( copy_and_overwrite, atomic_write, ) -from core.parse import parse_links -from core.links import validate_links -from core.logs import ( +from .parse import parse_links +from .links import validate_links +from .logs import ( log_indexing_process_started, log_indexing_started, log_indexing_finished, diff --git a/archivebox/core/links.py b/archivebox/legacy/links.py similarity index 96% rename from archivebox/core/links.py rename to archivebox/legacy/links.py index fa4f53e6..914c3575 100644 --- a/archivebox/core/links.py +++ b/archivebox/legacy/links.py @@ -1,14 +1,14 @@ from typing import Iterable from collections import OrderedDict -from core.schema import Link -from core.util import ( +from .schema import Link +from .util import ( scheme, fuzzy_url, merge_links, ) -from core.config import URL_BLACKLIST_PTN +from .config import URL_BLACKLIST_PTN def validate_links(links: Iterable[Link]) -> Iterable[Link]: diff --git a/archivebox/core/logs.py b/archivebox/legacy/logs.py similarity index 98% rename from archivebox/core/logs.py rename to archivebox/legacy/logs.py index 0b9243c2..d9b92422 100644 --- a/archivebox/core/logs.py +++ b/archivebox/legacy/logs.py @@ -5,8 +5,8 @@ from datetime import datetime from dataclasses import dataclass from typing import Optional -from core.schema import Link, ArchiveResult -from core.config import ANSI, OUTPUT_DIR +from .schema import Link, ArchiveResult +from .config import ANSI, OUTPUT_DIR @dataclass diff --git a/archivebox/core/parse.py b/archivebox/legacy/parse.py similarity index 99% rename from archivebox/core/parse.py rename to archivebox/legacy/parse.py index 9a6936c0..49ffa7fd 100644 --- a/archivebox/core/parse.py +++ b/archivebox/legacy/parse.py @@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable from datetime import datetime import xml.etree.ElementTree as etree -from core.config import TIMEOUT -from core.util import ( +from .config import TIMEOUT +from .util import ( htmldecode, str_between, URL_REGEX, diff --git a/archivebox/core/purge.py b/archivebox/legacy/purge.py similarity index 93% rename from archivebox/core/purge.py rename to archivebox/legacy/purge.py index d9a5deda..ddc64b6b 100755 --- a/archivebox/core/purge.py +++ b/archivebox/legacy/purge.py @@ -6,8 +6,8 @@ from os.path import exists, join from shutil import rmtree from typing import List -from core.config import ARCHIVE_DIR, OUTPUT_DIR -from core.index import parse_json_links_index, write_html_links_index, write_json_links_index +from .config import ARCHIVE_DIR, OUTPUT_DIR +from .index import parse_json_links_index, write_html_links_index, write_json_links_index def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None: diff --git a/archivebox/core/schema.py b/archivebox/legacy/schema.py similarity index 100% rename from archivebox/core/schema.py rename to archivebox/legacy/schema.py diff --git a/archivebox/templates/index.html b/archivebox/legacy/templates/index.html similarity index 100% rename from archivebox/templates/index.html rename to archivebox/legacy/templates/index.html diff --git a/archivebox/templates/index_row.html b/archivebox/legacy/templates/index_row.html similarity index 100% rename from archivebox/templates/index_row.html rename to archivebox/legacy/templates/index_row.html diff --git a/archivebox/templates/link_index.html b/archivebox/legacy/templates/link_index.html similarity index 100% rename from archivebox/templates/link_index.html rename to archivebox/legacy/templates/link_index.html diff --git a/archivebox/static/archive.png b/archivebox/legacy/templates/static/archive.png similarity index 100% rename from archivebox/static/archive.png rename to archivebox/legacy/templates/static/archive.png diff --git a/archivebox/static/bootstrap.min.css b/archivebox/legacy/templates/static/bootstrap.min.css similarity index 100% rename from archivebox/static/bootstrap.min.css rename to archivebox/legacy/templates/static/bootstrap.min.css diff --git a/archivebox/static/external.png b/archivebox/legacy/templates/static/external.png similarity index 100% rename from archivebox/static/external.png rename to archivebox/legacy/templates/static/external.png diff --git a/archivebox/static/jquery.dataTables.min.css b/archivebox/legacy/templates/static/jquery.dataTables.min.css similarity index 100% rename from archivebox/static/jquery.dataTables.min.css rename to archivebox/legacy/templates/static/jquery.dataTables.min.css diff --git a/archivebox/static/jquery.dataTables.min.js b/archivebox/legacy/templates/static/jquery.dataTables.min.js similarity index 100% rename from archivebox/static/jquery.dataTables.min.js rename to archivebox/legacy/templates/static/jquery.dataTables.min.js diff --git a/archivebox/static/jquery.min.js b/archivebox/legacy/templates/static/jquery.min.js similarity index 100% rename from archivebox/static/jquery.min.js rename to archivebox/legacy/templates/static/jquery.min.js diff --git a/archivebox/static/sort_asc.png b/archivebox/legacy/templates/static/sort_asc.png similarity index 100% rename from archivebox/static/sort_asc.png rename to archivebox/legacy/templates/static/sort_asc.png diff --git a/archivebox/static/sort_both.png b/archivebox/legacy/templates/static/sort_both.png similarity index 100% rename from archivebox/static/sort_both.png rename to archivebox/legacy/templates/static/sort_both.png diff --git a/archivebox/static/sort_desc.png b/archivebox/legacy/templates/static/sort_desc.png similarity index 100% rename from archivebox/static/sort_desc.png rename to archivebox/legacy/templates/static/sort_desc.png diff --git a/archivebox/static/spinner.gif b/archivebox/legacy/templates/static/spinner.gif similarity index 100% rename from archivebox/static/spinner.gif rename to archivebox/legacy/templates/static/spinner.gif diff --git a/archivebox/core/util.py b/archivebox/legacy/util.py similarity index 98% rename from archivebox/core/util.py rename to archivebox/legacy/util.py index cf314287..8121a988 100644 --- a/archivebox/core/util.py +++ b/archivebox/legacy/util.py @@ -26,8 +26,8 @@ from subprocess import ( from base32_crockford import encode as base32_encode # type: ignore -from core.schema import Link -from core.config import ( +from .schema import Link +from .config import ( ANSI, TERM_WIDTH, SOURCES_DIR, @@ -38,9 +38,8 @@ from core.config import ( CHECK_SSL_VALIDITY, WGET_USER_AGENT, CHROME_OPTIONS, - PYTHON_DIR, ) -from core.logs import pretty_path +from .logs import pretty_path ### Parsing Helpers @@ -332,14 +331,6 @@ def wget_output_path(link: Link) -> Optional[str]: return None -@enforce_types -def read_js_script(script_name: str) -> str: - script_path = os.path.join(PYTHON_DIR, 'scripts', script_name) - - with open(script_path, 'r') as f: - return f.read().split('// INFO BELOW HERE')[0].strip() - - ### String Manipulation & Logging Helpers @enforce_types diff --git a/archivebox/manage.py b/archivebox/manage.py index cc70dfd5..52c21895 100755 --- a/archivebox/manage.py +++ b/archivebox/manage.py @@ -3,7 +3,7 @@ import os import sys if __name__ == '__main__': - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/archivebox/tests/firefox_export.html b/archivebox/tests/firefox_export.html deleted file mode 100644 index 99d0bd0e..00000000 --- a/archivebox/tests/firefox_export.html +++ /dev/null @@ -1,34 +0,0 @@ -<!DOCTYPE NETSCAPE-Bookmark-file-1> -<!-- This is an automatically generated file. - It will be read and overwritten. - DO NOT EDIT! --> -<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> -<TITLE>Bookmarks</TITLE> -<H1>Bookmarks Menu</H1> - -<DL><p> - <DT><A HREF="place:folder=BOOKMARKS_MENU&folder=UNFILED_BOOKMARKS&folder=TOOLBAR&queryType=1&sort=12&maxResults=10&excludeQueries=1" ADD_DATE="1409779227" LAST_MODIFIED="1470506008">Recently Bookmarked</A> - <DT><A HREF="place:type=6&sort=14&maxResults=10" ADD_DATE="1470506008" LAST_MODIFIED="1470506008">Recent Tags</A> - <HR> <DT><H3 ADD_DATE="1409779227" LAST_MODIFIED="1409779227">Mozilla Firefox</H3> - <DL><p> - <DT><A HREF="https://www.mozilla.org/en-US/firefox/help/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/0-1409779227970" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHWSURBVHjaYvz//z8DJQAggJiQOe/fv2fv7Oz8rays/N+VkfG/iYnJfyD/1+rVq7ffu3dPFpsBAAHEAHIBCJ85c8bN2Nj4vwsDw/8zQLwKiO8CcRoQu0DxqlWrdsHUwzBAAIGJmTNnPgYa9j8UqhFElwPxf2MIDeIrKSn9FwSJoRkAEEAM0DD4DzMAyPi/G+QKY4hh5WAXGf8PDQ0FGwJ22d27CjADAAIIrLmjo+MXA9R2kAHvGBA2wwx6B8W7od6CeQcggKCmCEL8bgwxYCbUIGTDVkHDBia+CuotgACCueD3TDQN75D4xmAvCoK9ARMHBzAw0AECiBHkAlC0Mdy7x9ABNA3obAZXIAa6iKEcGlMVQHwWyjYuL2d4v2cPg8vZswx7gHyAAAK7AOif7SAbOqCmn4Ha3AHFsIDtgPq/vLz8P4MSkJ2W9h8ggBjevXvHDo4FQUQg/kdypqCg4H8lUIACnQ/SOBMYI8bAsAJFPcj1AAEEjwVQqLpAbXmH5BJjqI0gi9DTAAgDBBCcAVLkgmQ7yKCZxpCQxqUZhAECCJ4XgMl493ug21ZD+aDAXH0WLM4A9MZPXJkJIIAwTAR5pQMalaCABQUULttBGCCAGCnNzgABBgAMJ5THwGvJLAAAAABJRU5ErkJggg==">Help and Tutorials</A> - <DT><A HREF="https://www.mozilla.org/en-US/firefox/customize/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/1-1409779227971" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHWSURBVHjaYvz//z8DJQAggJiQOe/fv2fv7Oz8rays/N+VkfG/iYnJfyD/1+rVq7ffu3dPFpsBAAHEAHIBCJ85c8bN2Nj4vwsDw/8zQLwKiO8CcRoQu0DxqlWrdsHUwzBAAIGJmTNnPgYa9j8UqhFElwPxf2MIDeIrKSn9FwSJoRkAEEAM0DD4DzMAyPi/G+QKY4hh5WAXGf8PDQ0FGwJ22d27CjADAAIIrLmjo+MXA9R2kAHvGBA2wwx6B8W7od6CeQcggKCmCEL8bgwxYCbUIGTDVkHDBia+CuotgACCueD3TDQN75D4xmAvCoK9ARMHBzAw0AECiBHkAlC0Mdy7x9ABNA3obAZXIAa6iKEcGlMVQHwWyjYuL2d4v2cPg8vZswx7gHyAAAK7AOif7SAbOqCmn4Ha3AHFsIDtgPq/vLz8P4MSkJ2W9h8ggBjevXvHDo4FQUQg/kdypqCg4H8lUIACnQ/SOBMYI8bAsAJFPcj1AAEEjwVQqLpAbXmH5BJjqI0gi9DTAAgDBBCcAVLkgmQ7yKCZxpCQxqUZhAECCJ4XgMl493ug21ZD+aDAXH0WLM4A9MZPXJkJIIAwTAR5pQMalaCABQUULttBGCCAGCnNzgABBgAMJ5THwGvJLAAAAABJRU5ErkJggg==">Customize Firefox</A> - <DT><A HREF="https://www.mozilla.org/en-US/contribute/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/2-1409779227973" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHWSURBVHjaYvz//z8DJQAggJiQOe/fv2fv7Oz8rays/N+VkfG/iYnJfyD/1+rVq7ffu3dPFpsBAAHEAHIBCJ85c8bN2Nj4vwsDw/8zQLwKiO8CcRoQu0DxqlWrdsHUwzBAAIGJmTNnPgYa9j8UqhFElwPxf2MIDeIrKSn9FwSJoRkAEEAM0DD4DzMAyPi/G+QKY4hh5WAXGf8PDQ0FGwJ22d27CjADAAIIrLmjo+MXA9R2kAHvGBA2wwx6B8W7od6CeQcggKCmCEL8bgwxYCbUIGTDVkHDBia+CuotgACCueD3TDQN75D4xmAvCoK9ARMHBzAw0AECiBHkAlC0Mdy7x9ABNA3obAZXIAa6iKEcGlMVQHwWyjYuL2d4v2cPg8vZswx7gHyAAAK7AOif7SAbOqCmn4Ha3AHFsIDtgPq/vLz8P4MSkJ2W9h8ggBjevXvHDo4FQUQg/kdypqCg4H8lUIACnQ/SOBMYI8bAsAJFPcj1AAEEjwVQqLpAbXmH5BJjqI0gi9DTAAgDBBCcAVLkgmQ7yKCZxpCQxqUZhAECCJ4XgMl493ug21ZD+aDAXH0WLM4A9MZPXJkJIIAwTAR5pQMalaCABQUULttBGCCAGCnNzgABBgAMJ5THwGvJLAAAAABJRU5ErkJggg==">Get Involved</A> - <DT><A HREF="https://www.mozilla.org/en-US/about/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/3-1409779227974" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHWSURBVHjaYvz//z8DJQAggJiQOe/fv2fv7Oz8rays/N+VkfG/iYnJfyD/1+rVq7ffu3dPFpsBAAHEAHIBCJ85c8bN2Nj4vwsDw/8zQLwKiO8CcRoQu0DxqlWrdsHUwzBAAIGJmTNnPgYa9j8UqhFElwPxf2MIDeIrKSn9FwSJoRkAEEAM0DD4DzMAyPi/G+QKY4hh5WAXGf8PDQ0FGwJ22d27CjADAAIIrLmjo+MXA9R2kAHvGBA2wwx6B8W7od6CeQcggKCmCEL8bgwxYCbUIGTDVkHDBia+CuotgACCueD3TDQN75D4xmAvCoK9ARMHBzAw0AECiBHkAlC0Mdy7x9ABNA3obAZXIAa6iKEcGlMVQHwWyjYuL2d4v2cPg8vZswx7gHyAAAK7AOif7SAbOqCmn4Ha3AHFsIDtgPq/vLz8P4MSkJ2W9h8ggBjevXvHDo4FQUQg/kdypqCg4H8lUIACnQ/SOBMYI8bAsAJFPcj1AAEEjwVQqLpAbXmH5BJjqI0gi9DTAAgDBBCcAVLkgmQ7yKCZxpCQxqUZhAECCJ4XgMl493ug21ZD+aDAXH0WLM4A9MZPXJkJIIAwTAR5pQMalaCABQUULttBGCCAGCnNzgABBgAMJ5THwGvJLAAAAABJRU5ErkJggg==">About Us</A> - </DL><p> - <DT><H3 ADD_DATE="1497562973" LAST_MODIFIED="1497562974">[Folder Name]</H3> - <DL><p> - <DT><A HREF="https://duckduckgo.com/?q=firefox+export+bookmarks&t=ffhp&ia=web" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://duckduckgo.com/favicon.ico" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAG7UlEQVRYha2XbYwbVxWG798oorJQhYKqSJPxzJ35gWDVComiKrK86/H4Y+2J21/5kIyUgiKk1GqArVpEXQlKSlFNm35IFMVREgQNJE4KAsqCpiGtsrsIJjvO2N6xvbPj9SbZJBvjdsmGzcYvP+xxxt6PbBFHeiTP9T3nfe+ZO+NrQjYZmsJ4aoo3WU+w2fqTO7S5BA839Sd3aPUEm60p3qSmMJ7N1n1gWArnqyfYbD3Bw2F2F9eYVTi1h11cwz2nlvDmLIXz/c/CmsJ46gk2W9/Fo76LR11hrXqCTVmKyKxvVmTqCTZVV1irm5dgs5+5I5bCDNQV1prtCDsrsRRmwE7QdE3hVOd7Z05N4VQ7QdOWwgx0O9dToz2+KXE7zjVqCg87xmWcYrU4p9YUHpsizqmOaTvGZWoKDzvONR5ownSJO5vJjrPZ8ruv4E5+HHfy41j862ksvDGC+p7HHmjEjrfbX1O8SceEuZ4JTWE8M1GuYcfvi8/Edmh2nMeN359Ef9xbbOL6ywdgx/kNmYnt0BwTdpzHTJRrrLkn7DibteM87GEuoymMx+qI23EehQMhFA6EYP/mXTSbzR4jn7x/DFdSsQ1NWB0T9jCXaY+x2R7xsiL6ZmIUVozX2mb47EyMwuHKMzHcna+v6oI7br4+AndOP3aczxJCiBXjtZkYRVkRfV0Dlbg3Z3UGy4ros2IUDuaer2HxxrUNxZ24+vxeuHP7cdevxL05QgghRUVkrBhFtbP66SinupOMt38IAJiYvIpj7+n4p351lfDpP5p4Nv0XfPzLcxsamI5yKiGEVGO8ZsUoiorIkGpESE0PU1QjQsoMMAPTwxRu9DPtDfjzX+mb6kL9YBz9NdyYAWbArUkqUW+uOtx2Ux7mMtVhCjf66ftPwNx8E8Xp62sKH3z5HCbys/jXuWPor+GmPMxliorIVIcpKlFvjlTCrGWG+AYhhExHOLUapXDzj/eyXZHHd7+Fw79Q1zTxpfhreOH1P+G2Po7+Gm6mI+3bUAmzViXMWqQSpSh3BqdCfKMSpXBz/s3DXZFvvHAKj+9+C3PzzVUG5uabmJtv4rY+jkqUYvopDtd+8Ag++fXnsXjOgyvf245KlGKqs9hyhFMrUQpSiVCU5Y6rCEU/Y+mDq579/mgtTaK1+De0bp3AXfNbuP3BQ1jRt/RwM7OtW5MQQsoyp1YiFKQcoTA7BsoRin70b4YxOzu7tvJ/ZnCvGlwl5rA8sRW33vkCrL1sT01CCDFlTi1HKIgZvm+gJPENM0zhprTrK5icnFxTf6UkdsUWz3q6NLIPY+6729FfywxTlKT2LTBlTjXDFGQqxFqlEGu5B/u5+IdzGxpYnti6ppgZppjew/aOOYsNsdZUiLVIKeTNTYUpNFlkzCCXmQpT9PPhkcNrGmgtTWLF2IYVfQv+/cFDuPHati6fnvVgeWIrVvQtuPajL3ZrmUEuo8kiMxWmKIW8OVIICalSiMKQvUkzwAyUQhT9jD/3NBYWFtY0URvZi9nvbEcj+zDunP9czx749KwHV77/SE8tM8AMGLI3WQpRFEJCimiyyJRCFCWp/SouyqzVb0DbH4ZlWesaWMv0WhTl9q0uSbxWCrW7TgghpBDk1KJMkZdFX0HyKkWZop/1NuKNk0dWzV2PguRV8rLoK8oUhWB7LxBCCMnLoq8gUxidLhiSN1eQKdx8dOoEAODm0jWUbt03s1QtdOf8+ekv4/A7T+C3hx5Ff74htX/9DInXCp3F9pwJDInmCkEBeVlMqwrjMSSqFYICHM7/NI3l5WW8Xz2B/aMSRj7ah1z5OEbtM3jp6E68dHQn9o9K2D8qIfOzr8Oda0hUUxXGk5fFdOc6t+pEpPoYz2WJNoygAEMWk51rzQgKMIICxlN7sbCw0DWwf1TCU2cC3c9u0kd3wsm7LFFN9TEeQxaTneuG6lvnmG74mAEn0fC3TegSzRlBAdoeP0zT7DGw+3cbG9AlmlN9jMfwt8WNoADD94CTseEXk5clAZclAZOSmCaEkMKgV7nsZ62xsTFo1z/uCv1k7EU8rx7CG39/FcWTr+DUs4/hx28/gZMjj1qFQa9CCCGTkph26hl+MbmhuLsTeoA28pKASwGqaUPtDXPhwgWf3ZzKniy+ad1c6j2mrTQb1qTEZ5wVakOi71KAanlJgB6gjQeuvD9UH+PRB2kuHxCQDwjQhzhV94vJu7du5FqtltpqtdR79+5p3bdiq6U2tYtp3S8m9SFO7eYNtm/DZxJ3hzYk+vQhTtUDAhyqh/ahemgfZl99DvPHj2D++BHMvPhtGMpXu3P0IU51Ovd/iYuyyGh+IaUN0twlP2tNDglwc8nPWtogzWl+IXVRXv8PbH/8Fyy0G0H5udT6AAAAAElFTkSuQmCC">firefox export bookmarks at DuckDuckGo</A> - <DT><A HREF="https://duckduckgo.com/?q=archive+firefox+bookmarks&t=ffab&ia=web" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://duckduckgo.com/favicon.ico" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAG7UlEQVRYha2XbYwbVxWG798oorJQhYKqSJPxzJ35gWDVComiKrK86/H4Y+2J21/5kIyUgiKk1GqArVpEXQlKSlFNm35IFMVREgQNJE4KAsqCpiGtsrsIJjvO2N6xvbPj9SbZJBvjdsmGzcYvP+xxxt6PbBFHeiTP9T3nfe+ZO+NrQjYZmsJ4aoo3WU+w2fqTO7S5BA839Sd3aPUEm60p3qSmMJ7N1n1gWArnqyfYbD3Bw2F2F9eYVTi1h11cwz2nlvDmLIXz/c/CmsJ46gk2W9/Fo76LR11hrXqCTVmKyKxvVmTqCTZVV1irm5dgs5+5I5bCDNQV1prtCDsrsRRmwE7QdE3hVOd7Z05N4VQ7QdOWwgx0O9dToz2+KXE7zjVqCg87xmWcYrU4p9YUHpsizqmOaTvGZWoKDzvONR5ownSJO5vJjrPZ8ruv4E5+HHfy41j862ksvDGC+p7HHmjEjrfbX1O8SceEuZ4JTWE8M1GuYcfvi8/Edmh2nMeN359Ef9xbbOL6ywdgx/kNmYnt0BwTdpzHTJRrrLkn7DibteM87GEuoymMx+qI23EehQMhFA6EYP/mXTSbzR4jn7x/DFdSsQ1NWB0T9jCXaY+x2R7xsiL6ZmIUVozX2mb47EyMwuHKMzHcna+v6oI7br4+AndOP3aczxJCiBXjtZkYRVkRfV0Dlbg3Z3UGy4ros2IUDuaer2HxxrUNxZ24+vxeuHP7cdevxL05QgghRUVkrBhFtbP66SinupOMt38IAJiYvIpj7+n4p351lfDpP5p4Nv0XfPzLcxsamI5yKiGEVGO8ZsUoiorIkGpESE0PU1QjQsoMMAPTwxRu9DPtDfjzX+mb6kL9YBz9NdyYAWbArUkqUW+uOtx2Ux7mMtVhCjf66ftPwNx8E8Xp62sKH3z5HCbys/jXuWPor+GmPMxliorIVIcpKlFvjlTCrGWG+AYhhExHOLUapXDzj/eyXZHHd7+Fw79Q1zTxpfhreOH1P+G2Po7+Gm6mI+3bUAmzViXMWqQSpSh3BqdCfKMSpXBz/s3DXZFvvHAKj+9+C3PzzVUG5uabmJtv4rY+jkqUYvopDtd+8Ag++fXnsXjOgyvf245KlGKqs9hyhFMrUQpSiVCU5Y6rCEU/Y+mDq579/mgtTaK1+De0bp3AXfNbuP3BQ1jRt/RwM7OtW5MQQsoyp1YiFKQcoTA7BsoRin70b4YxOzu7tvJ/ZnCvGlwl5rA8sRW33vkCrL1sT01CCDFlTi1HKIgZvm+gJPENM0zhprTrK5icnFxTf6UkdsUWz3q6NLIPY+6729FfywxTlKT2LTBlTjXDFGQqxFqlEGu5B/u5+IdzGxpYnti6ppgZppjew/aOOYsNsdZUiLVIKeTNTYUpNFlkzCCXmQpT9PPhkcNrGmgtTWLF2IYVfQv+/cFDuPHati6fnvVgeWIrVvQtuPajL3ZrmUEuo8kiMxWmKIW8OVIICalSiMKQvUkzwAyUQhT9jD/3NBYWFtY0URvZi9nvbEcj+zDunP9czx749KwHV77/SE8tM8AMGLI3WQpRFEJCimiyyJRCFCWp/SouyqzVb0DbH4ZlWesaWMv0WhTl9q0uSbxWCrW7TgghpBDk1KJMkZdFX0HyKkWZop/1NuKNk0dWzV2PguRV8rLoK8oUhWB7LxBCCMnLoq8gUxidLhiSN1eQKdx8dOoEAODm0jWUbt03s1QtdOf8+ekv4/A7T+C3hx5Ff74htX/9DInXCp3F9pwJDInmCkEBeVlMqwrjMSSqFYICHM7/NI3l5WW8Xz2B/aMSRj7ah1z5OEbtM3jp6E68dHQn9o9K2D8qIfOzr8Oda0hUUxXGk5fFdOc6t+pEpPoYz2WJNoygAEMWk51rzQgKMIICxlN7sbCw0DWwf1TCU2cC3c9u0kd3wsm7LFFN9TEeQxaTneuG6lvnmG74mAEn0fC3TegSzRlBAdoeP0zT7DGw+3cbG9AlmlN9jMfwt8WNoADD94CTseEXk5clAZclAZOSmCaEkMKgV7nsZ62xsTFo1z/uCv1k7EU8rx7CG39/FcWTr+DUs4/hx28/gZMjj1qFQa9CCCGTkph26hl+MbmhuLsTeoA28pKASwGqaUPtDXPhwgWf3ZzKniy+ad1c6j2mrTQb1qTEZ5wVakOi71KAanlJgB6gjQeuvD9UH+PRB2kuHxCQDwjQhzhV94vJu7du5FqtltpqtdR79+5p3bdiq6U2tYtp3S8m9SFO7eYNtm/DZxJ3hzYk+vQhTtUDAhyqh/ahemgfZl99DvPHj2D++BHMvPhtGMpXu3P0IU51Ovd/iYuyyGh+IaUN0twlP2tNDglwc8nPWtogzWl+IXVRXv8PbH/8Fyy0G0H5udT6AAAAAElFTkSuQmCC">archive firefox bookmarks at DuckDuckGo</A> - <DT><A HREF="https://github.com/nodiscc" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://assets-cdn.github.com/favicon.ico" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAADC0lEQVRYhb2Wv2tUQRDHv9Fwefdul535zp7GRrGz9RcoiEIK/wdBjBhFOwsVxMI/wMbSQrSyiWKwE0t/NHbRIoURJdqI2iRBBZVocxeez93L5XI68Jo3M/v5zuzOvgf0b9F7P2Uid8zsZTRb3tpu/9rabv+KZstm9tJE7njvpwDEdazb28qy3E1y2sgfXeBaj5E/SE6XZbl7YLCqBpI3jVzpF5wQskLypqqGdVcdyTeDgutPJN/03Q3n3ISRi8OCV7qx6Jyb6AkPIez9F/CqiBDC3hxcjXxbS5gz8v4gooxc7OTO1d6/DSHoXwKMvF1fRFWPd9xj3vupaPahOnYUeUyRx9WxjGYfOqM4BgCqejwh7nb90O1LVVGW5Z5qnKqGRqOxC8CmRBM3NRqNXfUTX5blnsza+6rVz2SC9vc8NH1YWZb7M1s0040ZN/JnKkhEJjcqQEQmMwJ+AhiHOnc2FdAm3wPgRgUAIMl3KYY6dxYkp1NO7/2pIcABAN77UykGyWnUx6TbnnVfnz1MVUNqm42cQzT7+pfDbGFY8K6Z2UKdE82+ItWaaDY/bAHR7HWKlRRg5NKwBRi5lBSQdQBbhsjfki00isxm7oATw6KLyInkVovMguStjPMFgNEh8EejyIvMGN6CtlrHUs5OwPWN0klez62vrdYxAPDR7EsuyMh7AMYHYG8z8l5u3Wj2BYAHAKjqjVVVqtecc0co8qQi4nubvMsQznS+YpsTwFHfbB5U584ZOWPk9xy8w7mxmqmq26PZtw5shSIXAIwZ+TzRkRkAIwkBIybyoBe0Uv03Vd3+R7aIXK0GNZvNA0VR7Kx/SFT1UK7nzrnD/QgQkaup/FGSzyqVPupq895PqeqVVqt1NNP+rrXXgpN8hh7TNV69MkXkfA9YymSN1r/GWge6KIod0Wx+NSnGpxrCJe/9SVW9DKAYREA0my+KYke/lZDkw8wVLesVQPIhBvi5GWEIp6PZx0EFRPITQziD9NT0bY4iF8zslZGf0fndztiYkZ/N7BVDuAjAbQT8X+w36KQvZccCoxkAAAAASUVORK5CYII=">nodiscc (nodiscc) · GitHub</A> - <DT><A HREF="https://github.com/pirate/ArchiveBox#troubleshooting" ADD_DATE="1497562975" LAST_MODIFIED="1497562975" ICON_URI="https://assets-cdn.github.com/favicon.ico" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAADC0lEQVRYhb2Wv2tUQRDHv9Fwefdul535zp7GRrGz9RcoiEIK/wdBjBhFOwsVxMI/wMbSQrSyiWKwE0t/NHbRIoURJdqI2iRBBZVocxeez93L5XI68Jo3M/v5zuzOvgf0b9F7P2Uid8zsZTRb3tpu/9rabv+KZstm9tJE7njvpwDEdazb28qy3E1y2sgfXeBaj5E/SE6XZbl7YLCqBpI3jVzpF5wQskLypqqGdVcdyTeDgutPJN/03Q3n3ISRi8OCV7qx6Jyb6AkPIez9F/CqiBDC3hxcjXxbS5gz8v4gooxc7OTO1d6/DSHoXwKMvF1fRFWPd9xj3vupaPahOnYUeUyRx9WxjGYfOqM4BgCqejwh7nb90O1LVVGW5Z5qnKqGRqOxC8CmRBM3NRqNXfUTX5blnsza+6rVz2SC9vc8NH1YWZb7M1s0040ZN/JnKkhEJjcqQEQmMwJ+AhiHOnc2FdAm3wPgRgUAIMl3KYY6dxYkp1NO7/2pIcABAN77UykGyWnUx6TbnnVfnz1MVUNqm42cQzT7+pfDbGFY8K6Z2UKdE82+ItWaaDY/bAHR7HWKlRRg5NKwBRi5lBSQdQBbhsjfki00isxm7oATw6KLyInkVovMguStjPMFgNEh8EejyIvMGN6CtlrHUs5OwPWN0klez62vrdYxAPDR7EsuyMh7AMYHYG8z8l5u3Wj2BYAHAKjqjVVVqtecc0co8qQi4nubvMsQznS+YpsTwFHfbB5U584ZOWPk9xy8w7mxmqmq26PZtw5shSIXAIwZ+TzRkRkAIwkBIybyoBe0Uv03Vd3+R7aIXK0GNZvNA0VR7Kx/SFT1UK7nzrnD/QgQkaup/FGSzyqVPupq895PqeqVVqt1NNP+rrXXgpN8hh7TNV69MkXkfA9YymSN1r/GWge6KIod0Wx+NSnGpxrCJe/9SVW9DKAYREA0my+KYke/lZDkw8wVLesVQPIhBvi5GWEIp6PZx0EFRPITQziD9NT0bY4iF8zslZGf0fndztiYkZ/N7BVDuAjAbQT8X+w36KQvZccCoxkAAAAASUVORK5CYII=">pirate/ArchiveBox · Github</A> - <DT><A HREF="http://www.cs.unc.edu/~fabian/papers/foniks-oak11.pdf" ADD_DATE="1497562976" LAST_MODIFIED="1497562976" ICON_URI="https://assets-cdn.github.com/favicon.ico" ICON="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAADC0lEQVRYhb2Wv2tUQRDHv9Fwefdul535zp7GRrGz9RcoiEIK/wdBjBhFOwsVxMI/wMbSQrSyiWKwE0t/NHbRIoURJdqI2iRBBZVocxeez93L5XI68Jo3M/v5zuzOvgf0b9F7P2Uid8zsZTRb3tpu/9rabv+KZstm9tJE7njvpwDEdazb28qy3E1y2sgfXeBaj5E/SE6XZbl7YLCqBpI3jVzpF5wQskLypqqGdVcdyTeDgutPJN/03Q3n3ISRi8OCV7qx6Jyb6AkPIez9F/CqiBDC3hxcjXxbS5gz8v4gooxc7OTO1d6/DSHoXwKMvF1fRFWPd9xj3vupaPahOnYUeUyRx9WxjGYfOqM4BgCqejwh7nb90O1LVVGW5Z5qnKqGRqOxC8CmRBM3NRqNXfUTX5blnsza+6rVz2SC9vc8NH1YWZb7M1s0040ZN/JnKkhEJjcqQEQmMwJ+AhiHOnc2FdAm3wPgRgUAIMl3KYY6dxYkp1NO7/2pIcABAN77UykGyWnUx6TbnnVfnz1MVUNqm42cQzT7+pfDbGFY8K6Z2UKdE82+ItWaaDY/bAHR7HWKlRRg5NKwBRi5lBSQdQBbhsjfki00isxm7oATw6KLyInkVovMguStjPMFgNEh8EejyIvMGN6CtlrHUs5OwPWN0klez62vrdYxAPDR7EsuyMh7AMYHYG8z8l5u3Wj2BYAHAKjqjVVVqtecc0co8qQi4nubvMsQznS+YpsTwFHfbB5U584ZOWPk9xy8w7mxmqmq26PZtw5shSIXAIwZ+TzRkRkAIwkBIybyoBe0Uv03Vd3+R7aIXK0GNZvNA0VR7Kx/SFT1UK7nzrnD/QgQkaup/FGSzyqVPupq895PqeqVVqt1NNP+rrXXgpN8hh7TNV69MkXkfA9YymSN1r/GWge6KIod0Wx+NSnGpxrCJe/9SVW9DKAYREA0my+KYke/lZDkw8wVLesVQPIhBvi5GWEIp6PZx0EFRPITQziD9NT0bY4iF8zslZGf0fndztiYkZ/N7BVDuAjAbQT8X+w36KQvZccCoxkAAAAASUVORK5CYII=">Phonotactic Reconstruction of Encrypted VoIP Conversations</A> - <DT><A HREF="https://www.ghacks.net/2009/07/23/firefox-bookmarks-archiver/" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://www.ghacks.net/wp-content/uploads/2005/10/favicon.ico" ICON="data:image/png;base64,AAABAAEAEBAAAAEAGABoAwAAFgAAACgAAAAQAAAAIAAAAAEAGAAAAAAAAAMAAAAAAAAAAAAAAAAAAAAAAAD+/P76/Pzs7Ozg2uXNyeW/ueOXmuKJleOpsOrAvubNzOff3ebv7+77/f39/f7+/v79/vv+/f77+v3s7vjIy++5vO+truh9keNggNuRpemrtua/0Orz+f79/P38/v3+/v78/f78/vna7PpyltlTfNxsjOSdruq4u+6grPFlidxxl+SdtummvN7w+Pz9/v3+//79/fv49/2cqttZgNtFeOJGeeNbhdaqtubGxeuyw/VuleFvm9ybuui2yOb5/fv+/v79/Pzj4ve8vumzuvCZqO1ojeCUsvSJq+pyneGAqOqYuupsm+F7ouCkveXk6/b8/f74+fqhsuSRouios/G5v+vDx+mjtu9Jht43h+k+iuJ5qeG61vdsnNeNsevU3vD7/v7h7fxUgNRDe+JSg96JpObByu+dt+s6ieM3jOkzj+dBkt281PK31vFmm9XC1/T7/f7d6fV8ntxfkuRFhN1Bg+NtnOalyvRElts4kuc4k+c9leWn0ezm7/ebweaNuuD3/P7z9fnY1+vV3PGsyu1bld9Cjd9nouCFw+1QnN48mONWoNnC5/fY5/XP6vqHstr1+v7x+P3AyeTAz+7f6fTS5vdhpeA5kOlssOG54/iv2e/C5/iZ0ux2s9ve8/u91un3/P72+/yCpNpIh96JtOXg8vvZ7/xjp90+meGQxezy+/ze8vpcpto/ouG+4/Tw9vb8/v78/fu91O9eldxMjdp+seLp+P7I6vdNoeJIouDC5vbw/f6Jxek9oOWi0Or4/P3+/v73/v3v+f3E1+qex/FOktubyur2+/yez+05pN93ueXy/fy94/dQo9e+3PH7/f7//v78/v38/f32+PfA2exanuBVmuHg8vza8vxVqeBPpN3a8vva6/Wm0+P0+v38/v7+/v79/f77/P77/v3t+fyrz+lwp9XT7PXl+fxost1VptbQ6fjz+Pz0/vv+/vz9/v7+/v/8/vz8/v38/vz8/fz5/P3x+v7y/Pv5+vnJ5PPQ8Pn2/Pz8/fz+/v38/f7+/v3///4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">Firefox Bookmarks Archiver - gHacks Tech News</A> - </DL><p> - <DT><H3 ADD_DATE="1409779227" LAST_MODIFIED="1470506008" PERSONAL_TOOLBAR_FOLDER="true">Bookmarks Toolbar</H3> - <DD>Add bookmarks to this folder to see them displayed on the Bookmarks Toolbar - <DL><p> - <DT><A HREF="place:sort=8&maxResults=10" ADD_DATE="1470506008" LAST_MODIFIED="1470506008">Most Visited</A> - <DT><A HREF="https://www.mozilla.org/en-US/firefox/central/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227">Getting Started</A> - </DL><p> -</DL> diff --git a/archivebox/tests/pinboard_export.html b/archivebox/tests/pinboard_export.html deleted file mode 100644 index e12b5e41..00000000 --- a/archivebox/tests/pinboard_export.html +++ /dev/null @@ -1,12 +0,0 @@ -<!DOCTYPE NETSCAPE-Bookmark-file-1> -<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> -<TITLE>Pinboard Bookmarks</TITLE> -<H1>Bookmarks</H1> -<DL> -<p> - -<DT><A HREF="https://github.com/trailofbits/algo" ADD_DATE="1542616733" PRIVATE="1" TOREAD="1" TAGS="vpn,scripts,toread">Algo VPN scripts</A> -<DT><A HREF="http://www.ulisp.com/" ADD_DATE="1542374412" PRIVATE="1" TOREAD="1" TAGS="arduino,avr,embedded,lisp,toread">uLisp</A> - -</DL> -</p> diff --git a/archivebox/tests/pinboard_export.json b/archivebox/tests/pinboard_export.json deleted file mode 100644 index c39d08dd..00000000 --- a/archivebox/tests/pinboard_export.json +++ /dev/null @@ -1,8 +0,0 @@ -[{"href":"https:\/\/en.wikipedia.org\/wiki\/International_Typographic_Style","description":"International Typographic Style - Wikipedia, the free encyclopedia","extended":"","meta":"32f4cc916e6f5919cc19aceb10559cc1","hash":"3dd64e155e16731d20350bec6bef7cb5","time":"2016-06-07T11:27:08Z","shared":"no","toread":"yes","tags":""}, -{"href":"https:\/\/news.ycombinator.com\/item?id=11686984","description":"Announcing Certbot: EFF's Client for Let's Encrypt | Hacker News","extended":"","meta":"4a49602ba5d20ec3505c75d38ebc1d63","hash":"1c1acb53a5bd520e8529ce4f9600abee","time":"2016-05-13T05:46:16Z","shared":"no","toread":"yes","tags":""}, -{"href":"https:\/\/github.com\/google\/styleguide","description":"GitHub - google\/styleguide: Style guides for Google-originated open-source projects","extended":"","meta":"15a8d50f7295f18ccb6dd19cb689c68a","hash":"1028bf9872d8e4ea1b1858f4044abb58","time":"2016-02-24T08:49:25Z","shared":"no","toread":"no","tags":"code.style.guide programming reference web.dev"}, -{"href":"http:\/\/en.wikipedia.org\/wiki\/List_of_XML_and_HTML_character_entity_references","description":"List of XML and HTML character entity references - Wikipedia, the free encyclopedia","extended":"","meta":"6683a70f0f59c92c0bfd0bce653eab69","hash":"344d975c6251a8d460971fa2c43d9bbb","time":"2014-06-16T04:17:15Z","shared":"no","toread":"no","tags":"html reference web.dev typography"}, -{"href":"https:\/\/pushover.net\/","description":"Pushover: Simple Notifications for Android, iOS, and Desktop","extended":"","meta":"1e68511234d9390d10b7772c8ccc4b9e","hash":"bb93374ead8a937b18c7c46e13168a7d","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"app android"}, -{"href":"http:\/\/www.reddit.com\/r\/Android","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 1"}, -{"href":"http:\/\/www.reddit.com\/r\/Android2","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e2","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 2"}, -{"href":"http:\/\/www.reddit.com\/r\/Android3","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e4","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 3"}] diff --git a/archivebox/tests/pinboard_export.rss b/archivebox/tests/pinboard_export.rss deleted file mode 100644 index a300720a..00000000 --- a/archivebox/tests/pinboard_export.rss +++ /dev/null @@ -1,46 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/"> - <channel rdf:about="http://pinboard.in"> - <title>Pinboard (private aaronmueller)</title> - <link>https://pinboard.in/u:aaronmueller/private/</link> - <description></description> - <items> - <rdf:Seq> - <rdf:li rdf:resource="https://mehkee.com/"/> - <rdf:li rdf:resource="https://qmk.fm/"/> - </rdf:Seq> - </items> - </channel> - - <item rdf:about="https://mehkee.com/"> - <title>Mehkee - Mechanical Keyboard Parts & Accessories</title> - <dc:date>2018-11-08T21:29:32+00:00</dc:date> - <link>https://mehkee.com/</link> - <dc:creator>aaronmueller</dc:creator> - <dc:subject>keyboard gadget diy</dc:subject> - <dc:source>http://pinboard.in/</dc:source> - <dc:identifier>http://pinboard.in/u:aaronmueller/b:xxx/</dc:identifier> - <taxo:topics> - <rdf:Bag> - <rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:keyboard"/> - <rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:gadget"/> - <rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:diy"/> - </rdf:Bag> - </taxo:topics> - </item> - <item rdf:about="https://qmk.fm/"> - <title>QMK Firmware - An open source firmware for AVR and ARM based keyboards</title> - <dc:date>2018-11-06T22:36:21+00:00</dc:date> - <link>https://qmk.fm/</link> - <dc:creator>aaronmueller</dc:creator> - <dc:subject>firmware keyboard</dc:subject> - <dc:source>http://pinboard.in/</dc:source> - <dc:identifier>http://pinboard.in/u:aaronmueller/b:xxx/</dc:identifier> - <taxo:topics> - <rdf:Bag> - <rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:firmware"/> - <rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:keyboard"/> - </rdf:Bag> - </taxo:topics> - </item> -</rdf:RDF> diff --git a/archivebox/tests/pinboard_export.xml b/archivebox/tests/pinboard_export.xml deleted file mode 100644 index 9dce0f54..00000000 --- a/archivebox/tests/pinboard_export.xml +++ /dev/null @@ -1,5 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - <posts user="aaronmueller"> -<post href="https://github.com/trailofbits/algo" time="2018-11-19T08:38:53Z" description="Algo VPN scripts" extended="" tag="vpn scripts" hash="18d708f67bb26d843b1cac4530bb52aa" shared="no" toread="yes" /> -<post href="http://www.ulisp.com/" time="2018-11-16T13:20:12Z" description="uLisp" extended="" tag="arduino avr embedded lisp" hash="2a17ae95925a03a5b9bb38cf7f6c6f9b" shared="no" toread="yes" /> -</posts> diff --git a/archivebox/tests/pinboard_export_2.json b/archivebox/tests/pinboard_export_2.json deleted file mode 100644 index b106039c..00000000 --- a/archivebox/tests/pinboard_export_2.json +++ /dev/null @@ -1,2 +0,0 @@ -[{"href":"https:\/\/github.com\/trailofbits\/algo","description":"Algo VPN scripts","extended":"","meta":"62325ba3b577683aee854d7f191034dc","hash":"18d708f67bb26d843b1cac4530bb52aa","time":"2018-11-19T08:38:53Z","shared":"no","toread":"yes","tags":"vpn scripts"}, -{"href":"http:\/\/www.ulisp.com\/","description":"uLisp","extended":"","meta":"7bd0c0ef31f69d1459e3d37366e742b3","hash":"2a17ae95925a03a5b9bb38cf7f6c6f9b","time":"2018-11-16T13:20:12Z","shared":"no","toread":"yes","tags":"arduino avr embedded lisp"}] diff --git a/archivebox/tests/pocket_export.html b/archivebox/tests/pocket_export.html deleted file mode 100644 index bb51c0c6..00000000 --- a/archivebox/tests/pocket_export.html +++ /dev/null @@ -1,38 +0,0 @@ -<!DOCTYPE html> -<html> - <!--So long and thanks for all the fish--> - <head> - <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> - <title>Pocket Export</title> - </head> - <body> - <h1>Unread</h1> - <ul> - <li><a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3110382/" time_added="1493913054" tags="">The Radical Plasticity Thesis: How the Brain Learns to be Conscious</a></li> -<li><a href="https://martinfowler.com/eaaDev/uiArchs.html" time_added="1493909628" tags="">GUI Architectures</a></li> -<li><a href="https://issuu.com/crowdcraft/docs/shanghai-talk-july-2012" time_added="1493900327" tags="make512">Shanghai Talk July 2012 by Mike Hall - issuu</a></li> -<li><a href="http://make512.weebly.com/about-us.html" time_added="1493900002" tags="">About Us - make512</a></li> -<li><a href="https://openzfsonosx.org/wiki/ZFS_on_Boot" time_added="1493887140" tags="">ZFS on Boot - OpenZFS on OS X</a></li> -<li><a href="http://www.softpanorama.org/DNS/history.shtml" time_added="1493869958" tags="">History of DNS</a></li> -<li><a href="https://chromium.googlesource.com/chromium/src/+/master/docs/linux_sandboxing.md" time_added="1493869649" tags="">Linux Sandboxing</a></li> -<li><a href="https://hackernoon.com/rems-and-ems-and-why-you-probably-dont-need-them-664b9ce1e09f" time_added="1493694979" tags="">rems and ems, and why you probably don’t need them – Hacker Noon</a></li> -<li><a href="https://wiki.archlinux.org/index.php/full_system_backup_with_rsync" time_added="1493581911" tags="">Full system backup with rsync - ArchWiki</a></li> -<li><a href="https://www.youtube.com/watch?v=iNnAQpAHfmA" time_added="1493581911" tags="">SingUnltd. - Nature Boy (Flying Lotus Massage Situation Sample?! )</a></li> - </ul> - - <h1>Read Archive</h1> - <ul> - <li><a href="https://github.com/Droogans/unmaintainable-code" time_added="1478739800" tags="">Droogans/unmaintainable-code: An easier to share version of the infamous ht</a></li> -<li><a href="http://www.benstopford.com/2015/02/14/log-structured-merge-trees/" time_added="1478739709" tags="">Log Structured Merge Trees - ben stopford</a></li> -<li><a href="http://jgthms.com/web-design-in-4-minutes/#share" time_added="1478739628" tags="">Web Design in 4 minutes</a></li> -<li><a href="https://eev.ee/blog/2016/07/26/the-hardest-problem-in-computer-science/" time_added="1478739622" tags="">The hardest problem in computer science / fuzzy notepad</a></li> -<li><a href="https://medium.com/@iamjordanlittle/9-underutilized-features-in-css-90ced6ddbfe7#.690ah7whf" time_added="1476686912" tags="">9 Underutilized Features in CSS – Medium</a></li> -<li><a href="http://themacro.com/articles/2016/09/employee-1-coinbase/" time_added="1476686907" tags="">Employee #1: Coinbase · The Macro</a></li> -<li><a href="https://juokaz.com/blog/becoming-a-cto" time_added="1476686904" tags="">Becoming a CTO // Juozas Kaziukėnas</a></li> -<li><a href="https://backchannel.com/the-internet-really-has-changed-everything-here-s-the-proof-928eaead18a8#.ekfmwcjh2" time_added="1476686896" tags="">The Internet Really Has Changed Everything. Here’s the Proof.</a></li> -<li><a href="http://www.hindawi.com/journals/ijbm/2011/172389/" time_added="1424321329" tags="">Experimental and Modeling Study of Collagen Scaffolds with the Effects of C</a></li> -<li><a href="http://search.cpan.org/dist/Locale-Maketext/lib/Locale/Maketext/TPJ13.pod?#A_Localization_Horror_Story:_It_Could_Happen_To_You" time_added="1424306906" tags="">Locale::Maketext::TPJ13 - search.cpan.org</a></li> - - </ul> - </body> -</html> diff --git a/archivebox/tests/rss_export.xml b/archivebox/tests/rss_export.xml deleted file mode 100644 index 69eb9bc2..00000000 --- a/archivebox/tests/rss_export.xml +++ /dev/null @@ -1,228 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?><rss version="2.0" - xmlns:content="http://purl.org/rss/1.0/modules/content/" - xmlns:wfw="http://wellformedweb.org/CommentAPI/" - xmlns:dc="http://purl.org/dc/elements/1.1/" - xmlns:atom="http://www.w3.org/2005/Atom" - > - -<channel> - -<title>My Reading List: Read and Unread</title> -<description>Items I've saved to read</description> -<link>http://readitlaterlist.com/users/nikisweeting/feed/all</link> -<atom:link href="http://readitlaterlist.com/users/nikisweeting/feed/all" rel="self" type="application/rss+xml" /> - - -<item> -<title><![CDATA[Cell signaling]]></title> -<category>Unread</category> -<link>https://en.wikipedia.org/wiki/Cell_signaling</link> -<guid>https://en.wikipedia.org/wiki/Cell_signaling</guid> -<pubDate>Mon, 30 Oct 2017 01:12:10 -0500</pubDate> -</item> -<item> -<title><![CDATA[Hayflick limit]]></title> -<category>Unread</category> -<link>https://en.wikipedia.org/wiki/Hayflick_limit</link> -<guid>https://en.wikipedia.org/wiki/Hayflick_limit</guid> -<pubDate>Mon, 30 Oct 2017 01:11:38 -0500</pubDate> -</item> -<item> -<title><![CDATA[Even moderate drinking by parents can upset children – study]]></title> -<category>Unread</category> -<link>https://theguardian.com/society/2017/oct/18/even-moderate-drinking-by-parents-can-upset-children-study?CMP=Share_AndroidApp_Signal</link> -<guid>https://theguardian.com/society/2017/oct/18/even-moderate-drinking-by-parents-can-upset-children-study?CMP=Share_AndroidApp_Signal</guid> -<pubDate>Mon, 30 Oct 2017 01:11:30 -0500</pubDate> -</item> -<item> -<title><![CDATA[How Merkle trees enable the decentralized Web]]></title> -<category>Unread</category> -<link>https://taravancil.com/blog/how-merkle-trees-enable-decentralized-web</link> -<guid>https://taravancil.com/blog/how-merkle-trees-enable-decentralized-web</guid> -<pubDate>Mon, 30 Oct 2017 01:11:30 -0500</pubDate> -</item> -<item> -<title><![CDATA[Inertial navigation system]]></title> -<category>Unread</category> -<link>https://en.wikipedia.org/wiki/Inertial_navigation_system</link> -<guid>https://en.wikipedia.org/wiki/Inertial_navigation_system</guid> -<pubDate>Mon, 30 Oct 2017 01:10:10 -0500</pubDate> -</item> -<item> -<title><![CDATA[Dead reckoning]]></title> -<category>Unread</category> -<link>https://en.wikipedia.org/wiki/Dead_reckoning</link> -<guid>https://en.wikipedia.org/wiki/Dead_reckoning</guid> -<pubDate>Mon, 30 Oct 2017 01:10:08 -0500</pubDate> -</item> -<item> -<title><![CDATA[Calling Rust From Python]]></title> -<category>Unread</category> -<link>https://bheisler.github.io/post/calling-rust-in-python</link> -<guid>https://bheisler.github.io/post/calling-rust-in-python</guid> -<pubDate>Mon, 30 Oct 2017 01:04:33 -0500</pubDate> -</item> -<item> -<title><![CDATA[Why would anyone choose Docker over fat binaries?]]></title> -<category>Unread</category> -<link>http://smashcompany.com/technology/why-would-anyone-choose-docker-over-fat-binaries</link> -<guid>http://smashcompany.com/technology/why-would-anyone-choose-docker-over-fat-binaries</guid> -<pubDate>Sun, 29 Oct 2017 14:57:25 -0500</pubDate> -</item> -<item> -<title><![CDATA[]]></title> -<category>Unread</category> -<link>https://heml.io</link> -<guid>https://heml.io</guid> -<pubDate>Sun, 29 Oct 2017 14:55:26 -0500</pubDate> -</item> -<item> -<title><![CDATA[A surprising amount of people want to be in North Korea]]></title> -<category>Unread</category> -<link>https://blog.benjojo.co.uk/post/north-korea-dprk-bgp-geoip-fruad</link> -<guid>https://blog.benjojo.co.uk/post/north-korea-dprk-bgp-geoip-fruad</guid> -<pubDate>Sat, 28 Oct 2017 05:41:41 -0500</pubDate> -</item> -<item> -<title><![CDATA[Learning a Hierarchy]]></title> -<category>Unread</category> -<link>https://blog.openai.com/learning-a-hierarchy</link> -<guid>https://blog.openai.com/learning-a-hierarchy</guid> -<pubDate>Thu, 26 Oct 2017 16:43:48 -0500</pubDate> -</item> -<item> -<title><![CDATA[High Performance Browser Networking]]></title> -<category>Unread</category> -<link>https://hpbn.co</link> -<guid>https://hpbn.co</guid> -<pubDate>Wed, 25 Oct 2017 19:05:24 -0500</pubDate> -</item> -<item> -<title><![CDATA[What tender and juicy drama is going on at your school/workplace?]]></title> -<category>Unread</category> -<link>https://reddit.com/r/AskReddit/comments/78nc2a/what_tender_and_juicy_drama_is_going_on_at_your/dovab2v</link> -<guid>https://reddit.com/r/AskReddit/comments/78nc2a/what_tender_and_juicy_drama_is_going_on_at_your/dovab2v</guid> -<pubDate>Wed, 25 Oct 2017 18:05:58 -0500</pubDate> -</item> -<item> -<title><![CDATA[Using an SSH Bastion Host]]></title> -<category>Unread</category> -<link>https://blog.scottlowe.org/2015/11/21/using-ssh-bastion-host</link> -<guid>https://blog.scottlowe.org/2015/11/21/using-ssh-bastion-host</guid> -<pubDate>Wed, 25 Oct 2017 11:38:47 -0500</pubDate> -</item> -<item> -<title><![CDATA[Let's Define "undefined" | NathanShane.me]]></title> -<category>Unread</category> -<link>https://nathanshane.me/blog/let's-define-undefined</link> -<guid>https://nathanshane.me/blog/let's-define-undefined</guid> -<pubDate>Wed, 25 Oct 2017 11:32:59 -0500</pubDate> -</item> -<item> -<title><![CDATA[Control theory]]></title> -<category>Unread</category> -<link>https://en.wikipedia.org/wiki/Control_theory#Closed-loop_transfer_function</link> -<guid>https://en.wikipedia.org/wiki/Control_theory#Closed-loop_transfer_function</guid> -<pubDate>Tue, 24 Oct 2017 22:57:43 -0500</pubDate> -</item> -<item> -<title><![CDATA[J012-86-intractable.pdf]]></title> -<category>Unread</category> -<link>http://mit.edu/~jnt/Papers/J012-86-intractable.pdf</link> -<guid>http://mit.edu/~jnt/Papers/J012-86-intractable.pdf</guid> -<pubDate>Tue, 24 Oct 2017 22:56:32 -0500</pubDate> -</item> -<item> -<title><![CDATA[Dynamic Programming: First Principles]]></title> -<category>Unread</category> -<link>http://flawlessrhetoric.com/Dynamic-Programming-First-Principles</link> -<guid>http://flawlessrhetoric.com/Dynamic-Programming-First-Principles</guid> -<pubDate>Tue, 24 Oct 2017 22:56:30 -0500</pubDate> -</item> -<item> -<title><![CDATA[What Would Happen If There Were No Number 6?]]></title> -<category>Unread</category> -<link>https://fivethirtyeight.com/features/what-would-happen-if-there-were-no-number-6</link> -<guid>https://fivethirtyeight.com/features/what-would-happen-if-there-were-no-number-6</guid> -<pubDate>Tue, 24 Oct 2017 22:21:59 -0500</pubDate> -</item> -<item> -<title><![CDATA[Ten Basic Rules for Adventure]]></title> -<category>Unread</category> -<link>https://outsideonline.com/2252916/10-basic-rules-adventure</link> -<guid>https://outsideonline.com/2252916/10-basic-rules-adventure</guid> -<pubDate>Tue, 24 Oct 2017 20:56:25 -0500</pubDate> -</item> -<item> -<title><![CDATA[Insects Are In Serious Trouble]]></title> -<category>Unread</category> -<link>https://theatlantic.com/science/archive/2017/10/oh-no/543390?single_page=true</link> -<guid>https://theatlantic.com/science/archive/2017/10/oh-no/543390?single_page=true</guid> -<pubDate>Mon, 23 Oct 2017 23:10:10 -0500</pubDate> -</item> -<item> -<title><![CDATA[Netflix/bless]]></title> -<category>Unread</category> -<link>https://github.com/Netflix/bless</link> -<guid>https://github.com/Netflix/bless</guid> -<pubDate>Mon, 23 Oct 2017 23:04:46 -0500</pubDate> -</item> -<item> -<title><![CDATA[Getting Your First 10 Customers]]></title> -<category>Unread</category> -<link>https://stripe.com/atlas/guides/starting-sales</link> -<guid>https://stripe.com/atlas/guides/starting-sales</guid> -<pubDate>Mon, 23 Oct 2017 22:27:36 -0500</pubDate> -</item> -<item> -<title><![CDATA[GPS Hardware]]></title> -<category>Unread</category> -<link>https://novasummits.com/gps-hardware</link> -<guid>https://novasummits.com/gps-hardware</guid> -<pubDate>Mon, 23 Oct 2017 04:44:40 -0500</pubDate> -</item> -<item> -<title><![CDATA[Bicycle Tires and Tubes]]></title> -<category>Unread</category> -<link>http://sheldonbrown.com/tires.html#pressure</link> -<guid>http://sheldonbrown.com/tires.html#pressure</guid> -<pubDate>Mon, 23 Oct 2017 01:28:32 -0500</pubDate> -</item> -<item> -<title><![CDATA[Tire light is on]]></title> -<category>Unread</category> -<link>https://reddit.com/r/Justrolledintotheshop/comments/77zm9e/tire_light_is_on/doqbshe</link> -<guid>https://reddit.com/r/Justrolledintotheshop/comments/77zm9e/tire_light_is_on/doqbshe</guid> -<pubDate>Mon, 23 Oct 2017 01:21:42 -0500</pubDate> -</item> -<item> -<title><![CDATA[Bad_Salish_Boo ?? on Twitter]]></title> -<category>Unread</category> -<link>https://t.co/PDLlNjACv9</link> -<guid>https://t.co/PDLlNjACv9</guid> -<pubDate>Sat, 21 Oct 2017 06:48:07 -0500</pubDate> -</item> -<item> -<title><![CDATA[Is an Open Marriage a Happier Marriage?]]></title> -<category>Unread</category> -<link>https://nytimes.com/2017/05/11/magazine/is-an-open-marriage-a-happier-marriage.html</link> -<guid>https://nytimes.com/2017/05/11/magazine/is-an-open-marriage-a-happier-marriage.html</guid> -<pubDate>Fri, 20 Oct 2017 13:08:52 -0500</pubDate> -</item> -<item> -<title><![CDATA[The Invention of Monogamy]]></title> -<category>Unread</category> -<link>https://thenib.com/the-invention-of-monogamy</link> -<guid>https://thenib.com/the-invention-of-monogamy</guid> -<pubDate>Fri, 20 Oct 2017 12:19:00 -0500</pubDate> -</item> -<item> -<title><![CDATA[Google Chrome May Add a Permission to Stop In-Browser Cryptocurrency Miners]]></title> -<category>Unread</category> -<link>https://bleepingcomputer.com/news/google/google-chrome-may-add-a-permission-to-stop-in-browser-cryptocurrency-miners</link> -<guid>https://bleepingcomputer.com/news/google/google-chrome-may-add-a-permission-to-stop-in-browser-cryptocurrency-miners</guid> -<pubDate>Fri, 20 Oct 2017 03:57:41 -0500</pubDate> -</item> -</channel> - -</rss> diff --git a/archivebox/tests/tests.py b/archivebox/tests/tests.py deleted file mode 100755 index 33fd9ba4..00000000 --- a/archivebox/tests/tests.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -import json -import os -from os.path import dirname, pardir, join -from subprocess import check_output, check_call -from tempfile import TemporaryDirectory -from typing import List - -import pytest - - -ARCHIVER_BIN = join(dirname(__file__), pardir, 'archive.py') - - -class Helper: - def __init__(self, output_dir: str): - self.output_dir = output_dir - - def run(self, links, env=None, env_defaults=None): - if env_defaults is None: - env_defaults = { - # we don't wanna spam archive.org witin our tests.. - 'SUBMIT_ARCHIVE_DOT_ORG': 'False', - } - if env is None: - env = {} - - env = dict(**env_defaults, **env) - - jj = [] - for url in links: - jj.append({ - 'href': url, - 'description': url, - }) - input_json = join(self.output_dir, 'input.json') - with open(input_json, 'w') as fo: - json.dump(jj, fo) - - if env is None: - env = {} - env['OUTPUT_DIR'] = self.output_dir - check_call( - [ARCHIVER_BIN, input_json], - env={**os.environ.copy(), **env}, - ) - - -class TestArchiver: - def setup(self): - # self.tdir = TemporaryDirectory(dir='hello') - class AAA: - name = 'hello' - self.tdir = AAA() - - def teardown(self): - pass - # self.tdir.cleanup() - - @property - def output_dir(self): - return self.tdir.name - - def test_fetch_favicon_false(self): - h = Helper(self.output_dir) - - h.run(links=[ - 'https://google.com', - ], env={ - 'FETCH_FAVICON': 'False', - }) - # for now no asserts, good enough if it isn't failing - - def test_3000_links(self): - """ - The pages are deliberatly unreachable. The tool should gracefully process all of them even though individual links are failing. - """ - h = Helper(self.output_dir) - - h.run(links=[ - f'https://localhost:123/whatever_{i}.html' for i in range(3000) - ], env={ - 'FETCH_FAVICON': 'False', - 'FETCH_SCREENSHOT': 'False', - 'FETCH_PDF': 'False', - 'FETCH_DOM': 'False', - 'CHECK_SSL_VALIDITY': 'False', - }) - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/bin/README.md b/bin/README.md deleted file mode 100644 index 88459dda..00000000 --- a/bin/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Binaries for running ArchiveBox - -This folder contains all the executables that ArchiveBox provides. - - -# Adding it to your `$PATH` -To be able to run ArchiveBox from anywhere on your system, you can add this entire folder to your path, like so: - -**Edit `~/.bash_profile`:** -```bash -export PATH=/opt/ArchiveBox/bin:$PATH -``` - -# Running executables directly - -If you don't want to add ArchiveBox to your `$PATH` you can also call these executables directly with their full path, like so: - -`/opt/ArchiveBox/bin/ArchiveBox https://example.com/some/feed.rss` diff --git a/bin/archivebox b/bin/archivebox deleted file mode 120000 index 053f14ab..00000000 --- a/bin/archivebox +++ /dev/null @@ -1 +0,0 @@ -../archivebox/archive.py \ No newline at end of file diff --git a/bin/archivebox b/bin/archivebox new file mode 100755 index 00000000..601d4c25 --- /dev/null +++ b/bin/archivebox @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import os +import sys + + +BIN_DIR = os.path.dirname(os.path.abspath(__file__)) +REPO_DIR = os.path.abspath(os.path.join(BIN_DIR, os.pardir)) +sys.path.append(REPO_DIR) + +from archivebox.__main__ import main + + +if __name__ == '__main__': + main(sys.argv) diff --git a/bin/archivebox-purge b/bin/archivebox-purge deleted file mode 120000 index 1bb208e1..00000000 --- a/bin/archivebox-purge +++ /dev/null @@ -1 +0,0 @@ -../archivebox/purge.py \ No newline at end of file diff --git a/setup.py b/setup.py index d3ce3963..d853492b 100644 --- a/setup.py +++ b/setup.py @@ -37,10 +37,11 @@ setuptools.setup( python_requires='>=3.6', install_requires=[ "base32-crockford==0.3.0", + "django==2.2", ], entry_points={ 'console_scripts': [ - 'archivebox = archivebox.archive:main', + 'archivebox = archivebox.__main__:main', ], }, package_data={