wip initial django setup

This commit is contained in:
Nick Sweeting 2019-04-02 16:36:41 -04:00
parent 03f300d050
commit 27708152d2
36 changed files with 257 additions and 59 deletions

View file

@ -1,5 +1,5 @@
__name__ = 'archivebox' #__name__ = 'archivebox'
__package__ = 'archivebox' #__package__ = 'archivebox'

View file

@ -0,0 +1 @@
../VERSION

View file

View file

@ -0,0 +1,123 @@
"""
Django settings for archivebox project.

Generated by 'django-admin startproject' using Django 2.1.7.

For more information on this file, see
https://docs.djangoproject.com/en/2.1/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/2.1/ref/settings/
"""
import os

# Build paths inside the project like this: os.path.join(COLLECTION_DIR, ...)
# REPO_DIR is the parent of the directory containing this settings file.
REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
# COLLECTION_DIR is the working directory the process was started from;
# the SQLite database file is placed there (see DATABASES below).
COLLECTION_DIR = os.path.abspath(os.curdir)

# NOTE: the debugging `print(...)` calls and `raise SystemExit(0)` that used to
# follow the path setup were removed: they terminated the interpreter as soon
# as the settings module was imported, leaving every setting below undefined.

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is committed to source control; replace it before
# any production deployment.
SECRET_KEY = 'm-ma!-z^0b5w4%**le#ig!7-d@h($t02q*96h*-ua+$lm9bvao'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'core',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'archivebox.urls'

# Templates are loaded from <REPO_DIR>/themes/<ACTIVE_THEME>.
ACTIVE_THEME = 'default'
TEMPLATES_DIR = os.path.join(REPO_DIR, 'themes', ACTIVE_THEME)

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [TEMPLATES_DIR],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'archivebox.wsgi.application'


# Database
# https://docs.djangoproject.com/en/2.1/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(COLLECTION_DIR, 'database.sqlite3'),
    }
}


# Password validation
# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/2.1/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/2.1/howto/static-files/

STATIC_URL = '/static/'

View file

@ -0,0 +1,21 @@
"""archivebox URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/2.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path
# Project-level route table: currently only the Django admin site is mounted,
# under the 'admin/' URL prefix.
urlpatterns = [
    path('admin/', admin.site.urls),
]

View file

@ -0,0 +1,16 @@
"""
WSGI config for archivebox project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Select the project settings module unless DJANGO_SETTINGS_MODULE is already
# set in the environment; this must happen before the application is built.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
# Module-level WSGI callable, referenced as 'archivebox.wsgi.application'.
application = get_wsgi_application()

View file

3
archivebox/core/admin.py Normal file
View file

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

5
archivebox/core/apps.py Normal file
View file

@ -0,0 +1,5 @@
from django.apps import AppConfig
class CoreConfig(AppConfig):
    """Django application configuration for the ``core`` app."""

    # Dotted path of the application this config applies to.
    name = 'core'

View file

@ -16,11 +16,11 @@ import shutil
from typing import List, Optional from typing import List, Optional
from .schema import Link from core.schema import Link
from .links import links_after_timestamp from core.links import links_after_timestamp
from .index import write_links_index, load_links_index from core.index import write_links_index, load_links_index
from .archive_methods import archive_link from core.archive_methods import archive_link
from .config import ( from core.config import (
ONLY_NEW, ONLY_NEW,
OUTPUT_DIR, OUTPUT_DIR,
VERSION, VERSION,
@ -41,12 +41,12 @@ from .config import (
FETCH_GIT, FETCH_GIT,
FETCH_MEDIA, FETCH_MEDIA,
) )
from .util import ( from core.util import (
enforce_types, enforce_types,
handle_stdin_import, handle_stdin_import,
handle_file_import, handle_file_import,
) )
from .logs import ( from core.logs import (
log_archiving_started, log_archiving_started,
log_archiving_paused, log_archiving_paused,
log_archiving_finished, log_archiving_finished,
@ -142,7 +142,7 @@ def main(args=None) -> None:
" If you're trying to create a new archive, you must run archivebox inside a completely empty directory." " If you're trying to create a new archive, you must run archivebox inside a completely empty directory."
"\n\n" "\n\n"
" {lightred}Hint:{reset} To import a data folder created by an older version of ArchiveBox, \n" " {lightred}Hint:{reset} To import a data folder created by an older version of ArchiveBox, \n"
" just cd into the folder and run the archivebox comamnd to pick up where you left off.\n\n" " just cd into the folder and run the archivebox command to pick up where you left off.\n\n"
" (Always make sure your data folder is backed up first before updating ArchiveBox)" " (Always make sure your data folder is backed up first before updating ArchiveBox)"
).format(OUTPUT_DIR, **ANSI) ).format(OUTPUT_DIR, **ANSI)
) )

View file

@ -4,13 +4,13 @@ from typing import Dict, List, Tuple, Optional
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from .schema import Link, ArchiveResult, ArchiveOutput from core.schema import Link, ArchiveResult, ArchiveOutput
from .index import ( from core.index import (
write_link_index, write_link_index,
patch_links_index, patch_links_index,
load_json_link_index, load_json_link_index,
) )
from .config import ( from core.config import (
CURL_BINARY, CURL_BINARY,
GIT_BINARY, GIT_BINARY,
WGET_BINARY, WGET_BINARY,
@ -40,7 +40,7 @@ from .config import (
YOUTUBEDL_VERSION, YOUTUBEDL_VERSION,
WGET_AUTO_COMPRESSION, WGET_AUTO_COMPRESSION,
) )
from .util import ( from core.util import (
enforce_types, enforce_types,
domain, domain,
extension, extension,
@ -54,7 +54,7 @@ from .util import (
chrome_args, chrome_args,
run, PIPE, DEVNULL, run, PIPE, DEVNULL,
) )
from .logs import ( from core.logs import (
log_link_archiving_started, log_link_archiving_started,
log_link_archiving_finished, log_link_archiving_finished,
log_archive_method_started, log_archive_method_started,

View file

@ -5,8 +5,8 @@ from datetime import datetime
from string import Template from string import Template
from typing import List, Tuple, Iterator, Optional, Mapping from typing import List, Tuple, Iterator, Optional, Mapping
from .schema import Link, ArchiveResult from core.schema import Link, ArchiveResult
from .config import ( from core.config import (
OUTPUT_DIR, OUTPUT_DIR,
TEMPLATES_DIR, TEMPLATES_DIR,
VERSION, VERSION,
@ -14,7 +14,8 @@ from .config import (
FOOTER_INFO, FOOTER_INFO,
TIMEOUT, TIMEOUT,
) )
from .util import ( from core.util import (
ts_to_date,
merge_links, merge_links,
urlencode, urlencode,
htmlencode, htmlencode,
@ -26,9 +27,9 @@ from .util import (
copy_and_overwrite, copy_and_overwrite,
atomic_write, atomic_write,
) )
from .parse import parse_links from core.parse import parse_links
from .links import validate_links from core.links import validate_links
from .logs import ( from core.logs import (
log_indexing_process_started, log_indexing_process_started,
log_indexing_started, log_indexing_started,
log_indexing_finished, log_indexing_finished,
@ -284,6 +285,7 @@ def write_html_link_index(link: Link, link_dir: Optional[str]=None) -> None:
'tags': link.tags or 'untagged', 'tags': link.tags or 'untagged',
'status': 'archived' if link.is_archived else 'not yet archived', 'status': 'archived' if link.is_archived else 'not yet archived',
'status_color': 'success' if link.is_archived else 'danger', 'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date(link.oldest_archive_date),
} }
html_index = Template(link_html).substitute(**template_vars) html_index = Template(link_html).substitute(**template_vars)

View file

@ -1,14 +1,14 @@
from typing import Iterable from typing import Iterable
from collections import OrderedDict from collections import OrderedDict
from .schema import Link from core.schema import Link
from .util import ( from core.util import (
scheme, scheme,
fuzzy_url, fuzzy_url,
merge_links, merge_links,
) )
from .config import URL_BLACKLIST_PTN from core.config import URL_BLACKLIST_PTN
def validate_links(links: Iterable[Link]) -> Iterable[Link]: def validate_links(links: Iterable[Link]) -> Iterable[Link]:

View file

@ -5,8 +5,8 @@ from datetime import datetime
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional from typing import Optional
from .schema import Link, ArchiveResult from core.schema import Link, ArchiveResult
from .config import ANSI, OUTPUT_DIR from core.config import ANSI, OUTPUT_DIR
@dataclass @dataclass

View file

@ -0,0 +1,10 @@
from django.core.management.base import BaseCommand
from core.archive import main
class Command(BaseCommand):
    """Management command that invokes ``main`` imported from ``core.archive``."""

    help = 'ArchiveBox test.bee'

    def handle(self, *args, **kwargs):
        """Run ``main()`` with no arguments.

        Positional arguments and options passed in by the management
        framework are accepted but not forwarded.
        """
        main()

View file

View file

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

View file

@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable
from datetime import datetime from datetime import datetime
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .config import TIMEOUT from core.config import TIMEOUT
from .util import ( from core.util import (
htmldecode, htmldecode,
str_between, str_between,
URL_REGEX, URL_REGEX,

View file

@ -6,8 +6,8 @@ from os.path import exists, join
from shutil import rmtree from shutil import rmtree
from typing import List from typing import List
from .config import ARCHIVE_DIR, OUTPUT_DIR from core.config import ARCHIVE_DIR, OUTPUT_DIR
from .index import parse_json_links_index, write_html_links_index, write_json_links_index from core.index import parse_json_links_index, write_html_links_index, write_json_links_index
def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None: def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:

View file

@ -221,28 +221,20 @@ class Link:
return ts_to_date(self.updated) if self.updated else None return ts_to_date(self.updated) if self.updated else None
@property @property
def oldest_archive_date(self) -> Optional[datetime]: def archive_dates(self) -> List[datetime]:
from .util import ts_to_date return [
result.start_ts
most_recent = min(
(ts_to_date(result.start_ts)
for method in self.history.keys() for method in self.history.keys()
for result in self.history[method]), for result in self.history[method]
default=None, ]
)
return ts_to_date(most_recent) if most_recent else None @property
def oldest_archive_date(self) -> Optional[datetime]:
return min(self.archive_dates, default=None)
@property @property
def newest_archive_date(self) -> Optional[datetime]: def newest_archive_date(self) -> Optional[datetime]:
from .util import ts_to_date return max(self.archive_dates, default=None)
most_recent = max(
(ts_to_date(result.start_ts)
for method in self.history.keys()
for result in self.history[method]),
default=None,
)
return ts_to_date(most_recent) if most_recent else None
### Archive Status Helpers ### Archive Status Helpers
@property @property

3
archivebox/core/tests.py Normal file
View file

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View file

@ -26,8 +26,8 @@ from subprocess import (
from base32_crockford import encode as base32_encode # type: ignore from base32_crockford import encode as base32_encode # type: ignore
from .schema import Link from core.schema import Link
from .config import ( from core.config import (
ANSI, ANSI,
TERM_WIDTH, TERM_WIDTH,
SOURCES_DIR, SOURCES_DIR,
@ -40,7 +40,7 @@ from .config import (
CHROME_OPTIONS, CHROME_OPTIONS,
PYTHON_DIR, PYTHON_DIR,
) )
from .logs import pretty_path from core.logs import pretty_path
### Parsing Helpers ### Parsing Helpers
@ -62,17 +62,17 @@ base_url = lambda url: without_scheme(url) # uniq base url used to dedupe links
without_www = lambda url: url.replace('://www.', '://', 1) without_www = lambda url: url.replace('://www.', '://', 1)
without_trailing_slash = lambda url: url[:-1] if url[-1] == '/' else url.replace('/?', '?') without_trailing_slash = lambda url: url[:-1] if url[-1] == '/' else url.replace('/?', '?')
fuzzy_url = lambda url: without_trailing_slash(without_www(without_scheme(url.lower()))) fuzzy_url = lambda url: without_trailing_slash(without_www(without_scheme(url.lower())))
hashurl = lambda url: base32_encode(int(sha256(base_url(url).encode('utf-8')).hexdigest(), 16))[:20]
short_ts = lambda ts: str(parse_date(ts).timestamp()).split('.')[0]
ts_to_date = lambda ts: parse_date(ts).strftime('%Y-%m-%d %H:%M')
ts_to_iso = lambda ts: parse_date(ts).isoformat()
urlencode = lambda s: s and quote(s, encoding='utf-8', errors='replace') urlencode = lambda s: s and quote(s, encoding='utf-8', errors='replace')
urldecode = lambda s: s and unquote(s) urldecode = lambda s: s and unquote(s)
htmlencode = lambda s: s and escape(s, quote=True) htmlencode = lambda s: s and escape(s, quote=True)
htmldecode = lambda s: s and unescape(s) htmldecode = lambda s: s and unescape(s)
hashurl = lambda url: base32_encode(int(sha256(base_url(url).encode('utf-8')).hexdigest(), 16))[:20] short_ts = lambda ts: str(parse_date(ts).timestamp()).split('.')[0]
ts_to_date = lambda ts: ts and parse_date(ts).strftime('%Y-%m-%d %H:%M')
ts_to_iso = lambda ts: ts and parse_date(ts).isoformat()
URL_REGEX = re.compile( URL_REGEX = re.compile(
r'http[s]?://' # start matching from allowed schemes r'http[s]?://' # start matching from allowed schemes
@ -357,12 +357,12 @@ def str_between(string: str, start: str, end: str=None) -> str:
def parse_date(date: Any) -> Optional[datetime]: def parse_date(date: Any) -> Optional[datetime]:
"""Parse unix timestamps, iso format, and human-readable strings""" """Parse unix timestamps, iso format, and human-readable strings"""
if isinstance(date, datetime):
return date
if date is None: if date is None:
return None return None
if isinstance(date, datetime):
return date
if isinstance(date, (float, int)): if isinstance(date, (float, int)):
date = str(date) date = str(date)

3
archivebox/core/views.py Normal file
View file

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

15
archivebox/manage.py Executable file
View file

@ -0,0 +1,15 @@
#!/usr/bin/env python
import os
import sys
if __name__ == '__main__':
    # Use the project settings unless the caller already selected a module.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        # Re-raise with a friendlier hint while keeping the original cause.
        message = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(message) from exc
    # Hand the raw command line over to Django's management dispatcher.
    execute_from_command_line(sys.argv)

View file

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 158 B

After

Width:  |  Height:  |  Size: 158 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 201 B

After

Width:  |  Height:  |  Size: 201 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 157 B

After

Width:  |  Height:  |  Size: 157 B

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Before After
Before After

View file

@ -1,3 +1,4 @@
django
base32-crockford base32-crockford
setuptools setuptools