mirror of https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-06-01 07:18:27 -04:00

Merge branch 'dev' into search_index_extract_html_text
commit a680724367
29 changed files with 3230 additions and 1654 deletions
@@ -90,8 +90,13 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
     'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
     'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
-    'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'},  # to avoid downloading code assets as their own pages
-    'URL_WHITELIST': {'type': str, 'default': None},
+    'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)},  # to avoid downloading code assets as their own pages
+    'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)},
+
+    'ADMIN_USERNAME': {'type': str, 'default': None},
+    'ADMIN_PASSWORD': {'type': str, 'default': None},
+
     'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
+    'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
 },

@@ -143,6 +148,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
     'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
     'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
+    'SAVE_ALLOWLIST': {'type': dict, 'default': {},},
+    'SAVE_DENYLIST': {'type': dict, 'default': {},},
 },

 'ARCHIVE_METHOD_OPTIONS': {

@@ -231,12 +238,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'CURL_BINARY': {'type': str, 'default': 'curl'},
     'GIT_BINARY': {'type': str, 'default': 'git'},
-    'WGET_BINARY': {'type': str, 'default': 'wget'},
+    'WGET_BINARY': {'type': str, 'default': 'wget'},  # also can accept wget2
     'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
     'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
-    'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('mercury-parser')},
-    #'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
-    'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'},
+    'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('postlight-parser')},
+    'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'},  # also can accept youtube-dl
     'NODE_BINARY': {'type': str, 'default': 'node'},
     'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
     'CHROME_BINARY': {'type': str, 'default': None},

@@ -374,6 +380,8 @@ def get_commit_hash(config):
 ############################## Derived Config ##################################

+ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
+
 DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
     'USER': {'default': lambda c: SYSTEM_USER},

@@ -390,8 +398,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
     'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
     'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)},  # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
-    'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
-    'URL_WHITELIST_PTN': {'default': lambda c: c['URL_WHITELIST'] and re.compile(c['URL_WHITELIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
+    'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
+    'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},

     'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},

@@ -435,7 +443,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},

     'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
-    'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None},  # mercury is unversioned
+    'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None},  # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750

     'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
     'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},

@@ -465,10 +473,11 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)},
     'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
     'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)},
+    'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
+    'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
 }


 ################################### Helpers ####################################
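For context, a rough sketch of how the renamed deny/allowlist options behave once compiled into URL_DENYLIST_PTN by the Derived Config section above (the pattern and flags are copied from the defaults; the example URLs are hypothetical):

import re

ALLOWDENYLIST_REGEX_FLAGS = re.IGNORECASE | re.UNICODE | re.MULTILINE

URL_DENYLIST = r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'
URL_DENYLIST_PTN = re.compile(URL_DENYLIST, ALLOWDENYLIST_REGEX_FLAGS)

assert URL_DENYLIST_PTN.search('https://example.com/static/app.js?v=2')  # asset URL: matched, so skipped
assert not URL_DENYLIST_PTN.search('https://example.com/blog/post')      # normal page: not matched, archived

The old URL_BLACKLIST/URL_WHITELIST names keep working through the new 'aliases' entries, so existing configs are not broken by the rename.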
@@ -41,7 +41,7 @@ class ConfigDict(BaseConfig, total=False):
     MEDIA_TIMEOUT: int
     OUTPUT_PERMISSIONS: str
     RESTRICT_FILE_NAMES: str
-    URL_BLACKLIST: str
+    URL_DENYLIST: str

     SECRET_KEY: Optional[str]
     BIND_ADDR: str
@@ -41,7 +41,7 @@ class AddLinkForm(forms.Form):
     #     label="Exclude patterns",
     #     min_length='1',
     #     required=False,
-    #     initial=URL_BLACKLIST,
+    #     initial=URL_DENYLIST,
     # )
     # timeout = forms.IntegerField(
     #     initial=TIMEOUT,
@@ -6,9 +6,6 @@ import re
 import logging
 import tempfile

-import ldap
-from django_auth_ldap.config import LDAPSearch
-
 from pathlib import Path
 from django.utils.crypto import get_random_string

@@ -97,33 +94,43 @@ AUTHENTICATION_BACKENDS = [
 ]

 if LDAP:
-    global AUTH_LDAP_SERVER_URI
-    AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
-
-    global AUTH_LDAP_BIND_DN
-    AUTH_LDAP_BIND_DN = LDAP_BIND_DN
-
-    global AUTH_LDAP_BIND_PASSWORD
-    AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
-
-    global AUTH_LDAP_USER_SEARCH
-    AUTH_LDAP_USER_SEARCH = LDAPSearch(
-        LDAP_USER_BASE,
-        ldap.SCOPE_SUBTREE,
-        '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
-    )
-
-    global AUTH_LDAP_USER_ATTR_MAP
-    AUTH_LDAP_USER_ATTR_MAP = {
-        'username': LDAP_USERNAME_ATTR,
-        'first_name': LDAP_FIRSTNAME_ATTR,
-        'last_name': LDAP_LASTNAME_ATTR,
-        'email': LDAP_EMAIL_ATTR,
-    }
-
-    AUTHENTICATION_BACKENDS = [
-        'django_auth_ldap.backend.LDAPBackend',
-    ]
+    try:
+        import ldap
+        from django_auth_ldap.config import LDAPSearch
+
+        global AUTH_LDAP_SERVER_URI
+        global AUTH_LDAP_BIND_DN
+        global AUTH_LDAP_BIND_PASSWORD
+        global AUTH_LDAP_USER_SEARCH
+        global AUTH_LDAP_USER_ATTR_MAP
+
+        AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
+        AUTH_LDAP_BIND_DN = LDAP_BIND_DN
+        AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
+
+        assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
+
+        AUTH_LDAP_USER_SEARCH = LDAPSearch(
+            LDAP_USER_BASE,
+            ldap.SCOPE_SUBTREE,
+            '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
+        )
+
+        AUTH_LDAP_USER_ATTR_MAP = {
+            'username': LDAP_USERNAME_ATTR,
+            'first_name': LDAP_FIRSTNAME_ATTR,
+            'last_name': LDAP_LASTNAME_ATTR,
+            'email': LDAP_EMAIL_ATTR,
+        }
+
+        AUTHENTICATION_BACKENDS = [
+            'django_auth_ldap.backend.LDAPBackend',
+        ]
+    except ModuleNotFoundError:
+        sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
+        # dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap
+        # sys.exit(1)

 ################################################################################
 ### Debug Settings
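The restructuring above defers the ldap import until the feature is actually enabled, so a missing optional dependency no longer breaks unrelated commands at startup. A minimal sketch of the same guard pattern in isolation (the LDAP flag value here is hypothetical; in the real settings it comes from user config):

import sys

LDAP = True  # hypothetical value, normally loaded from user config

if LDAP:
    try:
        import ldap  # heavy optional dependency, only needed when LDAP auth is on
        # ... configure django_auth_ldap using the LDAP_* options here ...
    except ModuleNotFoundError:
        sys.stderr.write('[X] Error: LDAP=True but the ldap packages are not installed\n')
        # deliberately no sys.exit(1), so commands like "archivebox version" keep working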
@@ -4,12 +4,16 @@ import os
 import sys
 from pathlib import Path

-from typing import Optional, List, Iterable, Union
+from typing import Callable, Optional, List, Iterable, Union
 from datetime import datetime, timezone
 from django.db.models import QuerySet

+from ..config import (
+    SAVE_ALLOWLIST_PTN,
+    SAVE_DENYLIST_PTN,
+)
 from ..core.settings import ERROR_LOG
-from ..index.schema import Link
+from ..index.schema import ArchiveResult, Link
 from ..index.sql import write_link_to_sql_index
 from ..index import (
     load_link_details,

@@ -43,7 +47,11 @@ from .archive_org import should_save_archive_dot_org, save_archive_dot_org
 from .headers import should_save_headers, save_headers


-def get_default_archive_methods():
+ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
+SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
+ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]
+
+def get_default_archive_methods() -> List[ArchiveMethodEntry]:
     return [
         ('favicon', should_save_favicon, save_favicon),
         ('headers', should_save_headers, save_headers),
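For context, a minimal sketch of the contract the new type aliases describe, with a toy extractor (the names and bodies here are hypothetical; only the signatures follow the aliases above):

from pathlib import Path
from typing import Callable, Optional

class Link: ...           # stand-in for archivebox's Link
class ArchiveResult: ...  # stand-in for archivebox's ArchiveResult

ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]

def should_save_example(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
    return True  # decide whether this extractor applies to the link

def save_example(link: Link, out_dir: Optional[Path]=None, timeout: int=60) -> ArchiveResult:
    return ArchiveResult()  # run the extractor and report the outcome

entry: ArchiveMethodEntry = ('example', should_save_example, save_example)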
@@ -71,12 +79,30 @@ ARCHIVE_METHODS_INDEXING_PRECEDENCE = [
     ('wget', 6)
 ]

+
+def get_archive_methods_for_link(link: Link) -> Iterable[ArchiveMethodEntry]:
+    DEFAULT_METHODS = get_default_archive_methods()
+    allowed_methods = {
+        m for pat, methods in
+        SAVE_ALLOWLIST_PTN.items()
+        if pat.search(link.url)
+        for m in methods
+    } or { m[0] for m in DEFAULT_METHODS }
+    denied_methods = {
+        m for pat, methods in
+        SAVE_DENYLIST_PTN.items()
+        if pat.search(link.url)
+        for m in methods
+    }
+    allowed_methods -= denied_methods
+
+    return (m for m in DEFAULT_METHODS if m[0] in allowed_methods)
+
 @enforce_types
-def ignore_methods(to_ignore: List[str]):
+def ignore_methods(to_ignore: List[str]) -> Iterable[str]:
     ARCHIVE_METHODS = get_default_archive_methods()
-    methods = filter(lambda x: x[0] not in to_ignore, ARCHIVE_METHODS)
-    methods = map(lambda x: x[0], methods)
-    return list(methods)
+    return [x[0] for x in ARCHIVE_METHODS if x[0] not in to_ignore]

 @enforce_types
 def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
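The new per-URL method selection keys compiled regexes to lists of extractor names: allowlist matches narrow the set (falling back to all defaults when nothing matches), then denylist matches are subtracted. A self-contained sketch of the same logic, using hypothetical patterns and a simplified method list:

import re

ALLOWDENYLIST_REGEX_FLAGS = re.IGNORECASE | re.UNICODE | re.MULTILINE

# hypothetical user config: regex pattern -> list of extractor names
SAVE_ALLOWLIST_PTN = {re.compile(r'//(www\.)?youtube\.com/', ALLOWDENYLIST_REGEX_FLAGS): ['media', 'title']}
SAVE_DENYLIST_PTN = {re.compile(r'\.pdf$', ALLOWDENYLIST_REGEX_FLAGS): ['screenshot']}

DEFAULT_METHODS = ['title', 'favicon', 'wget', 'screenshot', 'media']

def methods_for(url: str) -> list[str]:
    allowed = {m for pat, methods in SAVE_ALLOWLIST_PTN.items() if pat.search(url) for m in methods} or set(DEFAULT_METHODS)
    denied = {m for pat, methods in SAVE_DENYLIST_PTN.items() if pat.search(url) for m in methods}
    return [m for m in DEFAULT_METHODS if m in (allowed - denied)]

print(methods_for('https://www.youtube.com/watch?v=abc'))  # ['title', 'media'] (allowlist narrows)
print(methods_for('https://example.com/page'))             # all defaults (no pattern matched)
print(methods_for('https://example.com/doc.pdf'))          # defaults minus 'screenshot' (denylisted)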
@@ -89,11 +115,11 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
     except Snapshot.DoesNotExist:
         snapshot = write_link_to_sql_index(link)

-    ARCHIVE_METHODS = get_default_archive_methods()
+    active_methods = get_archive_methods_for_link(link)

     if methods:
-        ARCHIVE_METHODS = [
-            method for method in ARCHIVE_METHODS
+        active_methods = [
+            method for method in active_methods
             if method[0] in methods
         ]

@@ -110,7 +136,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
     stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
     start_ts = datetime.now(timezone.utc)

-    for method_name, should_run, method_function in ARCHIVE_METHODS:
+    for method_name, should_run, method_function in active_methods:
         try:
             if method_name not in link.history:
                 link.history[method_name] = []
@@ -71,7 +71,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> ArchiveResult:
         result = run(cmd, cwd=out_dir, timeout=timeout)
         try:
             result_json = json.loads(result.stdout)
-            assert result_json and 'content' in result_json
+            assert result_json and 'content' in result_json, 'Readability output is not valid JSON'
         except json.JSONDecodeError:
             raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr)

@@ -85,7 +85,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> ArchiveResult:
         #  "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
         output_tail = [
             line.strip()
-            for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
+            for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
             if line.strip()
         ]
         hints = (
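The rsplit change widens the captured tail of the extractor's output from the last 3 lines to the last 5, so error hints include more context. A quick illustration of what the expression returns (sample output is hypothetical):

output = b'one\ntwo\nthree\nfour\nfive\nsix'.decode()
tail = [line.strip() for line in output.rsplit('\n', 5)[-5:] if line.strip()]
print(tail)  # ['two', 'three', 'four', 'five', 'six'] -- the last 5 non-empty lines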
@@ -22,8 +22,8 @@ from ..config import (
     JSON_INDEX_FILENAME,
     OUTPUT_DIR,
     TIMEOUT,
-    URL_BLACKLIST_PTN,
-    URL_WHITELIST_PTN,
+    URL_DENYLIST_PTN,
+    URL_ALLOWLIST_PTN,
     stderr,
     OUTPUT_PERMISSIONS
 )

@@ -142,9 +142,9 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
             continue
         if scheme(link.url) not in ('http', 'https', 'ftp'):
             continue
-        if URL_BLACKLIST_PTN and URL_BLACKLIST_PTN.search(link.url):
+        if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(link.url):
             continue
-        if URL_WHITELIST_PTN and (not URL_WHITELIST_PTN.search(link.url)):
+        if URL_ALLOWLIST_PTN and (not URL_ALLOWLIST_PTN.search(link.url)):
             continue

         yield link
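For context, the same scheme + deny/allow check in isolation (a sketch; the abbreviated denylist pattern and example URLs are hypothetical):

import re
from urllib.parse import urlparse

URL_DENYLIST_PTN = re.compile(r'\.(css|js)(\?.*)?$', re.IGNORECASE)  # abbreviated version of the default
URL_ALLOWLIST_PTN = None  # unset by default, so it never filters anything out

def archivable(url: str) -> bool:
    if urlparse(url).scheme not in ('http', 'https', 'ftp'):
        return False
    if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(url):
        return False
    if URL_ALLOWLIST_PTN and not URL_ALLOWLIST_PTN.search(url):
        return False
    return True

print(archivable('https://example.com/article'))    # True
print(archivable('https://example.com/style.css'))  # False (denylisted asset)
print(archivable('mailto:someone@example.com'))     # False (unsupported scheme)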
@@ -533,11 +533,27 @@ def log_shell_welcome_msg():
 ### Helpers

 @enforce_types
-def pretty_path(path: Union[Path, str]) -> str:
+def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=OUTPUT_DIR) -> str:
     """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
-    pwd = Path('.').resolve()
-    # parent = os.path.abspath(os.path.join(pwd, os.path.pardir))
-    return str(path).replace(str(pwd) + '/', './')
+    pwd = str(Path(pwd))  # .resolve()
+    path = str(path)
+
+    if not path:
+        return path
+
+    # replace long absolute paths with ./ relative ones to save on terminal output width
+    if path.startswith(pwd) and (pwd != '/'):
+        path = path.replace(pwd, '.', 1)
+
+    # quote paths containing spaces
+    if ' ' in path:
+        path = f'"{path}"'
+
+    # if path is just a plain dot, replace it back with the absolute path for clarity
+    if path == '.':
+        path = pwd
+
+    return path


 @enforce_types
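Given the rewritten helper above, these are the outputs to expect (a runnable sketch with a condensed copy of the function; the paths and the '/data' default are hypothetical):

from pathlib import Path

def pretty_path(path, pwd='/data'):  # condensed copy of the new helper above
    pwd, path = str(Path(pwd)), str(path)
    if not path:
        return path
    if path.startswith(pwd) and (pwd != '/'):
        path = path.replace(pwd, '.', 1)
    if ' ' in path:
        path = f'"{path}"'
    if path == '.':
        path = pwd
    return path

print(pretty_path('/data/archive/1234'))      # ./archive/1234
print(pretty_path('/data/my docs/file.txt'))  # "./my docs/file.txt" (quoted because of the space)
print(pretty_path('/data'))                   # /data (a bare '.' is expanded back to the full path)
print(pretty_path(''))                        # '' passes through unchanged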
@@ -578,6 +594,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
     else:
         color, symbol, note, num_files = 'lightyellow', '-', 'disabled', '-'

+
     if folder['path']:
         if Path(folder['path']).exists():
             num_files = (

@@ -592,13 +609,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
         # add symbol @ next to filecount if path is a remote filesystem mount
         num_files = f'{num_files} @' if num_files else '@'

-    path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
-    if path and ' ' in path:
-        path = f'"{path}"'
-
-    # if path is just a plain dot, replace it back with the full path for clarity
-    if path == '.':
-        path = str(OUTPUT_DIR)
+    path = pretty_path(folder['path'])

     return ' '.join((
         ANSI[color],

@@ -629,9 +640,7 @@ def printable_dependency_version(name: str, dependency: Dict) -> str:
     else:
         color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'

-    path = str(dependency["path"]).replace(str(OUTPUT_DIR), '.') if dependency["path"] else ''
-    if path and ' ' in path:
-        path = f'"{path}"'
+    path = pretty_path(dependency['path'])

     return ' '.join((
         ANSI[color],
@@ -112,6 +112,8 @@ from .config import (
     load_all_config,
     CONFIG,
     USER_CONFIG,
+    ADMIN_USERNAME,
+    ADMIN_PASSWORD,
     get_real_name,
     setup_django,
 )

@@ -216,7 +218,7 @@ def version(quiet: bool=False,
     if not quiet:
         # 0.6.3
         # ArchiveBox v0.6.3 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
-        # DEBUG=False IN_DOCKER=True IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 501:20 SEARCH_BACKEND=ripgrep
+        # DEBUG=False IN_DOCKER=True IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep

         p = platform.uname()
         print(

@@ -236,7 +238,8 @@ def version(quiet: bool=False,
             #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
             f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
             f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
-            f'FS_PERMS={OUTPUT_PERMISSIONS} {PUID}:{PGID}',
+            f'FS_USER={PUID}:{PGID}',
+            f'FS_PERMS={OUTPUT_PERMISSIONS}',
             f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
         )
         print()

@@ -251,19 +254,19 @@ def version(quiet: bool=False,

         print()
         print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
-        for name, folder in CODE_LOCATIONS.items():
-            print(printable_folder_status(name, folder))
+        for name, path in CODE_LOCATIONS.items():
+            print(printable_folder_status(name, path))

         print()
         print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
-        for name, folder in EXTERNAL_LOCATIONS.items():
-            print(printable_folder_status(name, folder))
+        for name, path in EXTERNAL_LOCATIONS.items():
+            print(printable_folder_status(name, path))

         print()
         if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
             print('{white}[i] Data locations:{reset}'.format(**ANSI))
-            for name, folder in DATA_LOCATIONS.items():
-                print(printable_folder_status(name, folder))
+            for name, path in DATA_LOCATIONS.items():
+                print(printable_folder_status(name, path))
         else:
             print()
             print('{white}[i] Data locations:{reset}'.format(**ANSI))

@@ -419,14 +422,16 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
         write_main_index(list(pending_links.values()), out_dir=out_dir)

     print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))

+    from django.contrib.auth.models import User
+
+    if (ADMIN_USERNAME and ADMIN_PASSWORD) and not User.objects.filter(username=ADMIN_USERNAME).exists():
+        print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI))
+        User.objects.create_superuser(username=ADMIN_USERNAME, password=ADMIN_PASSWORD)
+
     if existing_index:
         print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
     else:
-        # TODO: allow creating new supersuer via env vars on first init
-        # if config.HTTP_USER and config.HTTP_PASS:
-        #     from django.contrib.auth.models import User
-        #     User.objects.create_superuser(HTTP_USER, '', HTTP_PASS)
         print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI))

     json_index = out_dir / JSON_INDEX_FILENAME
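With the new ADMIN_USERNAME and ADMIN_PASSWORD options, init can create the admin account non-interactively, which closes out the old TODO above. A sketch of the guard in isolation (the credential values here are hypothetical; in practice they would come from the config file or environment when running archivebox init):

from django.contrib.auth.models import User

ADMIN_USERNAME, ADMIN_PASSWORD = 'admin', 'hunter2'  # hypothetical config values

# idempotent: only fires when both options are set and the user doesn't already exist
if (ADMIN_USERNAME and ADMIN_PASSWORD) and not User.objects.filter(username=ADMIN_USERNAME).exists():
    User.objects.create_superuser(username=ADMIN_USERNAME, password=ADMIN_PASSWORD)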
@@ -1,62 +1,3 @@
-{% extends "base.html" %}
-{% load static %}
-
-{% block body %}
-<div id="toolbar">
-    <form id="changelist-search" action="{% url 'public-index' %}" method="get">
-        <div>
-            <label for="searchbar"><img src="/static/admin/img/search.svg" alt="Search"></label>
-            <input type="text" size="40" name="q" value="" id="searchbar" autofocus placeholder="Title, URL, tags, timestamp, or content...".>
-            <input type="submit" value="Search" style="height: 36px; padding-top: 6px; margin: 8px"/>
-            <input type="button"
-                   value="♺"
-                   title="Refresh..."
-                   onclick="location.href='{% url 'public-index' %}'"
-                   style="background-color: rgba(121, 174, 200, 0.8); height: 30px; font-size: 0.8em; margin-top: 12px; padding-top: 6px; float:right">
-            </input>
-        </div>
-    </form>
-</div>
-<table id="table-bookmarks">
-    <thead>
-        <tr>
-            <th style="width: 100px;">Bookmarked</th>
-            <th style="width: 26vw;">Snapshot ({{object_list|length}})</th>
-            <th style="width: 140px">Files</th>
-            <th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
-        </tr>
-    </thead>
-    <tbody>
-        {% for link in object_list %}
-            {% include 'main_index_row.html' with link=link %}
-        {% endfor %}
-    </tbody>
-</table>
-<center>
-    <span class="step-links">
-        {% if page_obj.has_previous %}
-            <a href="{% url 'public-index' %}?page=1">« first</a>
-            <a href="{% url 'public-index' %}?page={{ page_obj.previous_page_number }}">previous</a>
-        {% endif %}
-
-        <span class="current">
-            Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}.
-        </span>
-
-        {% if page_obj.has_next %}
-            <a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
-            <a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last »</a>
-        {% endif %}
-    </span>
-    <br>
-</center>
-{% endblock %}
+{% extends "admin/base_site.html" %}
+{% load i18n admin_urls static admin_list %}
+{% load core_tags %}
@@ -33,7 +33,7 @@
       <br/>
       <div class="loader"></div>
       <br/>
-      Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for progress...
+      Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for detailed progress...
     </center>
 </div>
 <form id="add-form" method="POST" class="p-form">{% csrf_token %}

@@ -46,19 +46,22 @@
 </form>
 <br/><br/><br/>
 <center id="delay-warning" style="display: none">
-    <small>(it's safe to leave this page, adding will continue in the background)</small>
+    <small>(you will be redirected to your <a href="/">Snapshot list</a> momentarily, its safe to close this page at any time)</small>
 </center>
 {% if absolute_add_path %}
-    <center id="bookmarklet">
+    <!-- <center id="bookmarklet">
         <p>Bookmark this link to quickly add to your archive:
         <a href="javascript:void(window.open('{{ absolute_add_path }}?url='+encodeURIComponent(document.location.href)));">Add to ArchiveBox</a></p>
-    </center>
+    </center> -->
 {% endif %}
 <script>
     document.getElementById('add-form').addEventListener('submit', function(event) {
         document.getElementById('in-progress').style.display = 'block'
         document.getElementById('add-form').style.display = 'none'
         document.getElementById('delay-warning').style.display = 'block'
+        setTimeout(function() {
+            window.location = '/'
+        }, 2000)
         return true
     })
 </script>