mirror of https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00

swap more direct settings.CONFIG access to abx getters

parent b61f6ff8d8
commit 4b6f08b0fe
3 changed files with 26 additions and 18 deletions
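
In short, the commit swaps direct reads of plugin registries off django.conf.settings for the abx.archivebox.reads getter functions. A minimal sketch of the before/after access pattern, using only names that appear in the diffs below (the motivation in the comment is assumed, not spelled out by the commit):

    # Before: plugin registries were read directly off django.conf.settings
    from django.conf import settings

    binaries = settings.BINARIES          # registry of Binary objects
    binproviders = settings.BINPROVIDERS  # registry of package-manager providers

    # After: the same registries fetched through abx getters, presumably so
    # call sites no longer depend on Django settings having been populated
    import abx.archivebox.reads

    binaries = abx.archivebox.reads.get_BINARIES()
    binproviders = abx.archivebox.reads.get_BINPROVIDERS()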
File 1 of 3 (likely archivebox/index/json.py; filenames and exact indentation are not preserved by this mirror and are inferred from context):

@@ -8,6 +8,8 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, Optional, Iterator, Any, Union
 
+import abx.archivebox.reads
+
 from archivebox.config import VERSION, DATA_DIR, CONSTANTS
 from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
 
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
 
 @enforce_types
 def generate_json_index_from_links(links: List[Link], with_headers: bool):
-    from django.conf import settings
-
     MAIN_INDEX_HEADER = {
         'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
         'schema': 'archivebox.index.json',
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
             'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
             'source': 'https://github.com/ArchiveBox/ArchiveBox',
             'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
-            'dependencies': settings.BINARIES.to_dict(),
+            'dependencies': dict(abx.archivebox.reads.get_BINARIES()),
         },
     }
 
-
     if with_headers:
         output = {
             **MAIN_INDEX_HEADER,
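The only behavioral change in the file above is the dependencies field of the index header: the old code called .to_dict() on the settings registry, the new code materializes the getter's result with dict(). A hedged sketch, assuming get_BINARIES() returns a name-to-Binary mapping as settings.BINARIES did:

    import abx.archivebox.reads

    # dict() takes a plain-dict snapshot of the registry mapping so it can be
    # embedded under MAIN_INDEX_HEADER['meta'], replacing settings.BINARIES.to_dict()
    dependencies = dict(abx.archivebox.reads.get_BINARIES())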
File 2 of 3 (likely archivebox/main.py):

@@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     from rich import print
     from django.conf import settings
 
-    from archivebox import CONSTANTS
+    import abx.archivebox.reads
+
     from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
     from archivebox.config.paths import get_or_create_working_lib_dir
 
@@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
 
     package_manager_names = ', '.join(
         f'[yellow]{binprovider.name}[/yellow]'
-        for binprovider in reversed(list(settings.BINPROVIDERS.values()))
+        for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
         if not binproviders or (binproviders and binprovider.name in binproviders)
     )
     print(f'[+] Setting up package managers {package_manager_names}...')
-    for binprovider in reversed(list(settings.BINPROVIDERS.values())):
+    for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())):
         if binproviders and binprovider.name not in binproviders:
             continue
         try:
@@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
 
     print()
 
-    for binary in reversed(list(settings.BINARIES.values())):
+    for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())):
         if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
             # obviously must already be installed if we are running
             continue
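A side note on the generator's filter, which this diff leaves untouched: "not binproviders or (binproviders and binprovider.name in binproviders)" is equivalent to the shorter "not binproviders or binprovider.name in binproviders", because the parenthesized clause is only reached when binproviders is truthy. A self-contained check:

    # Truth-table check that the two spellings of the filter agree.
    def verbose(name, binproviders):
        return not binproviders or (binproviders and name in binproviders)

    def simple(name, binproviders):
        return not binproviders or name in binproviders

    for providers in (None, [], ['apt'], ['apt', 'brew']):
        for name in ('apt', 'pip'):
            assert bool(verbose(name, providers)) == simple(name, providers)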
File 3 of 3 (likely archivebox/misc/util.py):

@@ -5,7 +5,7 @@ import requests
 import json as pyjson
 import http.cookiejar
 
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Callable
 from pathlib import Path
 from inspect import signature
 from functools import wraps
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
 from base32_crockford import encode as base32_encode  # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 try:
-    import chardet
+    import chardet  # type:ignore
     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
 except ImportError:
     detect_encoding = lambda rawdata: "utf-8"
 
 
-from archivebox.config import CONSTANTS
-from archivebox.config.common import ARCHIVING_CONFIG
+from archivebox.config.constants import CONSTANTS
 
 from .logging import COLOR_DICT
 
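Note that the module-level ARCHIVING_CONFIG import removed here does not disappear: the download_url and get_headers hunks further down re-import it inside the function bodies. Presumably this defers config access from import time to call time; a sketch of the pattern, with a hypothetical function name:

    # Deferred-import pattern (fetch() is hypothetical): merely importing this
    # module no longer requires the config system to be initialized; only
    # calling the function does.
    def fetch(url: str, timeout: int | None = None) -> bytes:
        from archivebox.config.common import ARCHIVING_CONFIG  # lazy on purpose
        timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
        ...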
@@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
 
 
 @enforce_types
-def parse_date(date: Any) -> Optional[datetime]:
+def parse_date(date: Any) -> datetime:
     """Parse unix timestamps, iso format, and human-readable strings"""
 
     if date is None:
-        return None
+        return None  # type: ignore
 
     if isinstance(date, datetime):
         if date.tzinfo is None:
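The annotation change above narrows parse_date's declared return type from Optional[datetime] to plain datetime, so call sites no longer have to narrow away None; at runtime the body still returns None for None input, which is what the added "# type: ignore" papers over. A usage sketch (import path assumed from the surrounding context):

    from archivebox.misc.util import parse_date  # assumed location

    ts = parse_date('2024-01-01T00:00:00+00:00')
    print(ts.isoformat())  # usable as a datetime directly, no None-check needed

    # parse_date(None) still returns None at runtime, now outside the declared contract.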
@@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
 def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
 
+    from archivebox.config.common import ARCHIVING_CONFIG
+
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
     session = requests.Session()
 
@@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
     return url.rsplit('/', 1)[-1]
 
 @enforce_types
-def get_headers(url: str, timeout: int=None) -> str:
+def get_headers(url: str, timeout: int | None=None) -> str:
     """Download the contents of a remote url and return the headers"""
+    # TODO: get rid of this and use an abx pluggy hook instead
+
+    from archivebox.config.common import ARCHIVING_CONFIG
+
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
 
     try:
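One detail on the new get_headers signature: int | None is PEP 604 union syntax, which is evaluated at runtime in annotations and therefore requires Python 3.10+; Optional[int] is the equivalent older spelling:

    from typing import Optional

    # Equivalent signatures; the diff adopts the PEP 604 spelling (3.10+).
    def get_headers_pep604(url: str, timeout: int | None = None) -> str: ...
    def get_headers_typing(url: str, timeout: Optional[int] = None) -> str: ...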
@@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str:
 @enforce_types
 def dedupe(options: List[str]) -> List[str]:
     """
-    Deduplicates the given options. Options that come later clobber earlier
-    conflicting options.
+    Deduplicates the given CLI args by key=value. Options that come later override earlier.
     """
     deduped = {}
 
     for option in options:
-        deduped[option.split('=')[0]] = option
+        key = option.split('=')[0]
+        deduped[key] = option
 
     return list(deduped.values())
 
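The dedupe refactor above is behavior-preserving; it only gives the split key a name. A usage sketch inlining the same logic:

    # Later key=value options override earlier ones, keyed on the text before '='.
    options = ['--quiet', '--timeout=5', '--timeout=60', '--user-agent=Foo']
    deduped = {}
    for option in options:
        key = option.split('=')[0]
        deduped[key] = option
    print(list(deduped.values()))  # ['--quiet', '--timeout=60', '--user-agent=Foo']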
@@ -347,6 +352,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
         elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
             return tuple(obj)
 
+        elif isinstance(obj, Callable):
+            return str(obj)
+
         return pyjson.JSONEncoder.default(self, obj)
 
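Together with the Callable import added at the top of this file, the new branch makes function objects JSON-serializable as their str() form instead of raising TypeError. A usage sketch (encoder import path assumed):

    import json as pyjson
    from archivebox.misc.util import ExtendedEncoder  # assumed location

    print(pyjson.dumps({'handler': print}, cls=ExtendedEncoder))
    # e.g. {"handler": "<built-in function print>"}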