move almost all config into new archivebox.CONSTANTS

Nick Sweeting 2024-09-25 05:10:09 -07:00
parent f5e8d99fdf
commit bb65b2dbec
No known key found for this signature in database
32 changed files with 982 additions and 840 deletions
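
The pattern this commit applies throughout: module-level imports from the legacy config module are replaced by lazy lookups inside each function body, so values are only resolved after Django settings and the plugin system have loaded. Below is a minimal before/after sketch in Python using names that appear in this diff; the helper function itself is illustrative and not part of the commit, and the new CONSTANTS object is imported here from plugins_sys.config.constants (per the commit title, it is apparently also exposed as archivebox.CONSTANTS).

from pathlib import Path

# Old pattern: constants imported at module load time, which forced the
# legacy config system to initialize as a side effect of importing the module.
# from ..config import OUTPUT_DIR, JSON_INDEX_FILENAME
# index_path = Path(OUTPUT_DIR) / JSON_INDEX_FILENAME

def load_index_path(out_dir: Path) -> Path:   # illustrative helper, not in the diff
    # New pattern: look the value up lazily, inside the function, via CONSTANTS.
    from plugins_sys.config.constants import CONSTANTS
    return Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME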
archivebox/index


@@ -8,38 +8,36 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, Optional, Iterator, Any, Union
+import archivebox
 from .schema import Link
 from ..system import atomic_write
 from ..util import enforce_types
-from ..config import (
-    VERSION,
-    OUTPUT_DIR,
-    FOOTER_INFO,
-    DEPENDENCIES,
-    JSON_INDEX_FILENAME,
-    ARCHIVE_DIR_NAME,
-    ANSI
-)
-MAIN_INDEX_HEADER = {
-    'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
-    'schema': 'archivebox.index.json',
-    'copyright_info': FOOTER_INFO,
-    'meta': {
-        'project': 'ArchiveBox',
-        'version': VERSION,
-        'git_sha': VERSION, # not used anymore, but kept for backwards compatibility
-        'website': 'https://ArchiveBox.io',
-        'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
-        'source': 'https://github.com/ArchiveBox/ArchiveBox',
-        'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
-        'dependencies': DEPENDENCIES,
-    },
-}
 @enforce_types
 def generate_json_index_from_links(links: List[Link], with_headers: bool):
+    from django.conf import settings
+    from plugins_sys.config.apps import SERVER_CONFIG
+    MAIN_INDEX_HEADER = {
+        'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
+        'schema': 'archivebox.index.json',
+        'copyright_info': SERVER_CONFIG.FOOTER_INFO,
+        'meta': {
+            'project': 'ArchiveBox',
+            'version': archivebox.VERSION,
+            'git_sha': archivebox.VERSION, # not used anymore, but kept for backwards compatibility
+            'website': 'https://ArchiveBox.io',
+            'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
+            'source': 'https://github.com/ArchiveBox/ArchiveBox',
+            'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
+            'dependencies': settings.BINARIES.to_dict(),
+        },
+    }
     if with_headers:
         output = {
             **MAIN_INDEX_HEADER,
@@ -54,10 +52,12 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
 @enforce_types
-def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
+def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
     """parse an archive index json file and return the list of links"""
-    index_path = Path(out_dir) / JSON_INDEX_FILENAME
+    from plugins_sys.config.constants import CONSTANTS
+    index_path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
     if index_path.exists():
         with open(index_path, 'r', encoding='utf-8') as f:
             try:
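
For orientation, a hedged usage sketch of parse_json_main_index after this change: the default out_dir is now archivebox.DATA_DIR (replacing the old OUTPUT_DIR config value) and the index filename comes from CONSTANTS.JSON_INDEX_FILENAME. The import path below is assumed from the file shown above, not stated in the diff.

import archivebox
from archivebox.index.json import parse_json_main_index  # import path assumed

# Iterates the links recorded in <DATA_DIR>/index.json, if that file exists;
# each yielded item is a Link parsed from the stored JSON.
for link in parse_json_main_index(archivebox.DATA_DIR):
    print(link.timestamp, link.url)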
@@ -77,14 +77,14 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
                     yield Link.from_json(link_json)
                 except KeyError:
                     try:
-                        detail_index_path = Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / link_json['timestamp']
+                        detail_index_path = CONSTANTS.ARCHIVE_DIR / link_json['timestamp']
                         yield parse_json_link_details(str(detail_index_path))
                     except KeyError:
                         # as a last effort, try to guess the missing values out of existing ones
                         try:
                             yield Link.from_json(link_json, guess=True)
                         except KeyError:
-                            print(" {lightyellow}! Failed to load the index.json from {}".format(detail_index_path, **ANSI))
+                            # print(" {lightyellow}! Failed to load the index.json from {}".format(detail_index_path, **ANSI))
                             continue
     return ()
@@ -94,15 +94,19 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
 def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     """write a json file with some info about the link"""
+    from plugins_sys.config.constants import CONSTANTS
     out_dir = out_dir or link.link_dir
-    path = Path(out_dir) / JSON_INDEX_FILENAME
+    path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
     atomic_write(str(path), link._asdict(extended=True))
 @enforce_types
-def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=False) -> Optional[Link]:
+def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Optional[Link]:
     """load the json link index from a given directory"""
-    existing_index = Path(out_dir) / JSON_INDEX_FILENAME
+    from plugins_sys.config.constants import CONSTANTS
+    existing_index = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
     if existing_index.exists():
         with open(existing_index, 'r', encoding='utf-8') as f:
             try:
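
A small round-trip sketch for the two per-snapshot helpers above (hedged: link is assumed to be an existing Link whose link_dir directory exists, and the import path is assumed). Both helpers now resolve the filename through CONSTANTS.JSON_INDEX_FILENAME rather than the old module-level JSON_INDEX_FILENAME import.

from archivebox.index.json import write_json_link_details, parse_json_link_details  # import path assumed

# link would come from elsewhere, e.g. the parse_json_main_index() generator above
write_json_link_details(link)                       # atomically writes <link.link_dir>/index.json
reloaded = parse_json_link_details(link.link_dir)   # Optional[Link]: None if missing or unreadable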
@@ -117,7 +121,9 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=Fal
 def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
     """read through all the archive data folders and return the parsed links"""
-    for entry in os.scandir(Path(out_dir) / ARCHIVE_DIR_NAME):
+    from plugins_sys.config.constants import CONSTANTS
+    for entry in os.scandir(CONSTANTS.ARCHIVE_DIR):
         if entry.is_dir(follow_symlinks=True):
             if (Path(entry.path) / 'index.json').exists():
                 try:
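
Finally, a hedged sketch for the folder-scanning helper above. One behavioral note visible in the hunk: os.scandir now always targets CONSTANTS.ARCHIVE_DIR, so the out_dir argument no longer controls which archive directory gets walked in this loop.

import archivebox
from archivebox.index.json import parse_json_links_details  # import path assumed

# Counts every snapshot folder under CONSTANTS.ARCHIVE_DIR that contains an
# index.json which parses back into a Link.
num_snapshots = sum(1 for _ in parse_json_links_details(archivebox.DATA_DIR))
print(f'parsed {num_snapshots} snapshot index.json files')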