mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-06-01 23:38:29 -04:00
move almost all config into new archivebox.CONSTANTS
This commit is contained in:
parent
f5e8d99fdf
commit
bb65b2dbec
32 changed files with 982 additions and 840 deletions
archivebox/index
|
@ -8,38 +8,36 @@ from pathlib import Path
|
|||
from datetime import datetime, timezone
|
||||
from typing import List, Optional, Iterator, Any, Union
|
||||
|
||||
import archivebox
|
||||
|
||||
from .schema import Link
|
||||
from ..system import atomic_write
|
||||
from ..util import enforce_types
|
||||
from ..config import (
|
||||
VERSION,
|
||||
OUTPUT_DIR,
|
||||
FOOTER_INFO,
|
||||
DEPENDENCIES,
|
||||
JSON_INDEX_FILENAME,
|
||||
ARCHIVE_DIR_NAME,
|
||||
ANSI
|
||||
)
|
||||
|
||||
|
||||
MAIN_INDEX_HEADER = {
|
||||
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
|
||||
'schema': 'archivebox.index.json',
|
||||
'copyright_info': FOOTER_INFO,
|
||||
'meta': {
|
||||
'project': 'ArchiveBox',
|
||||
'version': VERSION,
|
||||
'git_sha': VERSION, # not used anymore, but kept for backwards compatibility
|
||||
'website': 'https://ArchiveBox.io',
|
||||
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
|
||||
'source': 'https://github.com/ArchiveBox/ArchiveBox',
|
||||
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
|
||||
'dependencies': DEPENDENCIES,
|
||||
},
|
||||
}
|
||||
|
||||
@enforce_types
|
||||
def generate_json_index_from_links(links: List[Link], with_headers: bool):
|
||||
from django.conf import settings
|
||||
from plugins_sys.config.apps import SERVER_CONFIG
|
||||
|
||||
MAIN_INDEX_HEADER = {
|
||||
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
|
||||
'schema': 'archivebox.index.json',
|
||||
'copyright_info': SERVER_CONFIG.FOOTER_INFO,
|
||||
'meta': {
|
||||
'project': 'ArchiveBox',
|
||||
'version': archivebox.VERSION,
|
||||
'git_sha': archivebox.VERSION, # not used anymore, but kept for backwards compatibility
|
||||
'website': 'https://ArchiveBox.io',
|
||||
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
|
||||
'source': 'https://github.com/ArchiveBox/ArchiveBox',
|
||||
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
|
||||
'dependencies': settings.BINARIES.to_dict(),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if with_headers:
|
||||
output = {
|
||||
**MAIN_INDEX_HEADER,
|
||||
|
@ -54,10 +52,12 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
|
|||
|
||||
|
||||
@enforce_types
|
||||
def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
||||
def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
|
||||
"""parse an archive index json file and return the list of links"""
|
||||
|
||||
index_path = Path(out_dir) / JSON_INDEX_FILENAME
|
||||
from plugins_sys.config.constants import CONSTANTS
|
||||
|
||||
index_path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
|
||||
if index_path.exists():
|
||||
with open(index_path, 'r', encoding='utf-8') as f:
|
||||
try:
|
||||
|
@ -77,14 +77,14 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
|||
yield Link.from_json(link_json)
|
||||
except KeyError:
|
||||
try:
|
||||
detail_index_path = Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / link_json['timestamp']
|
||||
detail_index_path = CONSTANTS.ARCHIVE_DIR / link_json['timestamp']
|
||||
yield parse_json_link_details(str(detail_index_path))
|
||||
except KeyError:
|
||||
# as a last effort, try to guess the missing values out of existing ones
|
||||
try:
|
||||
yield Link.from_json(link_json, guess=True)
|
||||
except KeyError:
|
||||
print(" {lightyellow}! Failed to load the index.json from {}".format(detail_index_path, **ANSI))
|
||||
# print(" {lightyellow}! Failed to load the index.json from {}".format(detail_index_path, **ANSI))
|
||||
continue
|
||||
return ()
|
||||
|
||||
|
@ -94,15 +94,19 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
|||
def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
    """Write the JSON detail index file for a single link.

    Args:
        link: The link to serialize; its ``_asdict(extended=True)`` form is
            what gets written.
        out_dir: Directory to write into. When falsy (``None`` or empty),
            ``link.link_dir`` is used instead.

    The file is named ``CONSTANTS.JSON_INDEX_FILENAME`` and is written via
    ``atomic_write``.
    """
    # Function-local import, matching how the other functions in this module
    # obtain CONSTANTS (NOTE(review): presumably to avoid loading config at
    # module import time -- confirm).
    from plugins_sys.config.constants import CONSTANTS

    out_dir = out_dir or link.link_dir

    # Only the CONSTANTS-based filename is used; the old module-level
    # JSON_INDEX_FILENAME assignment was dead (immediately overwritten).
    path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
    atomic_write(str(path), link._asdict(extended=True))
|
||||
|
||||
|
||||
@enforce_types
|
||||
def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=False) -> Optional[Link]:
|
||||
def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Optional[Link]:
|
||||
"""load the json link index from a given directory"""
|
||||
existing_index = Path(out_dir) / JSON_INDEX_FILENAME
|
||||
from plugins_sys.config.constants import CONSTANTS
|
||||
|
||||
existing_index = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
|
||||
if existing_index.exists():
|
||||
with open(existing_index, 'r', encoding='utf-8') as f:
|
||||
try:
|
||||
|
@ -117,7 +121,9 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=Fal
|
|||
def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
|
||||
"""read through all the archive data folders and return the parsed links"""
|
||||
|
||||
for entry in os.scandir(Path(out_dir) / ARCHIVE_DIR_NAME):
|
||||
from plugins_sys.config.constants import CONSTANTS
|
||||
|
||||
for entry in os.scandir(CONSTANTS.ARCHIVE_DIR):
|
||||
if entry.is_dir(follow_symlinks=True):
|
||||
if (Path(entry.path) / 'index.json').exists():
|
||||
try:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue