Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-23 03:06:55 -04:00
Merge branch 'master' into tags

Commit a850b4a9d9: 31 changed files with 651 additions and 396 deletions.
@@ -575,7 +575,7 @@ def is_archived(link: Link) -> bool:
     return is_valid(link) and link.is_archived
 
 
 def is_unarchived(link: Link) -> bool:
-    if not os.path.exists(link.link_dir):
+    if not Path(link.link_dir).exists():
         return True
     return not link.is_archived
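The change above is the pattern this commit applies across the 31 changed files: string-based os.path calls are swapped for their pathlib equivalents. A minimal standalone sketch of the equivalence (the path value is a made-up example, not ArchiveBox data):

    import os
    from pathlib import Path

    link_dir = '/tmp/archive/1556862793'  # hypothetical link directory

    # old style: module-level function on a plain string
    old_result = os.path.exists(link_dir)

    # new style: the same check as a method on a Path object
    new_result = Path(link_dir).exists()

    assert old_result == new_result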
@@ -1,7 +1,5 @@
 __package__ = 'archivebox.index'
 
-import os
-
 from string import Template
 from datetime import datetime
 from typing import List, Optional, Iterator, Mapping
@@ -30,11 +28,10 @@ from ..config import (
     FAVICON_FILENAME,
 )
 
-join = lambda *paths: os.path.join(*paths)
-MAIN_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index.html')
-MINIMAL_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index_minimal.html')
-MAIN_INDEX_ROW_TEMPLATE = join(TEMPLATES_DIR, 'main_index_row.html')
-LINK_DETAILS_TEMPLATE = join(TEMPLATES_DIR, 'link_details.html')
+MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
+MINIMAL_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_minimal.html')
+MAIN_INDEX_ROW_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_row.html')
+LINK_DETAILS_TEMPLATE = str(Path(TEMPLATES_DIR) / 'link_details.html')
 TITLE_LOADING_MSG = 'Not yet archived...'
 
 
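Replacing the ad-hoc join lambda with the pathlib `/` operator keeps the module-level constants as plain strings via str(). A small sketch showing the two forms build the same path (TEMPLATES_DIR here is a placeholder, not the real config value):

    import os
    from pathlib import Path

    TEMPLATES_DIR = '/opt/archivebox/templates'  # placeholder

    # os.path.join(...) and Path(...) / ... produce the same path string
    old = os.path.join(TEMPLATES_DIR, 'main_index.html')
    new = str(Path(TEMPLATES_DIR) / 'main_index.html')
    assert old == new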
@@ -44,8 +41,8 @@ TITLE_LOADING_MSG = 'Not yet archived...'
 def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
     """parse an archive index html file and return the list of urls"""
 
-    index_path = join(out_dir, HTML_INDEX_FILENAME)
-    if os.path.exists(index_path):
+    index_path = Path(out_dir) / HTML_INDEX_FILENAME
+    if index_path.exists():
         with open(index_path, 'r', encoding='utf-8') as f:
             for line in f:
                 if 'class="link-url"' in line:
@@ -56,12 +53,12 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
 def write_html_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
     """write the html link index to a given path"""
 
-    copy_and_overwrite(join(TEMPLATES_DIR, FAVICON_FILENAME), join(out_dir, FAVICON_FILENAME))
-    copy_and_overwrite(join(TEMPLATES_DIR, ROBOTS_TXT_FILENAME), join(out_dir, ROBOTS_TXT_FILENAME))
-    copy_and_overwrite(join(TEMPLATES_DIR, STATIC_DIR_NAME), join(out_dir, STATIC_DIR_NAME))
+    copy_and_overwrite(str(Path(TEMPLATES_DIR) / FAVICON_FILENAME), str(out_dir / FAVICON_FILENAME))
+    copy_and_overwrite(str(Path(TEMPLATES_DIR) / ROBOTS_TXT_FILENAME), str(out_dir / ROBOTS_TXT_FILENAME))
+    copy_and_overwrite(str(Path(TEMPLATES_DIR) / STATIC_DIR_NAME), str(out_dir / STATIC_DIR_NAME))
 
     rendered_html = main_index_template(links, finished=finished)
-    atomic_write(join(out_dir, HTML_INDEX_FILENAME), rendered_html)
+    atomic_write(str(out_dir / HTML_INDEX_FILENAME), rendered_html)
 
 
 @enforce_types
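atomic_write is an ArchiveBox helper whose definition is not shown in this diff; note that it is still handed str paths here. The general atomic-write technique is to write a temp file in the same directory and rename it into place, roughly as below (a hedged sketch of the technique, not ArchiveBox's implementation):

    import os
    import tempfile

    def atomic_write_sketch(path: str, contents: str) -> None:
        """Write contents so readers never observe a half-written file."""
        # the temp file must be on the same filesystem for the rename to be atomic
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or '.')
        try:
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                f.write(contents)
            os.replace(tmp_path, path)  # atomic rename on POSIX
        except BaseException:
            os.remove(tmp_path)  # clean up the temp file on failure
            raise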
@@ -100,7 +97,7 @@ def main_index_row_template(link: Link) -> str:
 
         # before pages are finished archiving, show fallback loading favicon
         'favicon_url': (
-            join(ARCHIVE_DIR_NAME, link.timestamp, 'favicon.ico')
+            str(Path(ARCHIVE_DIR_NAME) / link.timestamp / 'favicon.ico')
             # if link['is_archived'] else 'data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs='
         ),
 
@@ -119,7 +116,7 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     out_dir = out_dir or link.link_dir
 
     rendered_html = link_details_template(link)
-    atomic_write(join(out_dir, HTML_INDEX_FILENAME), rendered_html)
+    atomic_write(str(Path(out_dir) / HTML_INDEX_FILENAME), rendered_html)
 
 
 @enforce_types
@@ -45,8 +45,8 @@ MAIN_INDEX_HEADER = {
 def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
     """parse an archive index json file and return the list of links"""
 
-    index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
-    if os.path.exists(index_path):
+    index_path = Path(out_dir) / JSON_INDEX_FILENAME
+    if index_path.exists():
         with open(index_path, 'r', encoding='utf-8') as f:
             links = pyjson.load(f)['links']
             for link_json in links:
@@ -86,7 +86,7 @@ def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
         'last_run_cmd': sys.argv,
         'links': links,
     }
-    atomic_write(os.path.join(out_dir, JSON_INDEX_FILENAME), main_index_json)
+    atomic_write(str(Path(out_dir) / JSON_INDEX_FILENAME), main_index_json)
 
 
 ### Link Details Index
@@ -96,15 +96,15 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     """write a json file with some info about the link"""
 
     out_dir = out_dir or link.link_dir
-    path = os.path.join(out_dir, JSON_INDEX_FILENAME)
-    atomic_write(path, link._asdict(extended=True))
+    path = Path(out_dir) / JSON_INDEX_FILENAME
+    atomic_write(str(path), link._asdict(extended=True))
 
 
 @enforce_types
 def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=False) -> Optional[Link]:
     """load the json link index from a given directory"""
-    existing_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
-    if os.path.exists(existing_index):
+    existing_index = Path(out_dir) / JSON_INDEX_FILENAME
+    if existing_index.exists():
         with open(existing_index, 'r', encoding='utf-8') as f:
             try:
                 link_json = pyjson.load(f)
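Since the Path object is passed straight to open(), no str() conversion is needed at that call site: open() accepts any os.PathLike (PEP 519, Python 3.6+). For example (the directory is a throwaway demo location):

    import json
    from pathlib import Path

    out_dir = Path('/tmp/archivebox-demo')  # hypothetical output dir
    out_dir.mkdir(parents=True, exist_ok=True)
    index_path = out_dir / 'index.json'
    index_path.write_text('{"url": "https://example.com"}', encoding='utf-8')

    # open() takes the Path directly
    with open(index_path, 'r', encoding='utf-8') as f:
        link_json = json.load(f)
    print(link_json['url'])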
@@ -118,9 +118,9 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=False) -> Optional[Link]:
 def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
     """read through all the archive data folders and return the parsed links"""
 
-    for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
+    for entry in os.scandir(Path(out_dir) / ARCHIVE_DIR_NAME):
         if entry.is_dir(follow_symlinks=True):
-            if os.path.exists(os.path.join(entry.path, 'index.json')):
+            if (Path(entry.path) / 'index.json').exists():
                 try:
                     link = parse_json_link_details(entry.path)
                 except KeyError:
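os.scandir() likewise accepts path-like objects (Python 3.6+), so the Path built from out_dir can be passed as-is while each yielded DirEntry still exposes a plain-string .path. A runnable sketch over a throwaway directory layout:

    import os
    from pathlib import Path

    archive_dir = Path('/tmp/archivebox-demo') / 'archive'  # hypothetical layout
    (archive_dir / '1556862793').mkdir(parents=True, exist_ok=True)

    # scandir yields DirEntry objects; entry.path is a plain string
    for entry in os.scandir(archive_dir):
        if entry.is_dir(follow_symlinks=True):
            print(entry.path, (Path(entry.path) / 'index.json').exists())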
@@ -1,6 +1,5 @@
 __package__ = 'archivebox.index'
 
-import os
-
+from pathlib import Path
 
 from datetime import datetime, timedelta
@@ -250,7 +249,7 @@ class Link:
     @property
     def link_dir(self) -> str:
         from ..config import CONFIG
-        return os.path.join(CONFIG['ARCHIVE_DIR'], self.timestamp)
+        return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
 
     @property
     def archive_path(self) -> str:
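The property's declared return type stays str, so the Path is converted back before returning and existing callers are unaffected. A reduced sketch of the same shape (CONFIG is a stand-in dict, not ArchiveBox's real config):

    from pathlib import Path

    CONFIG = {'ARCHIVE_DIR': '/tmp/archivebox-demo/archive'}  # stand-in

    class LinkSketch:
        def __init__(self, timestamp: str) -> None:
            self.timestamp = timestamp

        @property
        def link_dir(self) -> str:
            # build with pathlib, but keep the str contract of the property
            return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)

    print(LinkSketch('1556862793').link_dir)  # .../archive/1556862793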
@@ -369,7 +368,7 @@ class Link:
         )
 
         return any(
-            os.path.exists(os.path.join(ARCHIVE_DIR, self.timestamp, path))
+            (Path(ARCHIVE_DIR) / self.timestamp / path).exists()
             for path in output_paths
         )
 
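The any() generator short-circuits, returning True at the first output file that exists rather than checking every candidate. Standalone equivalent (directory and file names are illustrative):

    from pathlib import Path

    ARCHIVE_DIR = '/tmp/archivebox-demo/archive'  # illustrative
    timestamp = '1556862793'
    output_paths = ('index.html', 'output.pdf', 'screenshot.png')

    # stops checking as soon as one candidate path exists
    is_archived = any(
        (Path(ARCHIVE_DIR) / timestamp / path).exists()
        for path in output_paths
    )
    print(is_archived)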