mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-30 14:35:20 -04:00
add timezone support, tons of CSS and layout improvements, more detailed snapshot admin form info, ability to sort by recently updated, better grid view styling, better table layouts, better dark mode support
commit a9986f1f05 (parent cf7d7e4990)
28 changed files with 681 additions and 549 deletions
archivebox/parsers/__init__.py

@@ -11,7 +11,7 @@ import re
 from io import StringIO

 from typing import IO, Tuple, List, Optional
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path

 from ..system import atomic_write
@@ -147,7 +147,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,

 @enforce_types
 def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
-    ts = str(datetime.now().timestamp()).split('.', 1)[0]
+    ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
     source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
     atomic_write(source_path, raw_text)
     log_source_saved(source_file=source_path)
@@ -157,7 +157,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
 @enforce_types
 def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
     """download a given url's content into output/sources/domain-<timestamp>.txt"""
-    ts = str(datetime.now().timestamp()).split('.', 1)[0]
+    ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
     source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))

     if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
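The same one-line pattern repeats through the rest of the commit: every naive datetime.now() becomes an explicit datetime.now(timezone.utc). A minimal standalone sketch of why the distinction matters (standard library only, not part of the diff):

    from datetime import datetime, timezone

    naive = datetime.now()              # no tzinfo: wall-clock time, timezone left implicit
    aware = datetime.now(timezone.utc)  # tzinfo=UTC: an unambiguous point in time

    print(naive.tzinfo)  # None
    print(aware.tzinfo)  # UTC

    # .timestamp() on a naive datetime has to assume the host's local timezone;
    # on an aware datetime the offset travels with the object.
    print(aware.timestamp())

    # Mixing the two styles in comparisons raises, which is why standardizing
    # on one convention matters:
    try:
        naive < aware
    except TypeError as err:
        print(err)  # can't compare offset-naive and offset-aware datetimes

    # The sources-file helpers above additionally truncate to whole seconds:
    ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
    print(ts)  # e.g. '1627374265'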
archivebox/parsers/generic_html.py

@@ -4,7 +4,7 @@ __package__ = 'archivebox.parsers'
 import re

 from typing import IO, Iterable, Optional
-from datetime import datetime
+from datetime import datetime, timezone

 from ..index.schema import Link
 from ..util import (
@@ -46,7 +46,7 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
            for archivable_url in re.findall(URL_REGEX, url):
                yield Link(
                    url=htmldecode(archivable_url),
-                   timestamp=str(datetime.now().timestamp()),
+                   timestamp=str(datetime.now(timezone.utc).timestamp()),
                    title=None,
                    tags=None,
                    sources=[html_file.name],
archivebox/parsers/generic_json.py

@@ -3,7 +3,7 @@ __package__ = 'archivebox.parsers'
 import json

 from typing import IO, Iterable
-from datetime import datetime
+from datetime import datetime, timezone

 from ..index.schema import Link
 from ..util import (
@@ -30,7 +30,7 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
                raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]')

            # Parse the timestamp
-           ts_str = str(datetime.now().timestamp())
+           ts_str = str(datetime.now(timezone.utc).timestamp())
            if link.get('timestamp'):
                # chrome/ff histories use a very precise timestamp
                ts_str = str(link['timestamp'] / 10000000)
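For generic_json the interesting part is the fallback order: the current UTC time is only used when the JSON entry carries no timestamp of its own. A hypothetical, simplified rendering of that logic (function name and sample values invented for illustration):

    from datetime import datetime, timezone

    def pick_timestamp(link: dict) -> str:
        # default: current UTC time, matching the new behavior in the hunk above
        ts_str = str(datetime.now(timezone.utc).timestamp())
        if link.get('timestamp'):
            # per the source comment, browser histories store a very precise
            # timestamp; dividing by 10000000 scales it down to Unix seconds
            ts_str = str(link['timestamp'] / 10000000)
        return ts_str

    print(pick_timestamp({'url': 'https://example.com'}))    # falls back to now (UTC)
    print(pick_timestamp({'url': 'https://example.com',
                          'timestamp': 15945678901234567}))  # uses the entry's own value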
archivebox/parsers/generic_txt.py

@@ -4,7 +4,7 @@ __description__ = 'Plain Text'
 import re

 from typing import IO, Iterable
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path

 from ..index.schema import Link
@@ -29,7 +29,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
        if Path(line).exists():
            yield Link(
                url=line,
-               timestamp=str(datetime.now().timestamp()),
+               timestamp=str(datetime.now(timezone.utc).timestamp()),
                title=None,
                tags=None,
                sources=[text_file.name],
@@ -42,7 +42,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
        for url in re.findall(URL_REGEX, line):
            yield Link(
                url=htmldecode(url),
-               timestamp=str(datetime.now().timestamp()),
+               timestamp=str(datetime.now(timezone.utc).timestamp()),
                title=None,
                tags=None,
                sources=[text_file.name],
@@ -54,7 +54,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
        for sub_url in re.findall(URL_REGEX, line[1:]):
            yield Link(
                url=htmldecode(sub_url),
-               timestamp=str(datetime.now().timestamp()),
+               timestamp=str(datetime.now(timezone.utc).timestamp()),
                title=None,
                tags=None,
                sources=[text_file.name],
archivebox/parsers/pinboard_rss.py

@@ -2,7 +2,7 @@ __package__ = 'archivebox.parsers'

 from typing import IO, Iterable
-from datetime import datetime
+from datetime import datetime, timezone

 from xml.etree import ElementTree

@@ -36,7 +36,7 @@ def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
        if ts_str:
            time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
        else:
-           time = datetime.now()
+           time = datetime.now(timezone.utc)

        yield Link(
            url=htmldecode(url),
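pinboard_rss is the one place where the fix guards a branch rather than a default: strptime with %z already returns an offset-aware datetime, so the else branch must produce an aware value too, or code that mixes the two branches would raise. A small demonstration of that behavior (standard library only, sample string invented, not part of the diff):

    from datetime import datetime, timezone

    # %z consumes the offset suffix and attaches a tzinfo, so this is offset-aware:
    parsed = datetime.strptime("2020-07-22T04:15:00+00:00", "%Y-%m-%dT%H:%M:%S%z")
    print(parsed.tzinfo)  # UTC

    # The old fallback was naive, so combining it with a parsed value raised:
    try:
        parsed - datetime.now()
    except TypeError as err:
        print(err)  # can't subtract offset-naive and offset-aware datetimes

    # The new fallback is aware, so both branches mix freely:
    print(parsed - datetime.now(timezone.utc))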
archivebox/parsers/url_list.py

@@ -4,7 +4,7 @@ __description__ = 'URL list'
 import re

 from typing import IO, Iterable
-from datetime import datetime
+from datetime import datetime, timezone

 from ..index.schema import Link
 from ..util import (
@@ -25,7 +25,7 @@ def parse_url_list(text_file: IO[str], **_kwargs) -> Iterable[Link]:

        yield Link(
            url=url,
-           timestamp=str(datetime.now().timestamp()),
+           timestamp=str(datetime.now(timezone.utc).timestamp()),
            title=None,
            tags=None,
            sources=[text_file.name],