add timezone support, tons of CSS and layout improvements, more detailed snapshot admin form info, ability to sort by recently updated, better grid view styling, better table layouts, better dark mode support

This commit is contained in:
Nick Sweeting 2021-04-10 04:19:30 -04:00
parent cf7d7e4990
commit a9986f1f05
28 changed files with 681 additions and 549 deletions

View file

@@ -11,7 +11,7 @@ import re
from io import StringIO
from typing import IO, Tuple, List, Optional
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from ..system import atomic_write
@@ -147,7 +147,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
@enforce_types
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
ts = str(datetime.now().timestamp()).split('.', 1)[0]
ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
atomic_write(source_path, raw_text)
log_source_saved(source_file=source_path)
@@ -157,7 +157,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
@enforce_types
def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
"""download a given url's content into output/sources/domain-<timestamp>.txt"""
ts = str(datetime.now().timestamp()).split('.', 1)[0]
ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):

View file

@@ -4,7 +4,7 @@ __package__ = 'archivebox.parsers'
import re
from typing import IO, Iterable, Optional
from datetime import datetime
from datetime import datetime, timezone
from ..index.schema import Link
from ..util import (
@@ -46,7 +46,7 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
for archivable_url in re.findall(URL_REGEX, url):
yield Link(
url=htmldecode(archivable_url),
timestamp=str(datetime.now().timestamp()),
timestamp=str(datetime.now(timezone.utc).timestamp()),
title=None,
tags=None,
sources=[html_file.name],

View file

@@ -3,7 +3,7 @@ __package__ = 'archivebox.parsers'
import json
from typing import IO, Iterable
from datetime import datetime
from datetime import datetime, timezone
from ..index.schema import Link
from ..util import (
@@ -30,7 +30,7 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]')
# Parse the timestamp
ts_str = str(datetime.now().timestamp())
ts_str = str(datetime.now(timezone.utc).timestamp())
if link.get('timestamp'):
# chrome/ff histories use a very precise timestamp
ts_str = str(link['timestamp'] / 10000000)

View file

@@ -4,7 +4,7 @@ __description__ = 'Plain Text'
import re
from typing import IO, Iterable
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from ..index.schema import Link
@@ -29,7 +29,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
if Path(line).exists():
yield Link(
url=line,
timestamp=str(datetime.now().timestamp()),
timestamp=str(datetime.now(timezone.utc).timestamp()),
title=None,
tags=None,
sources=[text_file.name],
@@ -42,7 +42,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
for url in re.findall(URL_REGEX, line):
yield Link(
url=htmldecode(url),
timestamp=str(datetime.now().timestamp()),
timestamp=str(datetime.now(timezone.utc).timestamp()),
title=None,
tags=None,
sources=[text_file.name],
@@ -54,7 +54,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
for sub_url in re.findall(URL_REGEX, line[1:]):
yield Link(
url=htmldecode(sub_url),
timestamp=str(datetime.now().timestamp()),
timestamp=str(datetime.now(timezone.utc).timestamp()),
title=None,
tags=None,
sources=[text_file.name],

View file

@@ -2,7 +2,7 @@ __package__ = 'archivebox.parsers'
from typing import IO, Iterable
from datetime import datetime
from datetime import datetime, timezone
from xml.etree import ElementTree
@@ -36,7 +36,7 @@ def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
if ts_str:
time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
else:
time = datetime.now()
time = datetime.now(timezone.utc)
yield Link(
url=htmldecode(url),

View file

@@ -4,7 +4,7 @@ __description__ = 'URL list'
import re
from typing import IO, Iterable
from datetime import datetime
from datetime import datetime, timezone
from ..index.schema import Link
from ..util import (
@@ -25,7 +25,7 @@ def parse_url_list(text_file: IO[str], **_kwargs) -> Iterable[Link]:
yield Link(
url=url,
timestamp=str(datetime.now().timestamp()),
timestamp=str(datetime.now(timezone.utc).timestamp()),
title=None,
tags=None,
sources=[text_file.name],