Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-22 02:45:10 -04:00)
Merge branch 'master' into django
Commit cb67b09f9d
29 changed files with 418 additions and 911 deletions
@@ -1 +1 @@
-0.4.2
+0.4.3

@@ -3,4 +3,5 @@ __package__ = 'archivebox'
 from . import core
 from . import cli

+# The main CLI source code is in 'archivebox/main.py'
 from .main import *

@@ -44,6 +44,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
     'TIMEOUT': {'type': int, 'default': 60},
     'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
     'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
+    'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
     'URL_BLACKLIST': {'type': str, 'default': None},
 },

@@ -77,6 +78,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
     'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
     'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},

+    'CURL_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) curl/{CURL_VERSION}'},
     'WGET_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}'},
     'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36'},

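Note that the user-agent defaults above are templates rather than literals: the {VERSION} and {CURL_VERSION} placeholders are filled in later by the derived-config pass (see the @@ -210 hunk below), which calls str.format with the resolved config. A minimal illustration, with made-up values:

    # Illustrative only: how the {PLACEHOLDER} fields in the defaults get expanded.
    template = 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) curl/{CURL_VERSION}'
    config = {'VERSION': '0.4.3', 'CURL_VERSION': '7.64.0'}  # assumed example values
    print(template.format(**config))
    # ArchiveBox/0.4.3 (+https://github.com/pirate/ArchiveBox/) curl/7.64.0
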
@@ -85,6 +87,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {

     'CHROME_HEADLESS': {'type': bool, 'default': True},
     'CHROME_SANDBOX': {'type': bool, 'default': True},

 },

 'DEPENDENCY_CONFIG': {

@@ -130,7 +133,7 @@ DEFAULT_CLI_COLORS = {
 ANSI = {k: '' for k in DEFAULT_CLI_COLORS.keys()}

 STATICFILE_EXTENSIONS = {
-    # 99.999% of the time, URLs ending in these extentions are static files
+    # 99.999% of the time, URLs ending in these extensions are static files
     # that can be downloaded as-is, not html pages that need to be rendered
     'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
     'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',

@@ -147,7 +150,7 @@ STATICFILE_EXTENSIONS = {
     # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
     # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml

-    # Thse are always treated as pages, not as static files, never add them:
+    # These are always treated as pages, not as static files, never add them:
     # html, htm, shtml, xhtml, xml, aspx, php, cgi
 }

@@ -210,8 +213,9 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'DJANGO_BINARY': {'default': lambda c: django.__file__.replace('__init__.py', 'bin/django-admin.py')},
     'DJANGO_VERSION': {'default': lambda c: '{}.{}.{} {} ({})'.format(*django.VERSION)},

-    'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_ARCHIVE_DOT_ORG'])},
+    'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['FETCH_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
     'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
+    'CURL_USER_AGENT': {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
     'SAVE_FAVICON': {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']},
     'SAVE_ARCHIVE_DOT_ORG': {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']},

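Each entry in DERIVED_CONFIG_DEFAULTS computes its value with a lambda that receives the partially-resolved config dict c, so later keys can depend on earlier ones: USE_CURL gates CURL_VERSION, and the new CURL_USER_AGENT entry interpolates VERSION and CURL_VERSION into the template shown earlier. A minimal sketch of how such a resolver might work (illustrative only, not ArchiveBox's actual loader in archivebox/config.py):

    # Sketch: resolve derived defaults in declaration order, passing the
    # partially-built config dict to each lambda so entries can depend on
    # keys resolved before them.
    derived_defaults = {
        'USE_CURL':        {'default': lambda c: c['SAVE_FAVICON'] or c['SAVE_ARCHIVE_DOT_ORG']},
        'CURL_VERSION':    {'default': lambda c: '7.64.0' if c['USE_CURL'] else None},  # stand-in for bin_version()
        'CURL_USER_AGENT': {'default': lambda c: 'ArchiveBox/{VERSION} curl/{CURL_VERSION}'.format(**c)},
    }

    config = {'VERSION': '0.4.3', 'SAVE_FAVICON': True, 'SAVE_ARCHIVE_DOT_ORG': True}
    for key, spec in derived_defaults.items():   # dicts preserve insertion order
        config[key] = spec['default'](config)

    print(config['CURL_USER_AGENT'])             # ArchiveBox/0.4.3 curl/7.64.0
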
@@ -480,6 +484,7 @@ def find_chrome_binary() -> Optional[str]:
     'chromium-browser',
     'chromium',
     '/Applications/Chromium.app/Contents/MacOS/Chromium',
     'chrome',
     'google-chrome',
     '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
     'google-chrome-stable',

@@ -506,6 +511,7 @@ def find_chrome_data_dir() -> Optional[str]:
     '~/.config/chromium',
     '~/Library/Application Support/Chromium',
     '~/AppData/Local/Chromium/User Data',
     '~/.config/chrome',
     '~/.config/google-chrome',
     '~/Library/Application Support/Google/Chrome',
     '~/AppData/Local/Google/Chrome/User Data',

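find_chrome_binary() and find_chrome_data_dir() both walk an ordered candidate list and return the first match, preferring Chromium over Chrome. A sketch of that search pattern, using a hypothetical helper (shutil.which for $PATH lookups, os.path.exists for absolute paths):

    import os
    import shutil
    from typing import List, Optional

    CHROME_CANDIDATES = [
        'chromium-browser',
        'chromium',
        '/Applications/Chromium.app/Contents/MacOS/Chromium',
        'google-chrome',
    ]

    def first_available_binary(candidates: List[str]) -> Optional[str]:
        """Return the first candidate that exists on disk or resolves on $PATH."""
        for name in candidates:
            if os.path.exists(name):        # absolute paths (e.g. macOS .app bundles)
                return name
            resolved = shutil.which(name)   # bare names searched on $PATH
            if resolved:
                return resolved
        return None
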
@@ -13,6 +13,7 @@ from ..config import (
     CURL_BINARY,
     CURL_VERSION,
     CHECK_SSL_VALIDITY,
+    CURL_USER_AGENT,
 )
 from ..cli.logging import TimedProgress

@@ -37,14 +38,16 @@ def save_favicon(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
         '--max-time', str(timeout),
         '--location',
         '--output', str(output),
+        *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         'https://www.google.com/s2/favicons?domain={}'.format(domain(link.url)),
     ]
-    status = 'succeeded'
+    status = 'pending'
     timer = TimedProgress(timeout, prefix=' ')
     try:
         run(cmd, stdout=PIPE, stderr=PIPE, cwd=out_dir, timeout=timeout)
         chmod_file(output, cwd=out_dir)
+        status = 'succeeded'
     except Exception as err:
         status = 'failed'
         output = err

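Two things change in save_favicon(): curl now sends the configurable CURL_USER_AGENT, and the status flag starts at 'pending' instead of optimistically at 'succeeded', flipping only after both the download and the chmod complete, so any exception leaves an accurate 'failed' record. The shape of that bookkeeping pattern, reduced to a sketch (hypothetical helper, not the real extractor):

    from subprocess import run, PIPE

    def fetch_with_status(cmd, timeout=60):
        """Report 'succeeded'/'failed' only once the outcome is actually known."""
        status, output = 'pending', None
        try:
            run(cmd, stdout=PIPE, stderr=PIPE, timeout=timeout, check=True)
            status = 'succeeded'      # set only after every step finishes
        except Exception as err:
            status = 'failed'
            output = err              # keep the exception as the result payload
        return status, output
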
@@ -24,6 +24,7 @@ from ..config import (
     SAVE_WARC,
     WGET_BINARY,
     WGET_VERSION,
+    RESTRICT_FILE_NAMES,
     CHECK_SSL_VALIDITY,
     SAVE_WGET_REQUISITES,
     WGET_AUTO_COMPRESSION,

@@ -66,14 +67,14 @@ def save_wget(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> ArchiveResult:
         '--span-hosts',
         '--no-parent',
         '-e', 'robots=off',
-        '--restrict-file-names=windows',
         '--timeout={}'.format(timeout),
+        *([] if SAVE_WARC else ['--timestamping']),
+        *(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
         *(['--warc-file={}'.format(warc_path)] if SAVE_WARC else []),
         *(['--page-requisites'] if SAVE_WGET_REQUISITES else []),
         *(['--user-agent={}'.format(WGET_USER_AGENT)] if WGET_USER_AGENT else []),
         *(['--load-cookies', COOKIES_FILE] if COOKIES_FILE else []),
         *(['--compression=auto'] if WGET_AUTO_COMPRESSION else []),
-        *([] if SAVE_WARC else ['--timestamping']),
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']),
         link.url,
     ]

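Both extractors build their argv with the *([...] if flag else []) idiom: each optional flag contributes either a small list or nothing, so the command line is assembled declaratively with no if/append chains. In isolation (WGET_USER_AGENT here is a stand-in value):

    # Conditional-argument splat: each *-expression expands to [] when disabled.
    SAVE_WARC = False
    WGET_USER_AGENT = 'ArchiveBox/0.4.3 wget/1.20'

    cmd = [
        'wget',
        '--timeout=60',
        *([] if SAVE_WARC else ['--timestamping']),
        *(['--user-agent={}'.format(WGET_USER_AGENT)] if WGET_USER_AGENT else []),
        'https://example.com',
    ]
    print(cmd)   # flags appear only when their toggle is truthy
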
@@ -325,7 +325,8 @@ def patch_main_index(link: Link, out_dir: str=OUTPUT_DIR) -> None:
     # Patch HTML main index
     html_path = os.path.join(out_dir, 'index.html')
     with open(html_path, 'r') as f:
-        html = f.read().split('\n')
+        html = f.read().splitlines()

     for idx, line in enumerate(html):
         if title and ('<span data-title-for="{}"'.format(link.url) in line):
             html[idx] = '<span>{}</span>'.format(title)

@@ -333,7 +334,7 @@ def patch_main_index(link: Link, out_dir: str=OUTPUT_DIR) -> None:
             html[idx] = '<span>{}</span>'.format(successful)
             break

-    atomic_write('\n'.join(html), html_path)
+    atomic_write(html_path, '\n'.join(html))


 ### Link Details Index

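Swapping split('\n') for splitlines() is not purely cosmetic: splitlines() handles \r\n and other line terminators and does not leave a trailing empty element after a final newline, which keeps the later '\n'.join(html) round-trip clean. Compare:

    text = 'a\r\nb\n'
    print(text.split('\n'))    # ['a\r', 'b', '']  -- stray \r and trailing empty string
    print(text.splitlines())   # ['a', 'b']        -- terminators stripped uniformly
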
@@ -41,7 +41,7 @@ TITLE_LOADING_MSG = 'Not yet archived...'
 def parse_html_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[str]:
     """parse an archive index html file and return the list of urls"""

-    index_path = os.path.join(out_dir, HTML_INDEX_FILENAME)
+    index_path = join(out_dir, HTML_INDEX_FILENAME)
     if os.path.exists(index_path):
         with open(index_path, 'r', encoding='utf-8') as f:
             for line in f:

@@ -58,7 +58,7 @@ def write_html_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished:
     copy_and_overwrite(join(TEMPLATES_DIR, STATIC_DIR_NAME), join(out_dir, STATIC_DIR_NAME))

     rendered_html = main_index_template(links, finished=finished)
-    atomic_write(rendered_html, join(out_dir, HTML_INDEX_FILENAME))
+    atomic_write(join(out_dir, HTML_INDEX_FILENAME), rendered_html)


 @enforce_types

@@ -116,7 +116,7 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     out_dir = out_dir or link.link_dir

     rendered_html = link_details_template(link)
-    atomic_write(rendered_html, join(out_dir, HTML_INDEX_FILENAME))
+    atomic_write(join(out_dir, HTML_INDEX_FILENAME), rendered_html)


 @enforce_types

@@ -74,7 +74,7 @@ def write_json_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
         'last_run_cmd': sys.argv,
         'links': links,
     }
-    atomic_write(main_index_json, os.path.join(out_dir, JSON_INDEX_FILENAME))
+    atomic_write(os.path.join(out_dir, JSON_INDEX_FILENAME), main_index_json)


 ### Link Details Index

@@ -86,7 +86,7 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
     out_dir = out_dir or link.link_dir
     path = os.path.join(out_dir, JSON_INDEX_FILENAME)

-    atomic_write(link._asdict(extended=True), path)
+    atomic_write(path, link._asdict(extended=True))


 @enforce_types

@@ -13,7 +13,6 @@ import os
 from typing import Tuple, List
 from datetime import datetime

-from ..index.schema import Link
 from ..system import atomic_write
 from ..config import (
     ANSI,

@@ -29,6 +28,7 @@ from ..util import (
     enforce_types,
     URL_REGEX,
 )
+from ..index.schema import Link
 from ..cli.logging import pretty_path, TimedProgress
 from .pocket_html import parse_pocket_html_export
 from .pinboard_rss import parse_pinboard_rss_export

@@ -93,8 +93,7 @@ def save_stdin_to_sources(raw_text: str, out_dir: str=OUTPUT_DIR) -> str:
     ts = str(datetime.now().timestamp()).split('.', 1)[0]

     source_path = os.path.join(sources_dir, '{}-{}.txt'.format('stdin', ts))
-
-    atomic_write(raw_text, source_path)
+    atomic_write(source_path, raw_text)
     return source_path

@@ -112,6 +111,7 @@ def save_file_to_sources(path: str, timeout: int=TIMEOUT, out_dir: str=OUTPUT_DIR) -> str:
     source_path = os.path.join(sources_dir, '{}-{}.txt'.format(basename(path), ts))

     if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
+        # Source is a URL that needs to be downloaded
         source_path = os.path.join(sources_dir, '{}-{}.txt'.format(domain(path), ts))
         print('{}[*] [{}] Downloading {}{}'.format(
             ANSI['green'],

@@ -134,10 +134,11 @@ def save_file_to_sources(path: str, timeout: int=TIMEOUT, out_dir: str=OUTPUT_DIR) -> str:
         raise SystemExit(1)

     else:
+        # Source is a path to a local file on the filesystem
         with open(path, 'r') as f:
             raw_source_text = f.read()

-    atomic_write(raw_source_text, source_path)
+    atomic_write(source_path, raw_source_text)

     print(' > {}'.format(pretty_path(source_path)))

@@ -8,6 +8,7 @@ import json as pyjson
 from typing import Optional, Union, Set, Tuple

 from crontab import CronTab
+from atomicwrites import atomic_write as awrite

 from subprocess import (
     Popen,

@@ -22,10 +23,10 @@ from .util import enforce_types, ExtendedEncoder
 from .config import OUTPUT_PERMISSIONS


 def run(*popenargs, input=None, capture_output=False, timeout=None, check=False, **kwargs):
     """Patched version of subprocess.run to fix blocking io making timeout ineffective"""

     if input is not None:
         if 'stdin' in kwargs:
             raise ValueError('stdin and input arguments may not both be used.')

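Context for the patched run() above: reading a child's output through blocking PIPEs can make timeout= ineffective, because if the pipe buffer fills, the child stalls on write and never exits within the timeout window. The usual cure is Popen plus communicate(timeout=...) plus a kill on expiry; a sketch of that core loop under those assumptions (not the full ArchiveBox implementation, whose remainder is elided in this diff):

    from subprocess import Popen, PIPE, TimeoutExpired, CompletedProcess

    def run_with_timeout(args, timeout=None):
        """Drain stdout/stderr with communicate() so a full pipe can't stall the child."""
        with Popen(args, stdout=PIPE, stderr=PIPE) as process:
            try:
                stdout, stderr = process.communicate(timeout=timeout)
            except TimeoutExpired:
                process.kill()            # don't leave the child running
                process.communicate()     # reap any remaining output
                raise
        return CompletedProcess(process.args, process.returncode, stdout, stderr)
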
@ -59,30 +60,14 @@ def run(*popenargs, input=None, capture_output=False, timeout=None, check=False,
|
|||
return CompletedProcess(process.args, retcode, stdout, stderr)
|
||||
|
||||
|
||||
def atomic_write(contents: Union[dict, str, bytes], path: str) -> None:
|
||||
def atomic_write(path: str, contents: Union[dict, str, bytes], overwrite: bool=True) -> None:
|
||||
"""Safe atomic write to filesystem by writing to temp file + atomic rename"""
|
||||
try:
|
||||
tmp_file = '{}.tmp'.format(path)
|
||||
|
||||
if isinstance(contents, bytes):
|
||||
args = {'mode': 'wb+'}
|
||||
|
||||
with awrite(path, overwrite=overwrite) as f:
|
||||
if isinstance(contents, dict):
|
||||
pyjson.dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
|
||||
else:
|
||||
args = {'mode': 'w+', 'encoding': 'utf-8'}
|
||||
|
||||
with open(tmp_file, **args) as f:
|
||||
if isinstance(contents, dict):
|
||||
pyjson.dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
|
||||
else:
|
||||
f.write(contents)
|
||||
|
||||
os.fsync(f.fileno())
|
||||
|
||||
os.rename(tmp_file, path)
|
||||
chmod_file(path)
|
||||
finally:
|
||||
if os.path.exists(tmp_file):
|
||||
os.remove(tmp_file)
|
||||
|
||||
f.write(contents)
|
||||
|
||||
@enforce_types
|
||||
def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS, timeout: int=30) -> None:
|
||||
|
@ -105,7 +90,8 @@ def copy_and_overwrite(from_path: str, to_path: str):
|
|||
shutil.copytree(from_path, to_path)
|
||||
else:
|
||||
with open(from_path, 'rb') as src:
|
||||
atomic_write(src.read(), to_path)
|
||||
contents = src.read()
|
||||
atomic_write(to_path, contents)
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
|
|
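With this change atomic_write() delegates the temp-file-plus-rename dance to the atomicwrites library and flips its argument order to path-first, mirroring open(path, ...) — which is why every caller touched by this diff (the index writers, the source savers, copy_and_overwrite) swaps its arguments. Usage after the refactor looks roughly like:

    from archivebox.system import atomic_write   # the refactored helper above

    atomic_write('index.json', {'version': '0.4.3', 'links': []})   # dicts are dumped as JSON
    atomic_write('index.html', '<html>...</html>')                  # strings written as-is

One caveat worth noting: the type hint still admits bytes, but awrite() opens the file in text mode by default, so str and dict are the payloads this version actually handles safely.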
@@ -6,6 +6,37 @@
 <title>Archived Sites</title>
 <meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
 <style>
+    :root {
+        --bg-main: #efefef;
+        --accent-1: #aa1e55;
+        --accent-2: #ffebeb;
+        --accent-3: #efefef;
+
+        --text-1: #1c1c1c;
+        --text-2: #eaeaea;
+        --text-main: #1a1a1a;
+        --font-main: "Gill Sans", Helvetica, sans-serif;
+    }
+    /* Dark Mode (WIP) */
+    /*
+    @media (prefers-color-scheme: dark) {
+        :root {
+            --accent-2: hsl(160, 100%, 96%);
+
+            --text-1: #eaeaea;
+            --text-2: #1a1a1a;
+            --bg-main: #101010;
+        }
+
+        #table-bookmarks_wrapper,
+        #table-bookmarks_wrapper img,
+        tbody td:nth-child(3),
+        tbody td:nth-child(3) span,
+        footer {
+            filter: invert(100%);
+        }
+    }*/
+
     html, body {
         width: 100%;
         height: 100%;

@@ -14,11 +45,12 @@
         text-align: center;
         margin: 0px;
         padding: 0px;
-        font-family: "Gill Sans", Helvetica, sans-serif;
+        font-family: var(--font-main);
     }

     .header-top small {
         font-weight: 200;
-        color: #efefef;
+        color: var(--accent-3);
     }

     .header-top {

@@ -31,8 +63,8 @@
         font-size: calc(11px + 0.84vw);
         font-weight: 200;
         padding: 4px 4px;
-        border-bottom: 3px solid #aa1e55;
-        background-color: #aa1e55;
+        border-bottom: 3px solid var(--accent-1);
+        background-color: var(--accent-1);
     }
     input[type=search] {
         width: 22vw;

@@ -86,7 +118,7 @@
         height: 35px;
     }
     tbody tr:nth-child(odd) {
-        background-color: #ffebeb !important;
+        background-color: var(--accent-2) !important;
     }
     table tr td {
         white-space: nowrap;

@@ -146,7 +178,7 @@
         color: black;
     }
     tr td a.title small {
-        background-color: #efefef;
+        background-color: var(--accent-3);
         border-radius: 4px;
         float: right
     }

Note: the hunks below for the second template still contain unresolved merge-conflict markers (<<<<<<< HEAD ... ======= ... >>>>>>> master), committed as-is into the file.

@@ -4,6 +4,34 @@
 <title>Archived Sites</title>
 <meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
 <style>
+    :root {
+        --accent-1: #aa1e55;
+        --accent-2: #ffebeb;
+        --accent-3: #efefef;
+
+        --bg-main: #efefef;
+        --text-main: black;
+        --text-1: #1a1a1a;
+        --text-2: #eaeaea;
+    }
+
+    @media (prefers-color-scheme: dark) {
+        :root {
+            --accent-2: hsl(160, 100%, 96%);
+
+            --text-1: #eaeaea;
+            --text-2: #1a1a1a;
+            --bg-main: #101010;
+        }
+
+        #table-bookmarks_wrapper,
+        #table-bookmarks_wrapper img,
+        tbody td:nth-child(3),
+        tbody td:nth-child(3) span,
+        footer {
+            filter: invert(100%);
+        }
+    }
     html, body {
         width: 100%;
         height: 100%;

@@ -13,7 +41,10 @@
         margin: 0px;
         padding: 0px;
         font-family: "Gill Sans", Helvetica, sans-serif;
+        background: var(--bg-main);
+        color: var(--text-main);
     }
+<<<<<<< HEAD:archivebox/themes/legacy/main_index.html
     .header-top small {
         font-weight: 200;
         color: #efefef;

@@ -24,6 +55,33 @@
         height: auto;
         min-height: 40px;
         margin: 0px;
+=======
+    header {
+        background-color: var(--accent-1);
+        color: var(--text-1);
+        padding: 10px;
+        padding-top: 0px;
+        padding-bottom: 15px;
+        /*height: 40px;*/
+    }
+    header h1 {
+        margin: 7px 0px;
+        font-size: 35px;
+        font-weight: 300;
+        color: var(--text-1);
+    }
+    header h1 img {
+        height: 44px;
+        vertical-align: bottom;
+    }
+    header a {
+        text-decoration: none !important;
+        color: var(--text-1);
+    }
+    .header-center {
+        margin: auto;
+        float: none;
+>>>>>>> master:archivebox/templates/index.html
         text-align: center;
         color: white;
         font-size: calc(11px + 0.84vw);

@@ -32,11 +90,17 @@
         border-bottom: 3px solid #aa1e55;
         background-color: #aa1e55;
     }
+<<<<<<< HEAD:archivebox/themes/legacy/main_index.html
     input[type=search] {
         width: 22vw;
         border-radius: 4px;
         border: 1px solid #aeaeae;
         padding: 3px 5px;
+=======
+    .header-center small {
+        color: var(--text-2);
+        opacity: 0.7;
+>>>>>>> master:archivebox/templates/index.html
     }
     .nav > div {
         min-height: 30px;

@@ -45,9 +109,14 @@
         text-decoration: none;
         color: rgba(0,0,0,0.6);
     }
+<<<<<<< HEAD:archivebox/themes/legacy/main_index.html
     .header-top a:hover {
         text-decoration: none;
         color: rgba(0,0,0,0.9);
+=======
+    header + div {
+        padding-top: 10px;
+>>>>>>> master:archivebox/templates/index.html
     }
     .header-top .col-lg-4 {
         text-align: center;

@@ -84,7 +153,7 @@
         height: 35px;
     }
     tbody tr:nth-child(odd) {
-        background-color: #ffebeb !important;
+        background-color: var(--accent-2) !important;
     }
     table tr td {
         white-space: nowrap;

@@ -144,7 +213,7 @@
         color: black;
     }
     tr td a.title small {
-        background-color: #efefef;
+        background-color: var(--accent-3);
         border-radius: 4px;
         float: right
     }

@@ -1,5 +1,6 @@
 import re
 import ssl
+import json as pyjson

 from typing import List, Optional, Any

@@ -12,8 +13,7 @@ from html import escape, unescape
 from datetime import datetime
 from dateutil import parser as dateparser

-from base32_crockford import encode as base32_encode  # type: ignore
-import json as pyjson
+from base32_crockford import encode as base32_encode  # type: ignore

 from .config import (
     TIMEOUT,

@@ -23,6 +23,12 @@ from .config import (
     CHROME_OPTIONS,
 )

+try:
+    import chardet
+    detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
+except ImportError:
+    detect_encoding = lambda rawdata: "utf-8"
+
 ### Parsing Helpers

 # All of these are (str) -> str

@@ -158,8 +164,9 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str:
         insecure = ssl._create_unverified_context()
         resp = urlopen(req, timeout=timeout, context=insecure)

-    encoding = resp.headers.get_content_charset() or 'utf-8'  # type: ignore
-    return resp.read().decode(encoding)
+    rawdata = resp.read()
+    encoding = resp.headers.get_content_charset() or detect_encoding(rawdata)
+    return rawdata.decode(encoding)

 @enforce_types
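The try/except import gives download_url() a graceful fallback: with chardet installed, pages whose Content-Type header omits a charset get their encoding sniffed from the raw bytes; without it, decoding falls back to utf-8 exactly as before. The optional-dependency pattern in isolation (header_charset here is a stand-in for what get_content_charset() would return):

    # Optional-dependency pattern: degrade to a sane default when the lib is absent.
    try:
        import chardet
        detect_encoding = lambda rawdata: chardet.detect(rawdata)['encoding']
    except ImportError:
        detect_encoding = lambda rawdata: 'utf-8'

    header_charset = None                      # as if the HTTP header named no charset
    rawdata = b'<html><body>hello</body></html>'
    encoding = header_charset or detect_encoding(rawdata)
    print(rawdata.decode(encoding or 'utf-8'))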