Merge branch 'master' into archive-result

This commit is contained in:
Nick Sweeting 2020-11-27 23:18:11 -05:00 committed by GitHub
commit efe3027797
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
43 changed files with 743 additions and 280 deletions

View file

@ -0,0 +1,172 @@
"""
base32-crockford
================
A Python module implementing the alternate base32 encoding as described
by Douglas Crockford at: http://www.crockford.com/wrmg/base32.html.
He designed the encoding to:
* Be human and machine readable
* Be compact
* Be error resistant
* Be pronounceable
It uses a symbol set of 10 digits and 22 letters, excluding I, L O and
U. Decoding is not case sensitive, and 'i' and 'l' are converted to '1'
and 'o' is converted to '0'. Encoding uses only upper-case characters.
Hyphens may be present in symbol strings to improve readability, and
are removed when decoding.
A check symbol can be appended to a symbol string to detect errors
within the string.
"""
import re
import sys
PY3 = sys.version_info[0] == 3
if not PY3:
import string as str
__all__ = ["encode", "decode", "normalize"]
if PY3:
string_types = (str,)
else:
string_types = (basestring,) # noqa
# The encoded symbol space does not include I, L, O or U
symbols = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
# These five symbols are exclusively for checksum values
check_symbols = '*~$=U'
encode_symbols = dict((i, ch) for (i, ch) in enumerate(symbols + check_symbols))
decode_symbols = dict((ch, i) for (i, ch) in enumerate(symbols + check_symbols))
normalize_symbols = str.maketrans('IiLlOo', '111100')
valid_symbols = re.compile('^[%s]+[%s]?$' % (symbols,
re.escape(check_symbols)))
base = len(symbols)
check_base = len(symbols + check_symbols)
def encode(number, checksum=False, split=0):
"""Encode an integer into a symbol string.
A ValueError is raised on invalid input.
If checksum is set to True, a check symbol will be
calculated and appended to the string.
If split is specified, the string will be divided into
clusters of that size separated by hyphens.
The encoded string is returned.
"""
number = int(number)
if number < 0:
raise ValueError("number '%d' is not a positive integer" % number)
split = int(split)
if split < 0:
raise ValueError("split '%d' is not a positive integer" % split)
check_symbol = ''
if checksum:
check_symbol = encode_symbols[number % check_base]
if number == 0:
return '0' + check_symbol
symbol_string = ''
while number > 0:
remainder = number % base
number //= base
symbol_string = encode_symbols[remainder] + symbol_string
symbol_string = symbol_string + check_symbol
if split:
chunks = []
for pos in range(0, len(symbol_string), split):
chunks.append(symbol_string[pos:pos + split])
symbol_string = '-'.join(chunks)
return symbol_string
def decode(symbol_string, checksum=False, strict=False):
"""Decode an encoded symbol string.
If checksum is set to True, the string is assumed to have a
trailing check symbol which will be validated. If the
checksum validation fails, a ValueError is raised.
If strict is set to True, a ValueError is raised if the
normalization step requires changes to the string.
The decoded string is returned.
"""
symbol_string = normalize(symbol_string, strict=strict)
if checksum:
symbol_string, check_symbol = symbol_string[:-1], symbol_string[-1]
number = 0
for symbol in symbol_string:
number = number * base + decode_symbols[symbol]
if checksum:
check_value = decode_symbols[check_symbol]
modulo = number % check_base
if check_value != modulo:
raise ValueError("invalid check symbol '%s' for string '%s'" %
(check_symbol, symbol_string))
return number
def normalize(symbol_string, strict=False):
"""Normalize an encoded symbol string.
Normalization provides error correction and prepares the
string for decoding. These transformations are applied:
1. Hyphens are removed
2. 'I', 'i', 'L' or 'l' are converted to '1'
3. 'O' or 'o' are converted to '0'
4. All characters are converted to uppercase
A TypeError is raised if an invalid string type is provided.
A ValueError is raised if the normalized string contains
invalid characters.
If the strict parameter is set to True, a ValueError is raised
if any of the above transformations are applied.
The normalized string is returned.
"""
if isinstance(symbol_string, string_types):
if not PY3:
try:
symbol_string = symbol_string.encode('ascii')
except UnicodeEncodeError:
raise ValueError("string should only contain ASCII characters")
else:
raise TypeError("string is of invalid type %s" %
symbol_string.__class__.__name__)
norm_string = symbol_string.replace('-', '').translate(normalize_symbols).upper()
if not valid_symbols.match(norm_string):
raise ValueError("string '%s' contains invalid characters" % norm_string)
if strict and norm_string != symbol_string:
raise ValueError("string '%s' requires normalization" % symbol_string)
return norm_string

View file

@ -62,10 +62,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
help="Re-archive URLs from scratch, overwriting any existing files"
)
parser.add_argument(
'--init', #'-i',
"--init", #'-i',
action='store_true',
help="Init/upgrade the curent data directory before adding",
)
parser.add_argument(
"--extract",
type=str,
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
This does not take precedence over the configuration",
default=""
)
command = parser.parse_args(args or ())
urls = command.urls
stdin_urls = accept_stdin(stdin)
@ -83,6 +90,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
overwrite=command.overwrite,
init=command.init,
out_dir=pwd or OUTPUT_DIR,
extractors=command.extract,
)

View file

@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--filter-type',
type=str,
choices=('exact', 'substring', 'domain', 'regex'),
choices=('exact', 'substring', 'domain', 'regex','tag'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)

View file

@ -50,7 +50,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--filter-type',
type=str,
choices=('exact', 'substring', 'domain', 'regex'),
choices=('exact', 'substring', 'domain', 'regex','tag'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)

View file

@ -36,7 +36,7 @@ from .config_stubs import (
#
# ******************************************************************************
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
# Documentation: https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
# env USE_COLOR=True CHROME_BINARY=chromium archivebox add < example.html
# ******************************************************************************
@ -98,8 +98,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
'CURL_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) curl/{CURL_VERSION}'},
'WGET_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}'},
'CURL_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
'WGET_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36'},
'COOKIES_FILE': {'type': str, 'default': None},
@ -157,6 +157,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'READABILITY_BINARY': {'type': str, 'default': 'readability-extractor'},
'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
'NODE_BINARY': {'type': str, 'default': 'node'},
'CHROME_BINARY': {'type': str, 'default': None},
},
}
@ -248,7 +249,7 @@ CONFIG_HEADER = (
# archivebox init
#
# A list of all possible config with documentation and examples can be found here:
# https://github.com/pirate/ArchiveBox/wiki/Configuration
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
""")
@ -296,6 +297,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
@ -318,6 +320,8 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()},
'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},
'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'])},
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
'SAVE_DOM': {'default': lambda c: c['USE_CHROME'] and c['SAVE_DOM']},
@ -505,7 +509,7 @@ def load_config(defaults: ConfigDefaultDict,
stderr(' Check your config for mistakes and try again (your archive data is unaffected).')
stderr()
stderr(' For config documentation and examples see:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration')
stderr()
raise
raise SystemExit(2)
@ -565,7 +569,7 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
# stderr(f' {binary} --version')
# stderr()
# stderr(' If you don\'t want to install it, you can disable it via config. See here for more info:')
# stderr(' https://github.com/pirate/ArchiveBox/wiki/Install')
# stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Install')
return None
def bin_path(binary: Optional[str]) -> Optional[str]:
@ -643,12 +647,15 @@ def find_chrome_data_dir() -> Optional[str]:
return None
def wget_supports_compression(config):
cmd = [
config['WGET_BINARY'],
"--compression=auto",
"--help",
]
return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode
try:
cmd = [
config['WGET_BINARY'],
"--compression=auto",
"--help",
]
return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode
except (FileNotFoundError, OSError):
return False
def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
return {
@ -662,6 +669,11 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
'enabled': True,
'is_valid': (config['TEMPLATES_DIR'] / 'static').exists(),
},
# 'NODE_MODULES_DIR': {
# 'path': ,
# 'enabled': ,
# 'is_valid': (...).exists(),
# },
}
def get_external_locations(config: ConfigDict) -> ConfigValue:
@ -715,6 +727,13 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
def get_dependency_info(config: ConfigDict) -> ConfigValue:
return {
'ARCHIVEBOX_BINARY': {
'path': bin_path(config['ARCHIVEBOX_BINARY']),
'version': config['VERSION'],
'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
'enabled': True,
'is_valid': True,
},
'PYTHON_BINARY': {
'path': bin_path(config['PYTHON_BINARY']),
'version': config['PYTHON_VERSION'],
@ -743,6 +762,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': config['USE_WGET'],
'is_valid': bool(config['WGET_VERSION']),
},
'NODE_BINARY': {
'path': bin_path(config['NODE_BINARY']),
'version': config['NODE_VERSION'],
'hash': bin_hash(config['NODE_BINARY']),
'enabled': config['USE_NODE'],
'is_valid': bool(config['SINGLEFILE_VERSION']),
},
'SINGLEFILE_BINARY': {
'path': bin_path(config['SINGLEFILE_BINARY']),
'version': config['SINGLEFILE_VERSION'],
@ -828,13 +854,13 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
if config['USER'] == 'root':
stderr('[!] ArchiveBox should never be run as root!', color='red')
stderr(' For more information, see the security overview documentation:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
raise SystemExit(2)
### Check Python environment
if sys.version_info[:3] < (3, 6, 0):
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
stderr(' See https://github.com/pirate/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
raise SystemExit(2)
if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
@ -854,7 +880,7 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
stderr(f' {config["CHROME_USER_DATA_DIR"]}')
stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
stderr(' For more info see:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
if '/Default' in str(config['CHROME_USER_DATA_DIR']):
stderr()
stderr(' Try removing /Default from the end e.g.:')
@ -878,7 +904,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
)
)
if dependency in ('SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
hint(('npm install --prefix . "git+https://github.com/pirate/ArchiveBox.git"',
hint(('npm install --prefix . "git+https://github.com/ArchiveBox/ArchiveBox.git"',
f'or archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False to silence this warning',
''), prefix=' ')
stderr('')
@ -889,7 +915,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
stderr()
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#archive-method-toggles')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
stderr()
elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
@ -898,7 +924,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
stderr()
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#archive-method-toggles')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
stderr()
if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
@ -907,7 +933,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
stderr(' (Setting it somewhere over 60 seconds is recommended)')
stderr()
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#save_media')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
stderr()
def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:

View file

@ -86,7 +86,7 @@ class SnapshotAdmin(admin.ModelAdmin):
list_display = ('added', 'title_str', 'url_str', 'files', 'size')
sort_fields = ('title_str', 'url_str', 'added')
readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
search_fields = ('url', 'timestamp', 'title', 'tags')
search_fields = ['url', 'timestamp', 'title', 'tags__name']
fields = (*readonly_fields, 'title', 'tags')
list_filter = ('added', 'updated', 'tags')
ordering = ['-added']

View file

@ -14,7 +14,7 @@ urlpatterns = [
path('robots.txt', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'robots.txt'}),
path('favicon.ico', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'favicon.ico'}),
path('docs/', RedirectView.as_view(url='https://github.com/pirate/ArchiveBox/wiki'), name='Docs'),
path('docs/', RedirectView.as_view(url='https://github.com/ArchiveBox/ArchiveBox/wiki'), name='Docs'),
path('archive/', RedirectView.as_view(url='/')),
path('archive/<path:path>', LinkDetails.as_view(), name='LinkAssets'),

View file

@ -361,6 +361,7 @@ LINK_FILTERS = {
'substring': lambda pattern: Q(url__icontains=pattern),
'regex': lambda pattern: Q(url__iregex=pattern),
'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
'tag': lambda pattern: Q(tags__name=pattern),
}
@enforce_types

View file

@ -32,9 +32,9 @@ MAIN_INDEX_HEADER = {
'version': VERSION,
'git_sha': GIT_SHA,
'website': 'https://ArchiveBox.io',
'docs': 'https://github.com/pirate/ArchiveBox/wiki',
'source': 'https://github.com/pirate/ArchiveBox',
'issues': 'https://github.com/pirate/ArchiveBox/issues',
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
'dependencies': DEPENDENCIES,
},
}

View file

@ -447,7 +447,7 @@ def log_shell_welcome_msg():
print('{green}from archivebox import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
print()
print('[i] Welcome to the ArchiveBox Shell!')
print(' https://github.com/pirate/ArchiveBox/wiki/Usage#Shell-Usage')
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
print()
print(' {lightred}Hint:{reset} Example use:'.format(**ANSI))
print(' print(Snapshot.objects.filter(is_archived=True).count())')

View file

@ -3,6 +3,7 @@ __package__ = 'archivebox'
import os
import sys
import shutil
import platform
from pathlib import Path
from datetime import date
@ -111,6 +112,7 @@ from .logging_util import (
ALLOWED_IN_OUTPUT_DIR = {
'lost+found',
'.DS_Store',
'.venv',
'venv',
@ -178,7 +180,7 @@ def help(out_dir: Path=OUTPUT_DIR) -> None:
archivebox update --resume=15109948213.123
{lightred}Documentation:{reset}
https://github.com/pirate/ArchiveBox/wiki
https://github.com/ArchiveBox/ArchiveBox/wiki
'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **ANSI))
else:
@ -197,7 +199,7 @@ def help(out_dir: Path=OUTPUT_DIR) -> None:
print(' 2. archivebox init')
print()
print('For more information, see the documentation here:')
print(' https://github.com/pirate/ArchiveBox/wiki')
print(' https://github.com/ArchiveBox/ArchiveBox/wiki')
@enforce_types
@ -209,6 +211,8 @@ def version(quiet: bool=False,
print(VERSION)
else:
print('ArchiveBox v{}'.format(VERSION))
p = platform.uname()
print(p.system, platform.platform(), p.machine)
print()
print('{white}[i] Dependency versions:{reset}'.format(**ANSI))
@ -525,11 +529,14 @@ def add(urls: Union[str, List[str]],
index_only: bool=False,
overwrite: bool=False,
init: bool=False,
out_dir: Path=OUTPUT_DIR) -> List[Link]:
out_dir: Path=OUTPUT_DIR,
extractors: str="") -> List[Link]:
"""Add a new URL or list of URLs to your archive"""
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
extractors = extractors.split(",") if extractors else []
if init:
run_subcommand('init', stdin=None, pwd=out_dir)
@ -567,12 +574,17 @@ def add(urls: Union[str, List[str]],
return all_links
# Run the archive methods for each link
archive_kwargs = {
"out_dir": out_dir,
}
if extractors:
archive_kwargs["methods"] = extractors
if update_all:
archive_links(all_links, overwrite=overwrite, out_dir=out_dir)
archive_links(all_links, overwrite=overwrite, **archive_kwargs)
elif overwrite:
archive_links(imported_links, overwrite=True, out_dir=out_dir)
archive_links(imported_links, overwrite=True, **archive_kwargs)
elif new_links:
archive_links(new_links, overwrite=False, out_dir=out_dir)
archive_links(new_links, overwrite=False, **archive_kwargs)
return all_links
@ -857,7 +869,7 @@ def config(config_options_str: Optional[str]=None,
stderr(f' {line}')
raise SystemExit(2)
raw_key, val = line.split('=')
raw_key, val = line.split('=', 1)
raw_key = raw_key.upper().strip()
key = get_real_name(raw_key)
if key != raw_key:
@ -930,7 +942,7 @@ def schedule(add: bool=False,
if every or add:
every = every or 'day'
quoted = lambda s: f'"{s}"' if s and ' ' in s else s
quoted = lambda s: f'"{s}"' if s and ' ' in str(s) else str(s)
cmd = [
'cd',
quoted(out_dir),

View file

@ -39,11 +39,17 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
mode = 'wb+' if isinstance(contents, bytes) else 'w'
# print('\n> Atomic Write:', mode, path, len(contents), f'overwrite={overwrite}')
with lib_atomic_write(path, mode=mode, overwrite=overwrite) as f:
if isinstance(contents, dict):
dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
elif isinstance(contents, (bytes, str)):
f.write(contents)
try:
with lib_atomic_write(path, mode=mode, overwrite=overwrite) as f:
if isinstance(contents, dict):
dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
elif isinstance(contents, (bytes, str)):
f.write(contents)
except OSError as e:
print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})")
print(" For data integrity, ArchiveBox requires a filesystem that supports atomic writes.")
print(" Filesystems and network drives that don't implement FSYNC are incompatible and require workarounds.")
raise SystemExit(1)
os.chmod(path, int(OUTPUT_PERMISSIONS, base=8))
@enforce_types

View file

@ -226,6 +226,7 @@
.exists-False {
opacity: 0.1;
filter: grayscale(100%);
pointer-events: none;
}
</style>
@ -265,7 +266,7 @@
<div class="col-sm-10" style="text-align: right">
<a href="/add/">Add Links</a> &nbsp; | &nbsp;
<a href="/admin/core/snapshot/">Admin</a> &nbsp; | &nbsp;
<a href="https://github.com/pirate/ArchiveBox/wiki">Docs</a>
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Docs</a>
</div>
</div>
</div>
@ -277,7 +278,7 @@
<br />
<center>
<small>
Archive created using <a href="https://github.com/pirate/ArchiveBox" title="Github">ArchiveBox</a> &nbsp; |
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> &nbsp; |
&nbsp;
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
<br /><br />

View file

@ -223,7 +223,7 @@
<div class="col-sm-10" style="text-align: right">
<a href="/add/">Add Links</a> &nbsp; | &nbsp;
<a href="/admin/core/snapshot/">Admin</a> &nbsp; | &nbsp;
<a href="https://github.com/pirate/ArchiveBox/wiki">Docs</a>
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Docs</a>
</div>
</div>
</div>
@ -266,8 +266,8 @@
<br/>
<center>
<small>
Archive created using <a href="https://github.com/pirate/ArchiveBox" title="Github">ArchiveBox</a>
version <a href="https://github.com/pirate/ArchiveBox/tree/v{{VERSION}}" title="Git commit">v{{VERSION}}</a> &nbsp; | &nbsp;
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
version <a href="https://github.com/ArchiveBox/ArchiveBox/tree/v{{VERSION}}" title="Git commit">v{{VERSION}}</a> &nbsp; | &nbsp;
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
<br/><br/>
{{FOOTER_INFO}}

View file

@ -187,8 +187,8 @@
</a>
</div>
<div class="col-sm-10" style="text-align: right">
<a href="https://github.com/pirate/ArchiveBox/wiki">Documentation</a> &nbsp; | &nbsp;
<a href="https://github.com/pirate/ArchiveBox">Source</a> &nbsp; | &nbsp;
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Documentation</a> &nbsp; | &nbsp;
<a href="https://github.com/ArchiveBox/ArchiveBox">Source</a> &nbsp; | &nbsp;
<a href="https://archivebox.io">Website</a>
</div>
</div>
@ -209,8 +209,8 @@
<br/>
<center>
<small>
Archive created using <a href="https://github.com/pirate/ArchiveBox" title="Github">ArchiveBox</a>
version <a href="https://github.com/pirate/ArchiveBox/tree/v$version" title="Git commit">v$version</a> &nbsp; | &nbsp;
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
version <a href="https://github.com/ArchiveBox/ArchiveBox/tree/v$version" title="Git commit">v$version</a> &nbsp; | &nbsp;
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
<br/><br/>
$footer_info

View file

@ -16,7 +16,7 @@ from dateparser import parse as dateparser
import requests
from requests.exceptions import RequestException, ReadTimeout
from base32_crockford import encode as base32_encode # type: ignore
from .base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
try: