Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-13 22:54:27 -04:00)

commit 8840ad72bb (parent 844dcd3885)

    remove circular import possibilities

3 changed files with 23 additions and 12 deletions
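Of the hunks below, the first is apparently from the config module (it documents config precedence), the second from the Django admin module, and the remaining six from the shared util module, where the real change lives: config values that used to be imported at module scope are now imported inside the functions that need them, so importing util no longer forces the config module to finish loading first. A minimal sketch of the failure mode and the fix, using hypothetical sibling modules config.py and util.py (the names mirror the diff, not the actual package layout):

    # --- config.py (hypothetical) ---
    from util import detect_encoding    # config needs util while it is still loading
    TIMEOUT = 60

    # --- util.py, before: fails whenever config is imported first ---
    from config import TIMEOUT          # config is mid-import here, TIMEOUT not yet bound

    # --- util.py, after: the import is deferred until call time ---
    def download_url(url, timeout=None):
        from config import TIMEOUT      # by now both modules are fully initialized
        return url, (timeout or TIMEOUT)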
@@ -21,6 +21,14 @@ from .stubs import (
     ConfigDefaultDict,
 )
 
+# precedence order for config:
+# 1. cli args
+# 2. shell environment vars
+# 3. config file
+# 4. defaults
+
+# env USE_COLOR=false archivebox add '...'
+# env SHOW_PROGRESS=1 archivebox add '...'
 
 # ******************************************************************************
 # Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
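The precedence comments added above amount to a lookup chain; roughly, something like this hypothetical helper (none of these names exist in the codebase):

    import os

    def resolve(key, cli_args, config_file, defaults):
        """Return the value for a config key, honoring the documented precedence."""
        if key in cli_args:          # 1. cli args win
            return cli_args[key]
        if key in os.environ:        # 2. then shell environment vars
            return os.environ[key]
        if key in config_file:       # 3. then the config file
            return config_file[key]
        return defaults[key]         # 4. finally the hard-coded defaults

    # -> '120', assuming TIMEOUT is not set in the shell environment
    resolve('TIMEOUT', cli_args={}, config_file={'TIMEOUT': '120'}, defaults={'TIMEOUT': '60'})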
@@ -1,7 +1,7 @@
 from django.contrib import admin
 from django.utils.html import format_html
 
-from archivebox.util import htmldecode, urldecode
+from util import htmldecode, urldecode
 from core.models import Snapshot
 from cli.logging import printable_filesize
 
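Dropping the `archivebox.` prefix here presumably avoids re-entering the top-level `archivebox` package while it is still being imported; it relies on the package directory itself being on `sys.path`, a convention the neighboring imports (`core.models`, `cli.logging`) already follow.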
@@ -14,15 +14,6 @@ from dateutil import parser as dateparser
 import requests
 from base32_crockford import encode as base32_encode  # type: ignore
 
-from .config import (
-    TIMEOUT,
-    STATICFILE_EXTENSIONS,
-    CHECK_SSL_VALIDITY,
-    WGET_USER_AGENT,
-    CHROME_OPTIONS,
-    COLOR_DICT
-)
-
 try:
     import chardet
     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
@@ -49,7 +40,6 @@ base_url = lambda url: without_scheme(url)  # uniq base url used to dedupe links
 without_www = lambda url: url.replace('://www.', '://', 1)
 without_trailing_slash = lambda url: url[:-1] if url[-1] == '/' else url.replace('/?', '?')
 hashurl = lambda url: base32_encode(int(sha256(base_url(url).encode('utf-8')).hexdigest(), 16))[:20]
-is_static_file = lambda url: extension(url).lower() in STATICFILE_EXTENSIONS  # TODO: the proper way is with MIME type detection, not using extension
 
 urlencode = lambda s: s and quote(s, encoding='utf-8', errors='replace')
 urldecode = lambda s: s and unquote(s)
@@ -70,7 +60,14 @@ URL_REGEX = re.compile(
     re.IGNORECASE,
 )
 
+<<<<<<< HEAD
 COLOR_REGEX = re.compile(r'\[(?P<arg_1>\d+)(;(?P<arg_2>\d+)(;(?P<arg_3>\d+))?)?m')
+=======
+def is_static_file(url: str):
+    # TODO: the proper way is with MIME type detection + ext, not only extension
+    from .config import STATICFILE_EXTENSIONS
+    return extension(url).lower() in STATICFILE_EXTENSIONS
+>>>>>>> c1fe068... remove circular import possibilities
 
 
 def enforce_types(func):
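Note why `is_static_file` turns from a lambda into a full `def` here: `import` is a statement, and a lambda body may only contain a single expression, so the deferred config import is impossible in the old one-liner. The shape of the rewrite, with a hypothetical flat module path:

    # before: resolved at import time, which keeps the import cycle alive
    # is_static_file = lambda url: extension(url).lower() in STATICFILE_EXTENSIONS

    # after: a def body allows statements, so the import moves inside
    def is_static_file(url):
        from config import STATICFILE_EXTENSIONS
        return extension(url).lower() in STATICFILE_EXTENSIONS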
@@ -155,8 +152,10 @@ def parse_date(date: Any) -> Optional[datetime]:
 
 
 @enforce_types
-def download_url(url: str, timeout: int=TIMEOUT) -> str:
+def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
+    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
+    timeout = timeout or TIMEOUT
     response = requests.get(
         url,
         headers={'User-Agent': WGET_USER_AGENT},
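The signature change is part of the same fix: Python evaluates default argument values once, at function definition time (i.e. when the module is imported), so keeping `timeout: int=TIMEOUT` would have required the module-level config import to survive. Using `None` as a sentinel pushes the lookup to call time. A runnable sketch, with a stand-in dict instead of the real config module:

    DEFAULTS = {'TIMEOUT': 60}   # stands in for .config

    def fetch(url: str, timeout: int = None) -> str:
        timeout = timeout or DEFAULTS['TIMEOUT']   # resolved on every call
        return f'GET {url} (timeout={timeout}s)'

    print(fetch('https://example.com'))      # uses the config default, 60
    print(fetch('https://example.com', 5))   # explicit override, 5

One caveat of `timeout or TIMEOUT`: it treats `0` as "unset"; `if timeout is None:` would be the stricter test.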
@@ -170,6 +169,8 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str:
 def chrome_args(**options) -> List[str]:
     """helper to build up a chrome shell command with arguments"""
 
+    from .config import CHROME_OPTIONS
+
     options = {**CHROME_OPTIONS, **options}
 
     cmd_args = [options['CHROME_BINARY']]
@@ -202,6 +203,8 @@ def ansi_to_html(text):
     """
     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
     """
+    from .config import COLOR_DICT
+
     TEMPLATE = '<span style="color: rgb{}"><br>'
     text = text.replace('[m', '</span>')