mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-12 22:25:44 -04:00)

fix rich logging issues

This commit is contained in:
parent 0dffbf1bb4
commit e99260feb2

5 changed files with 39 additions and 50 deletions
@@ -1,6 +1,5 @@
 __package__ = 'archivebox.api'
 
-import uuid
 import secrets
 from datetime import timedelta
 
@@ -30,7 +30,7 @@ from core.models import Snapshot, ArchiveResult, Tag
 from core.mixins import SearchResultsAdminMixin
 from api.models import APIToken
 from abid_utils.admin import ABIDModelAdmin
-from queues.tasks import bg_archive_links, bg_add
+from queues.tasks import bg_archive_links, bg_archive_link, bg_add
 
 from index.html import snapshot_icons
 from logging_util import printable_filesize
@@ -19,6 +19,8 @@ from django.conf import settings
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
 
+from queues.tasks import bg_archive_snapshot
+
 from ..system import get_dir_size
 from ..util import parse_date, base_url
 from ..index.schema import Link
@@ -160,6 +162,9 @@ class Snapshot(ABIDModel):
 
         super().save(*args, **kwargs)
 
+    def archive(self, overwrite=False, methods=None):
+        result = bg_archive_snapshot(self, overwrite=overwrite, methods=methods)
+        return result
 
     def __repr__(self) -> str:
         title = (self.title_stripped or '-')[:64]
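
Illustrative usage of the new Snapshot.archive() helper added above (not part of the diff; assumes an existing Snapshot row and a running worker for the queues app):

    # a minimal sketch: archive() just hands the snapshot off to the background task
    from core.models import Snapshot

    snapshot = Snapshot.objects.get(url="https://example.com")
    result = snapshot.archive(overwrite=True)   # wraps bg_archive_snapshot(...)
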
@@ -1,3 +1,4 @@
+__package__ = 'archivebox.core'
 import re
 import tempfile
 import logging
@@ -7,6 +8,8 @@ import django.template
 
 import archivebox
 
+from ..misc.logging import IS_TTY
+
 
 IGNORABLE_URL_PATTERNS = [
     re.compile(r"/.*/?apple-touch-icon.*\.png"),
@@ -101,7 +104,7 @@ SETTINGS_LOGGING = {
         "formatter": "rich",
         "level": "DEBUG",
         "markup": False,
-        "rich_tracebacks": True,
+        "rich_tracebacks": IS_TTY,
         "filters": ["noisyrequestsfilter"],
         "tracebacks_suppress": [
             django,
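
For context, a minimal sketch (not taken from the ArchiveBox settings file) of a Django LOGGING dict wired to rich.logging.RichHandler, gating rich_tracebacks on a TTY check the same way the hunk above does; the handler and formatter names here are illustrative:

    import sys
    import django

    IS_TTY = sys.stdout.isatty()   # stand-in for archivebox's misc.logging.IS_TTY

    LOGGING = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"rich": {"datefmt": "[%X]"}},
        "handlers": {
            "console": {
                "class": "rich.logging.RichHandler",
                "formatter": "rich",
                "level": "DEBUG",
                "markup": False,
                "rich_tracebacks": IS_TTY,        # pretty tracebacks only on a real terminal
                "tracebacks_suppress": [django],  # hide django's internal frames
            },
        },
        "loggers": {"django": {"handlers": ["console"], "level": "INFO"}},
    }
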
@@ -242,7 +242,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
         args=args,
     )
     # stderr()
-    # stderr('{black} > {pwd}{reset}'.format(pwd=pwd, **ANSI))
+    # stderr('[bright_black] > {pwd}[/]'.format(pwd=pwd, **ANSI))
     # stderr()
     if SHOW_PROGRESS:
         print(Panel(version_msg), file=sys.stderr)
@@ -254,12 +254,11 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
 
 def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: bool):
     _LAST_RUN_STATS.parse_start_ts = datetime.now(timezone.utc)
-    print('{green}[+] [{}] Adding {} links to index (crawl depth={}){}...{reset}'.format(
+    print('[green][+] [{}] Adding {} links to index (crawl depth={}){}...[/]'.format(
         _LAST_RUN_STATS.parse_start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         len(urls) if isinstance(urls, list) else len(urls.split('\n')),
         depth,
         ' (index only)' if index_only else '',
-        **ANSI,
     ))
 
 def log_source_saved(source_file: str):
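
For illustration only (not from the commit): the hunk above swaps '{green}...{reset}'.format(**ANSI) interpolation of raw escape codes for Rich console markup, which Rich renders on a terminal and strips when output is redirected. This assumes the module routes print through Rich; otherwise the tags would be printed literally:

    from rich import print   # assumption: logging_util's print is Rich's print

    num_links, depth = 3, 1
    # Rich parses the [green]...[/] tags instead of splicing in ANSI codes by hand
    print(f'[green][+] Adding {num_links} links to index (crawl depth={depth})...[/]')
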
@@ -275,7 +274,7 @@ def log_deduping_finished(num_new_links: int):
 
 def log_crawl_started(new_links):
     print()
-    print('{green}[*] Starting crawl of {} sites 1 hop out from starting point{reset}'.format(len(new_links), **ANSI))
+    print(f'[green][*] Starting crawl of {len(new_links)} sites 1 hop out from starting point[/]')
 
 ### Indexing Stage
 
@@ -283,10 +282,9 @@ def log_indexing_process_started(num_links: int):
     start_ts = datetime.now(timezone.utc)
     _LAST_RUN_STATS.index_start_ts = start_ts
     print()
-    print('{black}[*] [{}] Writing {} links to main index...{reset}'.format(
+    print('[bright_black][*] [{}] Writing {} links to main index...[/]'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         num_links,
-        **ANSI,
     ))
 
 
@@ -312,17 +310,15 @@ def log_archiving_started(num_links: int, resume: Optional[float]=None):
     _LAST_RUN_STATS.archiving_start_ts = start_ts
     print()
     if resume:
-        print('{green}[▶] [{}] Resuming archive updating for {} pages starting from {}...{reset}'.format(
+        print('[green][▶] [{}] Resuming archive updating for {} pages starting from {}...[/]'.format(
             start_ts.strftime('%Y-%m-%d %H:%M:%S'),
             num_links,
             resume,
-            **ANSI,
         ))
     else:
-        print('{green}[▶] [{}] Starting archiving of {} snapshots in index...{reset}'.format(
+        print('[green][▶] [{}] Starting archiving of {} snapshots in index...[/]'.format(
             start_ts.strftime('%Y-%m-%d %H:%M:%S'),
             num_links,
-            **ANSI,
         ))
 
 def log_archiving_paused(num_links: int, idx: int, timestamp: str):
@@ -330,8 +326,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
     end_ts = datetime.now(timezone.utc)
     _LAST_RUN_STATS.archiving_end_ts = end_ts
     print()
-    print('\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
-        **ANSI,
+    print('\n[yellow3][X] [{now}] Downloading paused on link {timestamp} ({idx}/{total})[/]'.format(
         now=end_ts.strftime('%Y-%m-%d %H:%M:%S'),
         idx=idx+1,
         timestamp=timestamp,
@@ -355,12 +350,10 @@ def log_archiving_finished(num_links: int):
         duration = '{0:.2f} sec'.format(seconds)
 
     print()
-    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
-        ANSI['green'],
+    print('[green][√] [{}] Update of {} pages complete ({})[/]'.format(
         end_ts.strftime('%Y-%m-%d %H:%M:%S'),
         num_links,
         duration,
-        ANSI['reset'],
     ))
     print(' - {} links skipped'.format(_LAST_RUN_STATS.skipped))
     print(' - {} links updated'.format(_LAST_RUN_STATS.succeeded + _LAST_RUN_STATS.failed))
@@ -368,7 +361,7 @@ def log_archiving_finished(num_links: int):
 
     if Snapshot.objects.count() < 50:
         print()
-        print(' {lightred}Hint:{reset} To manage your archive in a Web UI, run:'.format(**ANSI))
+        print(' [violet]Hint:[/] To manage your archive in a Web UI, run:')
         print(' archivebox server 0.0.0.0:8000')
 
 
@@ -378,14 +371,13 @@ def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool):
     # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/
     # > output/archive/1478739709
 
-    print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format(
-        symbol_color=ANSI['green' if is_new else 'black'],
+    print('\n[[{symbol_color}]{symbol}[/]] [[{symbol_color}]{now}[/]] "{title}"'.format(
+        symbol_color='green' if is_new else 'bright_black',
         symbol='+' if is_new else '√',
         now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
         title=link.title or link.base_url,
-        **ANSI,
     ))
-    print(' {blue}{url}{reset}'.format(url=link.url, **ANSI))
+    print(f' [sky_blue1]{link.url}[/]')
     print(' {} {}'.format(
         '>' if is_new else '√',
         pretty_path(link_dir),
@@ -408,7 +400,7 @@ def log_link_archiving_finished(link: "Link", link_dir: str, is_new: bool, stats
 
     end_ts = datetime.now(timezone.utc)
     duration = str(end_ts - start_ts).split('.')[0]
-    print(' {black}{} files ({}) in {}s {reset}'.format(size[2], printable_filesize(size[0]), duration, **ANSI))
+    print(' [bright_black]{} files ({}) in {}s [/]'.format(size[2], printable_filesize(size[0]), duration))
 
 
 def log_archive_method_started(method: str):
@@ -429,16 +421,13 @@ def log_archive_method_finished(result: "ArchiveResult"):
     if result.output.__class__.__name__ == 'TimeoutExpired':
         duration = (result.end_ts - result.start_ts).seconds
         hint_header = [
-            '{lightyellow}Extractor timed out after {}s.{reset}'.format(duration, **ANSI),
+            f'[yellow3]Extractor timed out after {duration}s.[/]',
         ]
     else:
+        error_name = result.output.__class__.__name__.replace('ArchiveError', '')
         hint_header = [
-            '{lightyellow}Extractor failed:{reset}'.format(**ANSI),
-            ' {reset}{} {red}{}{reset}'.format(
-                result.output.__class__.__name__.replace('ArchiveError', ''),
-                result.output,
-                **ANSI,
-            ),
+            '[yellow3]Extractor failed:[/]',
+            f' {error_name} [red1]{result.output}[/]',
         ]
 
     # import pudb; pudb.set_trace()
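
A small sketch (not part of the diff) of what the reworked failure header evaluates to, using a hypothetical exception class standing in for result.output:

    class WgetArchiveError(Exception): ...          # hypothetical stand-in
    output = WgetArchiveError('wget exited with status 8')

    error_name = output.__class__.__name__.replace('ArchiveError', '')   # -> 'Wget'
    hint_header = [
        '[yellow3]Extractor failed:[/]',
        f' {error_name} [red1]{output}[/]',         # -> ' Wget [red1]wget exited with status 8[/]'
    ]
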
@@ -454,7 +443,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
             hints = hints.split('\n')
 
         hints = (
-            ' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
+            f' [yellow1]{line.strip()}[/]'
             for line in list(hints)[:5] if line.strip()
         )
 
@@ -468,7 +457,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
         output_lines = [
             *hint_header,
             *hints,
-            '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']),
+            '[violet]Run to see full output:[/]',
             *docker_hints,
             *([' cd {};'.format(result.pwd)] if result.pwd else []),
             ' {}'.format(quoted_cmd),
@@ -482,10 +471,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
 
 
 def log_list_started(filter_patterns: Optional[List[str]], filter_type: str):
-    print('{green}[*] Finding links in the archive index matching these {} patterns:{reset}'.format(
-        filter_type,
-        **ANSI,
-    ))
+    print(f'[green][*] Finding links in the archive index matching these {filter_type} patterns:[/]')
     print(' {}'.format(' '.join(filter_patterns or ())))
 
 def log_list_finished(links):
@@ -498,7 +484,7 @@ def log_list_finished(links):
 
 
 def log_removal_started(links: List["Link"], yes: bool, delete: bool):
-    print('{lightyellow}[i] Found {} matching URLs to remove.{reset}'.format(len(links), **ANSI))
+    print(f'[yellow3][i] Found {len(links)} matching URLs to remove.[/]')
     if delete:
         file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
         print(
@@ -513,7 +499,7 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
 
     if not yes:
         print()
-        print('{lightyellow}[?] Do you want to proceed with removing these {} links?{reset}'.format(len(links), **ANSI))
+        print('[yellow3][?] Do you want to proceed with removing these {len(links)} links?[/]')
         try:
             assert input(' y/[n]: ').lower() == 'y'
         except (KeyboardInterrupt, EOFError, AssertionError):
@@ -522,28 +508,24 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
 def log_removal_finished(all_links: int, to_remove: int):
     if all_links == 0:
         print()
-        print('{red}[X] No matching links found.{reset}'.format(**ANSI))
+        print('[red1][X] No matching links found.[/]')
     else:
         print()
-        print('{red}[√] Removed {} out of {} links from the archive index.{reset}'.format(
-            to_remove,
-            all_links,
-            **ANSI,
-        ))
-        print(' Index now contains {} links.'.format(all_links - to_remove))
+        print(f'[red1][√] Removed {to_remove} out of {all_links} links from the archive index.[/]')
+        print(f' Index now contains {all_links - to_remove} links.')
 
 
 def log_shell_welcome_msg():
     from .cli import CLI_SUBCOMMANDS
 
-    print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
-    print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
-    print('{green}from cli import *\n {}{reset}'.format("\n ".join(CLI_SUBCOMMANDS.keys()), **ANSI))
+    print('[green]# ArchiveBox Imports[/]')
+    print('[green]from core.models import Snapshot, ArchiveResult, Tag, User[/]')
+    print('[green]from cli import *\n {}[/]'.format("\n ".join(CLI_SUBCOMMANDS.keys())))
     print()
     print('[i] Welcome to the ArchiveBox Shell!')
     print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
     print()
-    print(' {lightred}Hint:{reset} Example use:'.format(**ANSI))
+    print(' [violet]Hint:[/] Example use:')
     print(' print(Snapshot.objects.filter(is_archived=True).count())')
     print(' Snapshot.objects.get(url="https://example.com").as_json()')
     print(' add("https://example.com/some/new/url")')