mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 07:04:27 -04:00
Clean up console logging messages
This commit is contained in:
parent
1ea695d7b7
commit
2cd11feaac
4 changed files with 25 additions and 9 deletions
|
@ -4,6 +4,8 @@ from functools import wraps
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from subprocess import run, PIPE, DEVNULL
|
from subprocess import run, PIPE, DEVNULL
|
||||||
|
|
||||||
|
from peekable import Peekable
|
||||||
|
|
||||||
from index import html_appended_url, parse_json_link_index, write_link_index
|
from index import html_appended_url, parse_json_link_index, write_link_index
|
||||||
from links import links_after_timestamp
|
from links import links_after_timestamp
|
||||||
from config import (
|
from config import (
|
||||||
|
@ -40,16 +42,19 @@ _RESULTS_TOTALS = { # globals are bad, mmkay
|
||||||
def archive_links(archive_path, links, source=None, resume=None):
|
def archive_links(archive_path, links, source=None, resume=None):
|
||||||
check_dependencies()
|
check_dependencies()
|
||||||
|
|
||||||
to_archive = links_after_timestamp(links, resume)
|
to_archive = Peekable(links_after_timestamp(links, resume))
|
||||||
|
idx, link = 0, to_archive.peek(0)
|
||||||
try:
|
try:
|
||||||
for idx, link in enumerate(to_archive):
|
for idx, link in enumerate(to_archive):
|
||||||
link_dir = os.path.join(archive_path, link['timestamp'])
|
link_dir = os.path.join(archive_path, link['timestamp'])
|
||||||
archive_link(link_dir, link)
|
archive_link(link_dir, link)
|
||||||
|
|
||||||
except (KeyboardInterrupt, SystemExit, Exception) as e:
|
except (KeyboardInterrupt, SystemExit, Exception) as e:
|
||||||
print('{red}[X] Index is up-to-date, archive update paused on link {idx}/{total}{reset}'.format(
|
print('⏸ [{now}] {lightyellow}Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
|
||||||
**ANSI,
|
**ANSI,
|
||||||
|
now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
idx=idx,
|
idx=idx,
|
||||||
|
timestamp=link['timestamp'],
|
||||||
total=len(list(to_archive)),
|
total=len(list(to_archive)),
|
||||||
))
|
))
|
||||||
print(' Continue where you left off by running:')
|
print(' Continue where you left off by running:')
|
||||||
|
|
8
index.py
8
index.py
|
@ -28,16 +28,16 @@ def write_links_index(out_dir, links):
|
||||||
if not os.path.exists(out_dir):
|
if not os.path.exists(out_dir):
|
||||||
os.makedirs(out_dir)
|
os.makedirs(out_dir)
|
||||||
|
|
||||||
print('[i] [{}] Updating {}{}{} links in archive index...'.format(
|
write_json_links_index(out_dir, links)
|
||||||
|
write_html_links_index(out_dir, links)
|
||||||
|
|
||||||
|
print('[√] [{}] Archive index is now up-to-date with {}{}{} links.'.format(
|
||||||
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
ANSI['green'],
|
ANSI['green'],
|
||||||
len(links),
|
len(links),
|
||||||
ANSI['reset'],
|
ANSI['reset'],
|
||||||
))
|
))
|
||||||
|
|
||||||
write_json_links_index(out_dir, links)
|
|
||||||
write_html_links_index(out_dir, links)
|
|
||||||
|
|
||||||
def write_json_links_index(out_dir, links):
|
def write_json_links_index(out_dir, links):
|
||||||
"""write the json link index to a given path"""
|
"""write the json link index to a given path"""
|
||||||
|
|
||||||
|
|
9
links.py
9
links.py
|
@ -32,6 +32,8 @@ Link {
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
from util import (
|
from util import (
|
||||||
domain,
|
domain,
|
||||||
base_url,
|
base_url,
|
||||||
|
@ -39,6 +41,7 @@ from util import (
|
||||||
get_link_type,
|
get_link_type,
|
||||||
merge_links,
|
merge_links,
|
||||||
)
|
)
|
||||||
|
from config import ANSI
|
||||||
|
|
||||||
|
|
||||||
def validate_links(links):
|
def validate_links(links):
|
||||||
|
@ -95,7 +98,11 @@ def links_after_timestamp(links, timestamp=None):
|
||||||
yield from links
|
yield from links
|
||||||
return
|
return
|
||||||
|
|
||||||
print('[.] [{}] Resuming...'.format(timestamp))
|
print('▶️ [{}] {green}Resuming downloads at {}...{reset}'.format(
|
||||||
|
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
|
timestamp,
|
||||||
|
**ANSI,
|
||||||
|
))
|
||||||
for link in links:
|
for link in links:
|
||||||
try:
|
try:
|
||||||
if float(link['timestamp']) <= float(timestamp):
|
if float(link['timestamp']) <= float(timestamp):
|
||||||
|
|
8
parse.py
8
parse.py
|
@ -184,6 +184,10 @@ def parse_pinboard_rss_feed(rss_file):
|
||||||
tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text
|
tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text
|
||||||
title = item.find("{http://purl.org/rss/1.0/}title").text
|
title = item.find("{http://purl.org/rss/1.0/}title").text
|
||||||
ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text
|
ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text
|
||||||
|
# = 🌈🌈🌈🌈
|
||||||
|
# = 🌈🌈🌈🌈
|
||||||
|
# = 🏆🏆🏆🏆
|
||||||
|
|
||||||
# Pinboard includes a colon in its date stamp timezone offsets, which
|
# Pinboard includes a colon in its date stamp timezone offsets, which
|
||||||
# Python can't parse. Remove it:
|
# Python can't parse. Remove it:
|
||||||
if ":" == ts_str[-3:-2]:
|
if ":" == ts_str[-3:-2]:
|
||||||
|
@ -208,8 +212,8 @@ def parse_medium_rss_feed(rss_file):
|
||||||
root = etree.parse(rss_file).getroot()
|
root = etree.parse(rss_file).getroot()
|
||||||
items = root.find("channel").findall("item")
|
items = root.find("channel").findall("item")
|
||||||
for item in items:
|
for item in items:
|
||||||
for child in item:
|
# for child in item:
|
||||||
print(child.tag, child.text)
|
# print(child.tag, child.text)
|
||||||
url = item.find("link").text
|
url = item.find("link").text
|
||||||
title = item.find("title").text
|
title = item.find("title").text
|
||||||
ts_str = item.find("pubDate").text
|
ts_str = item.find("pubDate").text
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue