Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-13 22:54:27 -04:00.
allow passing import list via stdin
This commit is contained in:
parent
cb60bad1d7
commit
39f5e12364
2 changed files with 37 additions and 2 deletions
|
@ -31,6 +31,7 @@ from config import (
|
|||
)
|
||||
from util import (
|
||||
download_url,
|
||||
save_source,
|
||||
progress,
|
||||
cleanup_archive,
|
||||
pretty_path,
|
||||
|
@ -39,14 +40,18 @@ from util import (
|
|||
|
||||
__AUTHOR__ = 'Nick Sweeting <git@nicksweeting.com>'
__VERSION__ = GIT_SHA
# NOTE(review): the diff left both the old and new values of these two
# constants in view; only the post-change values are kept here, since the
# earlier assignments were dead (immediately overwritten).
__DESCRIPTION__ = 'ArchiveBox Usage: Create a browsable html archive of a list of links.'
__DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox/wiki'


def print_help():
    """Print the CLI description, docs link, and usage examples to stdout."""
    print(__DESCRIPTION__)
    print("Documentation: {}\n".format(__DOCUMENTATION__))
    print("Usage:")
    print("    ./bin/archivebox ~/Downloads/bookmarks_export.html\n")
    print("")
    print("    ./bin/archivebox https://example.com/feed.rss\n")
    print("")
    # fixed example URL: was 'https://examplecom' (missing dot)
    print("    echo 'https://example.com' | ./bin/archivebox\n")
|
||||
|
||||
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
|
||||
|
@ -138,6 +143,20 @@ if __name__ == '__main__':
|
|||
source = sys.argv[1] if argc > 1 else None # path of links file to import
|
||||
resume = sys.argv[2] if argc > 2 else None # timestamp to resume dowloading from
|
||||
|
||||
stdin_raw_text = []
|
||||
|
||||
if not sys.stdin.isatty():
|
||||
stdin_raw_text = sys.stdin.read()
|
||||
|
||||
if source and stdin_raw_text:
|
||||
print(
|
||||
'[X] You should pass either a path as an argument, '
|
||||
'or pass a list of links via stdin, but not both.\n'
|
||||
)
|
||||
print_help()
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
if argc == 1:
|
||||
source, resume = None, None
|
||||
elif argc == 2:
|
||||
|
@ -163,6 +182,8 @@ if __name__ == '__main__':
|
|||
# Step 0: Download url to local file (only happens if a URL is specified instead of local path)
|
||||
if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
|
||||
source = download_url(source)
|
||||
elif stdin_raw_text:
|
||||
source = save_source(stdin_raw_text)
|
||||
|
||||
# Step 1: Parse the links and dedupe them with existing archive
|
||||
links = merge_links(archive_path=out_dir, import_path=source, only_new=False)
|
||||
|
|
|
@ -180,6 +180,20 @@ def pretty_path(path):
|
|||
return path.replace(REPO_DIR + '/', '')
|
||||
|
||||
|
||||
def save_source(raw_text):
    """Save raw stdin text into SOURCES_DIR as a timestamped .txt file.

    Used when a list of links is piped in instead of passed as a file path,
    so the rest of the pipeline can treat stdin like any imported file.

    Args:
        raw_text: the full text read from stdin.

    Returns:
        The path of the file that was written.
    """
    # exist_ok=True replaces the previous exists()-then-makedirs() pair,
    # which could raise FileExistsError if another run created the
    # directory between the check and the creation (TOCTOU race).
    os.makedirs(SOURCES_DIR, exist_ok=True)

    # whole-second unix timestamp, used to give the stdin dump a unique name
    ts = str(datetime.now().timestamp()).split('.', 1)[0]

    source_path = os.path.join(SOURCES_DIR, '{}-{}.txt'.format('stdin', ts))

    with open(source_path, 'w', encoding='utf-8') as f:
        f.write(raw_text)

    return source_path
||||
def download_url(url):
|
||||
"""download a given url's content into downloads/domain.txt"""
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue