Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-13 14:44:29 -04:00)
allow passing import list via stdin

This commit is contained in:
    parent cb60bad1d7
    commit 39f5e12364

2 changed files with 37 additions and 2 deletions
@@ -31,6 +31,7 @@ from config import (
 )
 from util import (
     download_url,
+    save_source,
     progress,
     cleanup_archive,
     pretty_path,
@@ -39,14 +40,18 @@ from util import (
 
 __AUTHOR__ = 'Nick Sweeting <git@nicksweeting.com>'
 __VERSION__ = GIT_SHA
-__DESCRIPTION__ = 'ArchiveBox: Create a browsable html archive of a list of links.'
-__DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox'
+__DESCRIPTION__ = 'ArchiveBox Usage: Create a browsable html archive of a list of links.'
+__DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox/wiki'
 
 def print_help():
     print(__DESCRIPTION__)
     print("Documentation: {}\n".format(__DOCUMENTATION__))
     print("Usage:")
     print("    ./bin/archivebox ~/Downloads/bookmarks_export.html\n")
+    print("")
+    print("    ./bin/archivebox https://example.com/feed.rss\n")
+    print("")
+    print("    echo 'https://example.com' | ./bin/archivebox\n")
 
 
 def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
@@ -138,6 +143,20 @@ if __name__ == '__main__':
     source = sys.argv[1] if argc > 1 else None      # path of links file to import
     resume = sys.argv[2] if argc > 2 else None      # timestamp to resume downloading from
 
+    stdin_raw_text = []
+
+    if not sys.stdin.isatty():
+        stdin_raw_text = sys.stdin.read()
+
+    if source and stdin_raw_text:
+        print(
+            '[X] You should pass either a path as an argument, '
+            'or pass a list of links via stdin, but not both.\n'
+        )
+        print_help()
+        raise SystemExit(1)
+
+
     if argc == 1:
         source, resume = None, None
     elif argc == 2:
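The heart of the change is the sys.stdin.isatty() check: when the script is run interactively, stdin is a terminal and nothing is read from it; when a list of links is piped in, isatty() returns False and the whole stream is read before argument handling continues. A minimal standalone sketch of that pattern (illustrative only, not ArchiveBox itself; the error message mirrors the diff, the file name and the prints at the end are just for demonstration):

#!/usr/bin/env python3
import sys

if __name__ == '__main__':
    source = sys.argv[1] if len(sys.argv) > 1 else None   # optional path/URL argument

    stdin_raw_text = ''
    if not sys.stdin.isatty():              # True only when stdin is piped or redirected
        stdin_raw_text = sys.stdin.read()   # slurp the whole piped link list

    if source and stdin_raw_text:
        # refuse ambiguous input, same as the new check in the diff above
        print('[X] You should pass either a path as an argument, '
              'or pass a list of links via stdin, but not both.\n')
        raise SystemExit(1)

    print('source argument:', source)
    print('chars read from stdin:', len(stdin_raw_text))

Running it as `echo 'https://example.com' | python3 stdin_sketch.py` takes the piped branch, while `python3 stdin_sketch.py bookmarks.html` leaves stdin untouched.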
@@ -163,6 +182,8 @@ if __name__ == '__main__':
     # Step 0: Download url to local file (only happens if a URL is specified instead of local path)
     if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         source = download_url(source)
+    elif stdin_raw_text:
+        source = save_source(stdin_raw_text)
 
     # Step 1: Parse the links and dedupe them with existing archive
     links = merge_links(archive_path=out_dir, import_path=source, only_new=False)
@@ -180,6 +180,20 @@ def pretty_path(path):
     return path.replace(REPO_DIR + '/', '')
 
 
+def save_source(raw_text):
+    if not os.path.exists(SOURCES_DIR):
+        os.makedirs(SOURCES_DIR)
+
+    ts = str(datetime.now().timestamp()).split('.', 1)[0]
+
+    source_path = os.path.join(SOURCES_DIR, '{}-{}.txt'.format('stdin', ts))
+
+    with open(source_path, 'w', encoding='utf-8') as f:
+        f.write(raw_text)
+
+    return source_path
+
+
 def download_url(url):
     """download a given url's content into downloads/domain.txt"""
 
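For reference, the new helper only depends on os, datetime.datetime, and the SOURCES_DIR setting. A self-contained sketch of the same behaviour, using a stand-in sources directory (an assumption for the example, not ArchiveBox's real config import):

import os
from datetime import datetime

SOURCES_DIR = os.path.abspath('./sources')   # assumption: stand-in for the config value

def save_source(raw_text):
    """Write raw stdin text to sources/stdin-<unix timestamp>.txt and return its path."""
    if not os.path.exists(SOURCES_DIR):
        os.makedirs(SOURCES_DIR)

    ts = str(datetime.now().timestamp()).split('.', 1)[0]   # whole-second unix timestamp
    source_path = os.path.join(SOURCES_DIR, '{}-{}.txt'.format('stdin', ts))

    with open(source_path, 'w', encoding='utf-8') as f:
        f.write(raw_text)

    return source_path

if __name__ == '__main__':
    print(save_source('https://example.com\nhttps://example.org\n'))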