Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-13 22:54:27 -04:00.
allow passing import list via stdin
This commit is contained in:
parent
cb60bad1d7
commit
39f5e12364
2 changed files with 37 additions and 2 deletions
|
@ -31,6 +31,7 @@ from config import (
|
|||
)
|
||||
from util import (
|
||||
download_url,
|
||||
save_source,
|
||||
progress,
|
||||
cleanup_archive,
|
||||
pretty_path,
|
||||
|
@ -39,14 +40,18 @@ from util import (
|
|||
|
||||
__AUTHOR__ = 'Nick Sweeting <git@nicksweeting.com>'
__VERSION__ = GIT_SHA
# NOTE(review): the diff left both the old and new values of these two
# constants in view; only the post-change values are kept here, since the
# earlier assignments were dead (immediately overwritten).
__DESCRIPTION__ = 'ArchiveBox Usage: Create a browsable html archive of a list of links.'
__DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox/wiki'


def print_help():
    """Print the CLI description, docs link, and usage examples to stdout."""
    print(__DESCRIPTION__)
    print("Documentation: {}\n".format(__DOCUMENTATION__))
    print("Usage:")
    print("    ./bin/archivebox ~/Downloads/bookmarks_export.html\n")
    print("")
    print("    ./bin/archivebox https://example.com/feed.rss\n")
    print("")
    # fixed example URL: was 'https://examplecom' (missing dot)
    print("    echo 'https://example.com' | ./bin/archivebox\n")
|
||||
|
||||
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
|
||||
|
@ -138,6 +143,20 @@ if __name__ == '__main__':
|
|||
source = sys.argv[1] if argc > 1 else None # path of links file to import
|
||||
resume = sys.argv[2] if argc > 2 else None # timestamp to resume dowloading from
|
||||
|
||||
stdin_raw_text = []
|
||||
|
||||
if not sys.stdin.isatty():
|
||||
stdin_raw_text = sys.stdin.read()
|
||||
|
||||
if source and stdin_raw_text:
|
||||
print(
|
||||
'[X] You should pass either a path as an argument, '
|
||||
'or pass a list of links via stdin, but not both.\n'
|
||||
)
|
||||
print_help()
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
if argc == 1:
|
||||
source, resume = None, None
|
||||
elif argc == 2:
|
||||
|
@ -163,6 +182,8 @@ if __name__ == '__main__':
|
|||
# Step 0: Download url to local file (only happens if a URL is specified instead of local path)
|
||||
if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
|
||||
source = download_url(source)
|
||||
elif stdin_raw_text:
|
||||
source = save_source(stdin_raw_text)
|
||||
|
||||
# Step 1: Parse the links and dedupe them with existing archive
|
||||
links = merge_links(archive_path=out_dir, import_path=source, only_new=False)
|
||||
|
|
|
@ -180,6 +180,20 @@ def pretty_path(path):
|
|||
return path.replace(REPO_DIR + '/', '')
|
||||
|
||||
|
||||
def save_source(raw_text):
    """Save raw stdin text into SOURCES_DIR as a timestamped .txt file.

    Used when a list of links is piped in instead of passed as a file path,
    so the rest of the pipeline can treat stdin like any imported file.

    Args:
        raw_text: the full text read from stdin.

    Returns:
        The path of the file that was written.
    """
    # exist_ok=True replaces the previous exists()-then-makedirs() pair,
    # which could raise FileExistsError if another run created the
    # directory between the check and the creation (TOCTOU race).
    os.makedirs(SOURCES_DIR, exist_ok=True)

    # whole-second unix timestamp, used to give the stdin dump a unique name
    ts = str(datetime.now().timestamp()).split('.', 1)[0]

    source_path = os.path.join(SOURCES_DIR, '{}-{}.txt'.format('stdin', ts))

    with open(source_path, 'w', encoding='utf-8') as f:
        f.write(raw_text)

    return source_path
||||
def download_url(url):
|
||||
"""download a given url's content into downloads/domain.txt"""
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue