Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-12 22:25:44 -04:00
move everything out of legacy folder
commit 1b8abc0961 (parent 553f312125)
74 changed files with 3162 additions and 2629 deletions
@@ -7,90 +7,75 @@ __description__ = 'Add a new URL or list of URLs to your archive'
 import sys
 import argparse
 
-from typing import List, Optional
+from typing import List, Optional, IO
 
-from ..legacy.config import stderr, check_dependencies, check_data_folder
-from ..legacy.util import (
-    handle_stdin_import,
-    handle_file_import,
-)
-from ..legacy.main import update_archive_data
+from ..main import add
+from ..util import SmartFormatter, accept_stdin
+from ..config import OUTPUT_DIR, ONLY_NEW
 
 
-def main(args: List[str]=None, stdin: Optional[str]=None) -> None:
-    check_data_folder()
-
-    args = sys.argv[1:] if args is None else args
-
+def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     parser = argparse.ArgumentParser(
         prog=__command__,
         description=__description__,
         add_help=True,
+        formatter_class=SmartFormatter,
     )
-    # parser.add_argument(
-    #     '--depth', #'-d',
-    #     type=int,
-    #     help='Recursively archive all linked pages up to this many hops away',
-    #     default=0,
-    # )
     parser.add_argument(
-        '--only-new', #'-n',
+        '--update-all', #'-n',
         action='store_true',
-        help="Don't attempt to retry previously skipped/failed links when updating",
+        default=not ONLY_NEW,
+        help="Also retry previously skipped/failed links when adding new links",
     )
     parser.add_argument(
         '--index-only', #'-o',
         action='store_true',
         help="Add the links to the main index without archiving them",
     )
-    # parser.add_argument(
-    #     '--mirror', #'-m',
-    #     action='store_true',
-    #     help='Archive an entire site (finding all linked pages below it on the same domain)',
-    # )
-    # parser.add_argument(
-    #     '--crawler', #'-r',
-    #     choices=('depth_first', 'breadth_first'),
-    #     help='Controls which crawler to use in order to find outlinks in a given page',
-    #     default=None,
-    # )
     parser.add_argument(
-        'url',
+        'import_path',
         nargs='?',
         type=str,
         default=None,
-        help='URL of page to archive (or path to local file)'
+        help=(
+            'URL or path to local file containing a list of links to import. e.g.:\n'
+            '    https://getpocket.com/users/USERNAME/feed/all\n'
+            '    https://example.com/some/rss/feed.xml\n'
+            '    ~/Downloads/firefox_bookmarks_export.html\n'
+            '    ~/Desktop/sites_list.csv\n'
+        )
     )
-    command = parser.parse_args(args)
-
-    check_dependencies()
-
-    ### Handle ingesting urls piped in through stdin
-    # (.e.g if user does cat example_urls.txt | archivebox add)
-    import_path = None
-    if stdin or not sys.stdin.isatty():
-        stdin_raw_text = stdin or sys.stdin.read()
-        if stdin_raw_text and command.url:
-            stderr(
-                '[X] You should pass either a path as an argument, '
-                'or pass a list of links via stdin, but not both.\n'
-            )
-            raise SystemExit(1)
-
-        import_path = handle_stdin_import(stdin_raw_text)
-
-    ### Handle ingesting url from a remote file/feed
-    # (e.g. if an RSS feed URL is used as the import path)
-    elif command.url:
-        import_path = handle_file_import(command.url)
-
-    update_archive_data(
-        import_path=import_path,
-        resume=None,
-        only_new=command.only_new,
-    )
+    command = parser.parse_args(args or ())
+    import_str = accept_stdin(stdin)
+    add(
+        import_str=import_str,
+        import_path=command.import_path,
+        update_all=command.update_all,
+        index_only=command.index_only,
+        out_dir=pwd or OUTPUT_DIR,
+    )
 
 
 if __name__ == '__main__':
-    main()
+    main(args=sys.argv[1:], stdin=sys.stdin)
+
+
+# TODO: Implement these
+#
+# parser.add_argument(
+#     '--depth', #'-d',
+#     type=int,
+#     help='Recursively archive all linked pages up to this many hops away',
+#     default=0,
+# )
+# parser.add_argument(
+#     '--mirror', #'-m',
+#     action='store_true',
+#     help='Archive an entire site (finding all linked pages below it on the same domain)',
+# )
+# parser.add_argument(
+#     '--crawler', #'-r',
+#     choices=('depth_first', 'breadth_first'),
+#     help='Controls which crawler to use in order to find outlinks in a given page',
+#     default=None,
+# )
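
For orientation, a minimal usage sketch of the rewritten entrypoint follows. Only the main(args, stdin, pwd) signature, the flags, and the out_dir=pwd or OUTPUT_DIR fallback come from the diff above; the module path archivebox.cli.archivebox_add, the example URL and directory, and the assumption that accept_stdin() will read any file-like object are illustrative guesses, not part of this commit.

# Sketch only: rough programmatic equivalent of
#   echo 'https://example.com' | archivebox add --index-only
import io

from archivebox.cli.archivebox_add import main  # assumed module path

main(
    args=['--index-only'],                       # parsed by the argparse parser shown in the diff
    stdin=io.StringIO('https://example.com\n'),  # links piped in instead of an import_path argument
    pwd='/path/to/archive',                      # hypothetical collection dir; falls back to OUTPUT_DIR when omitted
)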