Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-12 22:25:44 -04:00
move everything out of legacy folder
commit 1b8abc0961 (parent 553f312125)
74 changed files with 3162 additions and 2629 deletions
@@ -7,90 +7,75 @@ __description__ = 'Add a new URL or list of URLs to your archive'
 import sys
 import argparse
 
-from typing import List, Optional
+from typing import List, Optional, IO
 
-from ..legacy.config import stderr, check_dependencies, check_data_folder
-from ..legacy.util import (
-    handle_stdin_import,
-    handle_file_import,
-)
-from ..legacy.main import update_archive_data
+from ..main import add
+from ..util import SmartFormatter, accept_stdin
+from ..config import OUTPUT_DIR, ONLY_NEW
 
 
-def main(args: List[str]=None, stdin: Optional[str]=None) -> None:
-    check_data_folder()
-
-    args = sys.argv[1:] if args is None else args
-
+def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     parser = argparse.ArgumentParser(
         prog=__command__,
         description=__description__,
         add_help=True,
+        formatter_class=SmartFormatter,
     )
-    # parser.add_argument(
-    #     '--depth', #'-d',
-    #     type=int,
-    #     help='Recursively archive all linked pages up to this many hops away',
-    #     default=0,
-    # )
     parser.add_argument(
-        '--only-new', #'-n',
+        '--update-all', #'-n',
         action='store_true',
-        help="Don't attempt to retry previously skipped/failed links when updating",
+        default=not ONLY_NEW,
+        help="Also retry previously skipped/failed links when adding new links",
     )
     parser.add_argument(
         '--index-only', #'-o',
         action='store_true',
         help="Add the links to the main index without archiving them",
     )
-    # parser.add_argument(
-    #     '--mirror', #'-m',
-    #     action='store_true',
-    #     help='Archive an entire site (finding all linked pages below it on the same domain)',
-    # )
-    # parser.add_argument(
-    #     '--crawler', #'-r',
-    #     choices=('depth_first', 'breadth_first'),
-    #     help='Controls which crawler to use in order to find outlinks in a given page',
-    #     default=None,
-    # )
     parser.add_argument(
-        'url',
+        'import_path',
         nargs='?',
         type=str,
         default=None,
-        help='URL of page to archive (or path to local file)'
+        help=(
+            'URL or path to local file containing a list of links to import. e.g.:\n'
+            '    https://getpocket.com/users/USERNAME/feed/all\n'
+            '    https://example.com/some/rss/feed.xml\n'
+            '    ~/Downloads/firefox_bookmarks_export.html\n'
+            '    ~/Desktop/sites_list.csv\n'
+        )
     )
-    command = parser.parse_args(args)
-
-    check_dependencies()
-
-    ### Handle ingesting urls piped in through stdin
-    # (.e.g if user does cat example_urls.txt | archivebox add)
-    import_path = None
-    if stdin or not sys.stdin.isatty():
-        stdin_raw_text = stdin or sys.stdin.read()
-        if stdin_raw_text and command.url:
-            stderr(
-                '[X] You should pass either a path as an argument, '
-                'or pass a list of links via stdin, but not both.\n'
-            )
-            raise SystemExit(1)
-
-        import_path = handle_stdin_import(stdin_raw_text)
-
-    ### Handle ingesting url from a remote file/feed
-    # (e.g. if an RSS feed URL is used as the import path)
-    elif command.url:
-        import_path = handle_file_import(command.url)
-
-    update_archive_data(
-        import_path=import_path,
-        resume=None,
-        only_new=command.only_new,
-    )
+    command = parser.parse_args(args or ())
+    import_str = accept_stdin(stdin)
+    add(
+        import_str=import_str,
+        import_path=command.import_path,
+        update_all=command.update_all,
+        index_only=command.index_only,
+        out_dir=pwd or OUTPUT_DIR,
+    )
 
 
 if __name__ == '__main__':
-    main()
+    main(args=sys.argv[1:], stdin=sys.stdin)
+
+
+# TODO: Implement these
+#
+# parser.add_argument(
+#     '--depth', #'-d',
+#     type=int,
+#     help='Recursively archive all linked pages up to this many hops away',
+#     default=0,
+# )
+# parser.add_argument(
+#     '--mirror', #'-m',
+#     action='store_true',
+#     help='Archive an entire site (finding all linked pages below it on the same domain)',
+# )
+# parser.add_argument(
+#     '--crawler', #'-r',
+#     choices=('depth_first', 'breadth_first'),
+#     help='Controls which crawler to use in order to find outlinks in a given page',
+#     default=None,
+# )
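
For orientation, a minimal usage sketch of the rewritten entrypoint follows. Only the main(args, stdin, pwd) signature, the flags, and the out_dir=pwd or OUTPUT_DIR fallback come from the diff above; the module path archivebox.cli.archivebox_add, the example URL and directory, and the assumption that accept_stdin() will read any file-like object are illustrative guesses, not part of this commit.

# Sketch only: rough programmatic equivalent of
#   echo 'https://example.com' | archivebox add --index-only
import io

from archivebox.cli.archivebox_add import main  # assumed module path

main(
    args=['--index-only'],                       # parsed by the argparse parser shown in the diff
    stdin=io.StringIO('https://example.com\n'),  # links piped in instead of an import_path argument
    pwd='/path/to/archive',                      # hypothetical collection dir; falls back to OUTPUT_DIR when omitted
)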