mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-15 15:44:26 -04:00
add command: --parser option
This commit is contained in:
parent
308be35367
commit
60bd9a902e
6 changed files with 73 additions and 21 deletions
29
archivebox/parsers/url_list.py
Normal file
29
archivebox/parsers/url_list.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
__package__ = 'archivebox.parsers'
|
||||
__description__ = 'URL list'
|
||||
|
||||
from typing import IO, Iterable
|
||||
from datetime import datetime
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import (
|
||||
enforce_types
|
||||
)
|
||||
|
||||
|
||||
@enforce_types
def parse_url_list(text_file: IO[str], **_kwargs) -> Iterable[Link]:
    """Parse raw URLs from each line in a text file.

    The stream is rewound to its start, then read line by line. Each line
    is stripped of surrounding whitespace; lines that are empty after
    stripping are skipped, and every other line is used verbatim as a URL
    (no further validation is performed here).

    Args:
        text_file: Seekable text stream to read. Its ``name`` attribute is
            recorded as the single source of every yielded link.
        **_kwargs: Accepted and ignored by this parser.

    Yields:
        One ``Link`` per non-blank line, timestamped with the moment the
        line was parsed, with ``title`` and ``tags`` set to ``None``.
    """
    # Always start from the beginning, regardless of the current position.
    text_file.seek(0)

    # Iterate the file object directly so lines are streamed instead of
    # materialising the whole file in memory with readlines().
    for line in text_file:
        url = line.strip()
        if not url:
            continue

        yield Link(
            url=url,
            # Stringified POSIX timestamp taken at parse time.
            timestamp=str(datetime.now().timestamp()),
            title=None,
            tags=None,
            # A fresh list per link so links never share one sources list.
            sources=[text_file.name],
        )
|
Loading…
Add table
Add a link
Reference in a new issue