mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
parent
ca2c484a8e
commit
5478d13d52
6 changed files with 168 additions and 53 deletions
34
archivebox/parsers/generic_jsonl.py
Normal file
34
archivebox/parsers/generic_jsonl.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
__package__ = 'archivebox.parsers'
|
||||
|
||||
import json
|
||||
|
||||
from typing import IO, Iterable
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import (
|
||||
htmldecode,
|
||||
enforce_types,
|
||||
)
|
||||
|
||||
from .generic_json import jsonObjectToLink
|
||||
|
||||
def parse_line(line: str):
    """Decode a single line of a JSONL document.

    Returns the value produced by ``json.loads`` for the line, or ``None``
    when the line is empty or consists only of whitespace.
    """
    # Blank lines carry no record, so skip decoding them.
    if line.strip() == "":
        return None
    return json.loads(line)
|
||||
|
||||
@enforce_types
def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
    """Yield links parsed from a JSONL (one JSON value per line) export file.

    The file is rewound to its start and every line is decoded with
    ``parse_line`` before the first link is produced. Blank lines and
    decoded values that are falsy are skipped; each remaining value is
    handed to ``jsonObjectToLink`` together with ``json_file.name``.

    Any extra keyword arguments are accepted and ignored.
    """
    json_file.seek(0)

    # Decode the whole file up front: a malformed line therefore raises
    # before any link has been yielded.
    records = list(map(parse_line, json_file))

    for record in records:
        if not record:
            continue
        yield jsonObjectToLink(record, json_file.name)
|
||||
|
||||
# Short identifier for this parser.
# NOTE(review): presumably the key used to select this parser by name — confirm against the parser registry.
KEY = 'jsonl'
|
||||
# Human-readable name of this parser.
# NOTE(review): presumably shown in user-facing output — confirm where it is displayed.
NAME = 'Generic JSONL'
|
||||
# Entry point for this format: the generator function defined in this module.
PARSER = parse_generic_jsonl_export
|
Loading…
Add table
Add a link
Reference in a new issue