Add generic_jsonl parser

Resolves #1369
This commit is contained in:
jim winstead 2024-02-29 18:15:06 -08:00
parent ca2c484a8e
commit 5478d13d52
6 changed files with 168 additions and 53 deletions

View file

@ -0,0 +1,34 @@
# Set the package name explicitly so the relative imports below
# (``..index.schema``, ``..util``, ``.generic_json``) resolve against
# ``archivebox.parsers``.
__package__ = 'archivebox.parsers'
import json
from typing import IO, Iterable
from datetime import datetime, timezone
from ..index.schema import Link
from ..util import (
htmldecode,
enforce_types,
)
from .generic_json import jsonObjectToLink
def parse_line(line: str):
    """Decode a single line of a JSONL document.

    Returns the value produced by ``json.loads`` for the line, or ``None``
    when the line is empty or consists only of whitespace. A line that is
    not valid JSON is not handled here: the error raised by ``json.loads``
    propagates to the caller.
    """
    # Blank / whitespace-only lines carry no record.
    if not line.strip():
        return None
    return json.loads(line)
@enforce_types
def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
    """Generate Links from a JSONL (one JSON value per line) export file.

    The file is rewound to its start and every line is decoded with
    ``parse_line`` before anything is yielded, so a malformed line raises
    before the first Link is produced. Blank lines and records that decode
    to a falsy value are skipped. Every remaining record is handed to
    ``jsonObjectToLink`` together with ``json_file.name``.

    Extra keyword arguments are accepted and ignored.
    """
    json_file.seek(0)

    # Decode the whole file up front; conversion starts only after every
    # line has parsed successfully.
    records = list(map(parse_line, json_file))

    for record in records:
        if not record:
            continue
        yield jsonObjectToLink(record, json_file.name)
# Module-level metadata describing this parser.
# NOTE(review): presumably consumed by the parser registry in the
# archivebox.parsers package — confirm against the caller.

# Short identifier for this parser.
KEY = 'jsonl'
# Human-readable parser name.
NAME = 'Generic JSONL'
# Callable that performs the parsing for this format.
PARSER = parse_generic_jsonl_export