fix lint errors

This commit is contained in:
Nick Sweeting 2024-04-25 21:36:11 -07:00
parent 1d9e7ec66a
commit 6a6ae7468e
No known key found for this signature in database
12 changed files with 16 additions and 35 deletions

View file

@@ -7,7 +7,6 @@ For examples of supported import formats see tests/.
__package__ = 'archivebox.parsers'
import re
from io import StringIO
from typing import IO, Tuple, List, Optional
@@ -28,7 +27,6 @@ from ..util import (
htmldecode,
download_url,
enforce_types,
find_all_urls,
)
from ..index.schema import Link
from ..logging_util import TimedProgress, log_source_saved

View file

@@ -72,21 +72,13 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
json_file.seek(0)
try:
links = json.load(json_file)
if type(links) != list:
raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
except json.decoder.JSONDecodeError:
# sometimes the first line is a comment or other junk, so try without
json_file.seek(0)
first_line = json_file.readline()
#print(' > Trying JSON parser without first line: "', first_line.strip(), '"', sep= '')
links = json.load(json_file)
# we may fail again, which means we really don't know what to do
links = json.load(json_file)
if type(links) != list:
raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
for link in links:
if link:
yield jsonObjectToLink(link,json_file.name)
yield jsonObjectToLink(link, json_file.name)
KEY = 'json'
NAME = 'Generic JSON'

View file

@@ -3,11 +3,9 @@ __package__ = 'archivebox.parsers'
import json
from typing import IO, Iterable
from datetime import datetime, timezone
from ..index.schema import Link
from ..util import (
htmldecode,
enforce_types,
)

View file

@@ -1,8 +1,6 @@
__package__ = 'archivebox.parsers'
__description__ = 'Plain Text'
import re
from typing import IO, Iterable
from datetime import datetime, timezone
from pathlib import Path