mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-17 08:34:26 -04:00
refactor: All parsers return snapshot instead of link
This commit is contained in:
parent
15d88be229
commit
b8efaa5b6a
11 changed files with 72 additions and 53 deletions
|
@ -9,7 +9,6 @@ from configparser import ConfigParser
|
|||
from pathlib import Path
|
||||
from ..vendor.pocket import Pocket
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import enforce_types
|
||||
from ..system import atomic_write
|
||||
from ..config import (
|
||||
|
@ -46,19 +45,21 @@ def get_pocket_articles(api: Pocket, since=None, page=0):
|
|||
api.last_since = body['since']
|
||||
|
||||
|
||||
def link_from_article(article: dict, sources: list):
|
||||
def snapshot_from_article(article: dict, sources: list):
|
||||
from core.models import Snapshot
|
||||
|
||||
url: str = article['resolved_url'] or article['given_url']
|
||||
broken_protocol = _BROKEN_PROTOCOL_RE.match(url)
|
||||
if broken_protocol:
|
||||
url = url.replace(f'{broken_protocol.group(1)}:/', f'{broken_protocol.group(1)}://')
|
||||
title = article['resolved_title'] or article['given_title'] or url
|
||||
|
||||
return Link(
|
||||
return Snapshot(
|
||||
url=url,
|
||||
timestamp=article['time_read'],
|
||||
title=title,
|
||||
tags=article.get('tags'),
|
||||
sources=sources
|
||||
#tags=article.get('tags'),
|
||||
#sources=sources
|
||||
)
|
||||
|
||||
|
||||
|
@ -108,6 +109,6 @@ def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
|
|||
api.last_since = None
|
||||
|
||||
for article in get_pocket_articles(api, since=read_since(username)):
|
||||
yield link_from_article(article, sources=[line])
|
||||
yield snapshot_from_article(article, sources=[line])
|
||||
|
||||
write_since(username, api.last_since)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue