move utils and vendored libs into subfolders

This commit is contained in:
Nick Sweeting 2020-12-06 02:01:18 +02:00
parent 8440858751
commit a0a79cead8
9 changed files with 413 additions and 52 deletions

View file

@ -4,34 +4,35 @@ __package__ = 'archivebox.parsers'
import re
from typing import IO, Iterable, Optional
from datetime import datetime
from configparser import ConfigParser
from pathlib import Path
from pocket import Pocket
import requests
from ..vendor.pocket import Pocket
from ..index.schema import Link
from ..util import (
enforce_types,
)
from ..util import enforce_types
from ..system import atomic_write
from ..config import (
SOURCES_DIR
SOURCES_DIR,
POCKET_CONSUMER_KEY,
POCKET_ACCESS_TOKENS,
)
# NOTE(review): this span appears to be a rendered diff, so each constant shows up
# twice: first under its underscore-prefixed name, then under the un-prefixed name.
# Number of articles requested per Pocket API call; get_pocket_articles passes it
# as `count` and multiplies it by the page index to get `offset`.
_COUNT_PER_PAGE = 500
# File under SOURCES_DIR that write_since/read_since use to store per-user `since` values.
_API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
COUNT_PER_PAGE = 500
API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
# search for broken protocols that sometimes come from the Pocket API
# (matches `http:/` or `https:/` at the start of a string when it is NOT followed by a second `/`)
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
def get_pocket_articles(api: Pocket, since=None, page=0):
body, headers = api.get(
state='archive',
sort='oldest',
since=since,
count=_COUNT_PER_PAGE,
offset=page * _COUNT_PER_PAGE,
count=COUNT_PER_PAGE,
offset=page * COUNT_PER_PAGE,
)
articles = body['list'].values() if isinstance(body['list'], dict) else body['list']
@ -39,7 +40,7 @@ def get_pocket_articles(api: Pocket, since=None, page=0):
yield from articles
if returned_count == _COUNT_PER_PAGE:
if returned_count == COUNT_PER_PAGE:
yield from get_pocket_articles(api, since=since, page=page + 1)
else:
api.last_since = body['since']
@ -60,56 +61,53 @@ def link_from_article(article: dict, sources: list):
sources=sources
)
# Store `since` under the `[username]` section of the INI-style file at the API db path.
# NOTE(review): this span appears to be a rendered diff -- the variant with the local
# `atomic_write` import and `_API_DB_PATH` is followed by the variant that uses
# `API_DB_PATH`; the lines that occur only once are shared by both.
def write_since(username: str, since: str):
from ..system import atomic_write
if not _API_DB_PATH.exists():
atomic_write(_API_DB_PATH, '')
def write_since(username: str, since: str):
# create an empty db file when none exists yet
if not API_DB_PATH.exists():
atomic_write(API_DB_PATH, '')
since_file = ConfigParser()
# use option names as written instead of ConfigParser's default lowercasing
since_file.optionxform = str
# load the existing sections so the write below keeps them
since_file.read(_API_DB_PATH)
since_file.read(API_DB_PATH)
# replace this user's whole section with the single `since` option
since_file[username] = {
'since': since
}
# rewrite the complete file with the updated sections
with open(_API_DB_PATH, 'w+') as new:
with open(API_DB_PATH, 'w+') as new:
since_file.write(new)
# Return the `since` value stored for `username` in the API db file, or None when
# that section or option is absent (the `fallback=None` on the final lookup).
# NOTE(review): this span appears to be a rendered diff -- the variant with the local
# `atomic_write` import and `_API_DB_PATH` is followed by the variant that uses
# `API_DB_PATH`; the lines that occur only once are shared by both.
def read_since(username: str) -> Optional[str]:
from ..system import atomic_write
if not _API_DB_PATH.exists():
atomic_write(_API_DB_PATH, '')
def read_since(username: str) -> Optional[str]:
# note: reading has a side effect -- an empty db file is created when none exists
if not API_DB_PATH.exists():
atomic_write(API_DB_PATH, '')
config_file = ConfigParser()
# use option names as written instead of ConfigParser's default lowercasing
config_file.optionxform = str
config_file.read(_API_DB_PATH)
config_file.read(API_DB_PATH)
return config_file.get(username, 'since', fallback=None)
@enforce_types
def should_parse_as_pocket_api(text: str) -> bool:
    """Return True when `text` begins with the `pocket://` prefix."""
    scheme_prefix = 'pocket://'
    return text.startswith(scheme_prefix)
@enforce_types
def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
    """Parse bookmarks from the Pocket API.

    The buffer is rewound and read line by line. Each line that starts with
    `pocket://` is expected to look like `pocket://<username>`; for every such
    line a Pocket client is built from POCKET_CONSUMER_KEY and the user's
    entry in POCKET_ACCESS_TOKENS, and the articles returned by
    get_pocket_articles (starting from the stored `since` value) are yielded
    as Links with the originating line as their source.

    Lines that do not start with `pocket://`, or that have no username after
    the prefix, are skipped.

    Raises:
        KeyError: if POCKET_ACCESS_TOKENS has no entry for a username.
    """
    input_buffer.seek(0)
    pattern = re.compile(r"^pocket:\/\/(\w+)")

    for line in input_buffer:
        if not should_parse_as_pocket_api(line):
            continue

        match = pattern.search(line)
        if match is None:
            # `pocket://` with nothing matching `\w+` after it: there is no
            # username to look up, so skip the line instead of failing on
            # `None.group(1)`.
            continue

        username = match.group(1)
        api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
        # get_pocket_articles stores the API's `since` value on this attribute
        api.last_since = None

        for article in get_pocket_articles(api, since=read_since(username)):
            yield link_from_article(article, sources=[line])

        # only reached once every article for this user has been consumed
        write_since(username, api.last_since)