rename vendor dir to pkgs

This commit is contained in:
Nick Sweeting 2024-10-28 20:05:20 -07:00
parent 7d75867650
commit dee4eb7992
No known key found for this signature in database
168 changed files with 47 additions and 54 deletions

View file

@ -0,0 +1,18 @@
import abx
@abx.hookimpl
def get_CONFIG():
    """Hook implementation returning this plugin's config set under the 'curl' key."""
    # The import lives inside the function, so the config module is only
    # loaded when the hook is actually called.
    from .config import CURL_CONFIG

    return {'curl': CURL_CONFIG}
@abx.hookimpl
def get_BINARIES():
    """Hook implementation returning this plugin's binary under the 'curl' key."""
    # The import lives inside the function, so the binaries module is only
    # loaded when the hook is actually called.
    from .binaries import CURL_BINARY

    return {'curl': CURL_BINARY}

View file

@ -0,0 +1,18 @@
__package__ = 'abx_plugin_curl'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env
from .config import CURL_CONFIG
class CurlBinary(Binary):
    """Binary declaration for the curl executable used by this plugin."""

    # The binary name is taken from the plugin config rather than hard-coded.
    name: BinName = CURL_CONFIG.CURL_BINARY

    # Providers declared as supported for this binary.
    binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]


# Module-level instance created at import time.
CURL_BINARY = CurlBinary()

View file

@ -0,0 +1,33 @@
__package__ = 'abx_plugin_curl'
from typing import List, Optional
from pathlib import Path
from pydantic import Field
from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
class CurlConfig(BaseConfigSet):
    """Config options for the curl plugin."""

    # Feature switches; each one defaults to enabled.
    SAVE_TITLE: bool = Field(default=True)
    SAVE_HEADERS: bool = Field(default=True)
    USE_CURL: bool = Field(default=True)

    # Executable name and command-line arguments.
    CURL_BINARY: str = Field(default='curl')
    CURL_ARGS: List[str] = ['--silent', '--location', '--compressed']
    CURL_EXTRA_ARGS: List[str] = []

    # Each default below is a callable reading the matching ARCHIVING_CONFIG option.
    # NOTE(review): presumably BaseConfigSet invokes these callables to obtain the
    # real value -- confirm, because plain pydantic would keep the function
    # object itself as the default (that is what default_factory is for).
    CURL_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
    CURL_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
    CURL_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)
    CURL_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE)


# Module-level instance created at import time.
CURL_CONFIG = CurlConfig()

View file

@ -0,0 +1,76 @@
__package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
from archivebox.misc.system import atomic_write
from archivebox.misc.util import (
enforce_types,
get_headers,
dedupe,
)
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from ..logging_util import TimedProgress
def get_output_path() -> str:
    """Return the file name that the saved headers are written to."""
    headers_filename = 'headers.json'
    return headers_filename
@enforce_types
def should_save_headers(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
    """Decide whether the headers for `link` should be saved.

    Args:
        link: The link whose `link_dir` is used when `out_dir` is not given.
        out_dir: Optional directory to check instead of `link.link_dir`.
        overwrite: When truthy, an existing output file does not block saving.

    Returns:
        False if the output file already exists and `overwrite` is falsy;
        otherwise the value of `CURL_CONFIG.SAVE_HEADERS`.
    """
    target_dir = Path(out_dir or link.link_dir)
    assert target_dir

    # With overwrite set the filesystem is never consulted.
    if overwrite:
        return CURL_CONFIG.SAVE_HEADERS

    if (target_dir / get_output_path()).exists():
        return False

    return CURL_CONFIG.SAVE_HEADERS
@enforce_types
def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=CURL_CONFIG.CURL_TIMEOUT) -> ArchiveResult:
    """Fetch the headers for `link.url` and write them to the output file.

    The headers are obtained through `get_headers()`. A curl command line is
    also assembled, but this function only records it in the returned result;
    it does not execute it.

    Args:
        link: The link to fetch; `link.link_dir` is used when `out_dir` is not given.
        out_dir: Optional directory to write the output file into.
        timeout: Timeout passed to `get_headers()` and recorded as `--max-time`.

    Returns:
        An ArchiveResult whose status is 'succeeded' with the output file name,
        or 'failed' with the caught exception as its output.
    """
    binary = CURL_BINARY.load()
    assert binary.abspath and binary.version

    link_dir = Path(out_dir or link.link_dir)
    dest_dir = link_dir.absolute()

    output: ArchiveOutput = get_output_path()
    status = 'succeeded'
    timer = TimedProgress(timeout + 1, prefix=' ')

    # later options take precedence
    options = [
        *CURL_CONFIG.CURL_ARGS,
        *CURL_CONFIG.CURL_EXTRA_ARGS,
        '--head',
        '--max-time',
        str(timeout),
    ]
    if CURL_CONFIG.CURL_USER_AGENT:
        options += ['--user-agent', f'{CURL_CONFIG.CURL_USER_AGENT}']
    if not CURL_CONFIG.CURL_CHECK_SSL_VALIDITY:
        options.append('--insecure')

    cmd = [str(binary.abspath), *dedupe(options), link.url]

    try:
        headers_json = get_headers(link.url, timeout=timeout)
        dest_dir.mkdir(exist_ok=True)
        atomic_write(str(dest_dir / get_output_path()), headers_json)
    except Exception as exc:
        # Any failure is reported through the result instead of being raised.
        status = 'failed'
        output = exc
    finally:
        timer.end()

    return ArchiveResult(
        cmd=cmd,
        pwd=str(link_dir),
        cmd_version=str(binary.version),
        output=output,
        status=status,
        **timer.stats,
    )

View file

@ -0,0 +1,18 @@
# Packaging metadata for the abx-plugin-curl distribution.
[project]
name = "abx-plugin-curl"
version = "2024.10.24"
# NOTE(review): this is placeholder text -- replace it with a real description.
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
# NOTE(review): the plugin's binaries module imports pydantic_pkgr and
# abx_plugin_default_binproviders, and its config module imports
# archivebox.config.common; confirm these are all provided by the packages below.
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
]
# The package is built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Registers the abx_plugin_curl module in the "abx" entry-point group.
[project.entry-points.abx]
abx_plugin_curl = "abx_plugin_curl"