This commit is contained in:
Nick Sweeting 2024-10-25 01:06:12 -07:00
parent 4b6f08b0fe
commit 5d9a32c364
No known key found for this signature in database
178 changed files with 2982 additions and 1322 deletions

View file

@ -1,131 +0,0 @@
__package__ = 'abx'
import importlib
from pathlib import Path
from typing import Dict, Callable, List
from . import hookspec as base_spec
from abx.hookspec import hookimpl, hookspec # noqa
from abx.manager import pm, PluginManager # noqa
pm.add_hookspecs(base_spec)
###### PLUGIN DISCOVERY AND LOADING ########################################################
def get_plugin_order(plugin_entrypoint: Path) -> tuple[int, Path]:
    """Return the sort key (order, path) for a plugin's __init__.py entrypoint.

    Plugins can control their load priority by placing a `.plugin_order` file
    containing an integer next to their `__init__.py`; lower numbers load first.
    Plugins without one (or with an invalid one) default to order 999.
    """
    order = 999
    try:
        # if .plugin_order file exists, use it to set the load priority
        order = int((plugin_entrypoint.parent / '.plugin_order').read_text())
    except FileNotFoundError:
        pass
    except ValueError:
        # file exists but does not contain a valid integer: fall back to the
        # default instead of crashing all plugin discovery over one bad file
        pass
    return (order, plugin_entrypoint)
def register_hookspecs(hookspecs: List[str]):
    """Import each module path in *hookspecs* and register it as a pluggy hookspec."""
    for module_path in hookspecs:
        spec_module = importlib.import_module(module_path)
        pm.add_hookspecs(spec_module)
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """Discover plugins in *plugins_dir*: any subdirectory containing an __init__.py.

    Returns {f"{prefix}.{dirname}": dir_path}, ordered by each plugin's
    `.plugin_order` priority, e.g. {"plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"}.
    """
    discovered: Dict[str, Path] = {}
    for entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order):
        plugin_dir = entrypoint.parent
        if plugin_dir.name == 'abx':
            continue  # never treat the abx framework itself as a plugin
        discovered[f"{prefix}.{plugin_dir.name}"] = plugin_dir
    return discovered
def get_pip_installed_plugins(group='abx'):
    """Find pip-installed plugins that registered an entrypoint in *group*.

    Stand-in for pm.load_setuptools_entrypoints("abx"): instead of registering,
    returns {entrypoint_name: module_dir_path} for later explicit loading.
    """
    import importlib.metadata

    detected = {}  # module_name: module_dir_path
    for distribution in importlib.metadata.distributions():
        for ep in distribution.entry_points:
            if ep.group != group or pm.is_blocked(ep.name):
                continue
            detected[ep.name] = Path(ep.load().__file__).parent
            # pm.register(plugin, name=ep.name)
            # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
    return detected
def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
    """Merge plugin discovery results across every (prefix, directory) pair.

    Returns the combined {plugin_id: plugin_dir} mapping for all given dirs.
    """
    detected = {}
    for prefix, directory in plugin_dirs.items():
        detected.update(find_plugins_in_dir(directory, prefix=prefix))
    return detected
# Load all plugins from pip packages, archivebox built-ins, and user plugins
def load_plugins(plugins_dict: Dict[str, Path]):
    """Import every plugin module, register it with pluggy, and collect its PLUGIN.

    Returns {module_name: module.PLUGIN} for all loaded plugins.
    """
    loaded = {}
    for module_name in plugins_dict:
        module = importlib.import_module(module_name)
        pm.register(module)
        loaded[module_name] = module.PLUGIN
    return loaded
def get_registered_plugins():
    """Describe every plugin currently registered with pluggy.

    Returns {plugin_name: {"name": ..., "hooks": [...], "version"?: ...}};
    name/version come from pip distribution metadata when available.
    """
    plugin_to_distinfo = dict(pm.list_plugin_distinfo())
    plugins = {}
    for plugin in pm.get_plugins():
        callers = pm.get_hookcallers(plugin) or ()
        info = {
            "name": plugin.__name__,
            "hooks": [caller.name for caller in callers],
        }
        distinfo = plugin_to_distinfo.get(plugin)
        if distinfo:
            # prefer the pip distribution's name/version over the module's
            info["version"] = distinfo.version
            info["name"] = getattr(distinfo, "name", None) or distinfo.project_name
        plugins[info["name"]] = info
    return plugins
def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]:
    """
    Get all the functions marked with @hookimpl on a module.
    """
    if not plugin_pkg:
        return {}

    hooks = {}

    plugin_module = importlib.import_module(plugin_pkg)
    for attr_name in dir(plugin_module):
        if attr_name.startswith('_'):
            continue  # skip private/dunder attributes
        try:
            attr = getattr(plugin_module, attr_name)
            if isinstance(attr, Callable):
                # NOTE(review): the return value of parse_hookimpl_opts() is ignored,
                # so every public callable ends up in `hooks`, not only @hookimpl ones;
                # the call only serves to trigger (and catch) parsing errors — confirm
                # whether the opts were meant to filter the dict.
                hooks[attr_name] = None   # placeholder: stays None if parsing raises mid-way
                pm.parse_hookimpl_opts(plugin_module, attr_name)
                hooks[attr_name] = attr
        except Exception as e:
            print(f'Error getting hookimpls for {plugin_pkg}: {e}')

    return hooks

View file

@ -1,30 +0,0 @@
__package__ = 'abx.archivebox'
import os
import importlib
from typing import Dict
from pathlib import Path
def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
    """Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py"""
    # NOTE(review): LOADED_PLUGINS is never populated below, so this always returns {};
    # confirm whether callers use the return value or only the registration side effects.
    LOADED_PLUGINS = {}
    # reversed() so later entries in plugins_dict get registered first (requires py3.8+ dict views)
    for plugin_module, plugin_dir in reversed(plugins_dict.items()):
        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')

        # 1. register the plugin module directly in case it contains any look hookimpls (e.g. in __init__.py)
        try:
            plugin_module_loaded = importlib.import_module(plugin_module)
            pm.register(plugin_module_loaded)
        except Exception as e:
            # best-effort: a broken plugin should not stop the others from loading
            print(f'Error registering plugin: {plugin_module} - {e}')

        # 2. then try to import plugin_module.apps as well
        if os.access(plugin_dir / 'apps.py', os.R_OK):
            plugin_apps = importlib.import_module(plugin_module + '.apps')
            pm.register(plugin_apps)       # register the whole .apps in case it contains loose hookimpls (not in a class)

        # print(f'  √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}')
    return LOADED_PLUGINS

View file

@ -1,117 +0,0 @@
__package__ = "abx.archivebox"
import os
from typing import Optional, cast
from typing_extensions import Self
from pydantic import validate_call
from pydantic_pkgr import (
Binary,
BinProvider,
BinProviderName,
AptProvider,
BrewProvider,
EnvProvider,
)
from archivebox.config.permissions import ARCHIVEBOX_USER
import abx
class BaseBinProvider(BinProvider):
    """Base class for ArchiveBox binproviders, wired into the abx hook system."""
    # TODO: add install/load/load_or_install methods as abx.hookimpl methods

    @property
    def admin_url(self) -> str:
        # e.g. /admin/environment/binproviders/NpmBinProvider/   TODO
        return "/admin/environment/binaries/"

    @abx.hookimpl
    def get_BINPROVIDERS(self):
        # abx hook: expose this binprovider instance to the plugin system
        return [self]
class BaseBinary(Binary):
    """Base class for ArchiveBox-managed binaries.

    Extends pydantic_pkgr.Binary with: symlinking the resolved binary into the
    shared LIB_DIR/bin, caching load results via the InstalledBinary db model,
    and exposing itself through the abx get_BINARIES() hook.
    """
    # TODO: formalize state diagram, final states, transitions, side effects, etc.

    @staticmethod
    def symlink_to_lib(binary, bin_dir=None) -> None:
        """Best-effort symlink of binary.abspath into bin_dir (default: LIB_DIR/bin)."""
        from archivebox.config.common import STORAGE_CONFIG
        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'

        if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
            return  # nothing usable to link to

        try:
            bin_dir.mkdir(parents=True, exist_ok=True)
            symlink = bin_dir / binary.name
            symlink.unlink(missing_ok=True)
            symlink.symlink_to(binary.abspath)
            symlink.chmod(0o777)   # make sure its executable by everyone
        except Exception as err:
            # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
            # not actually needed, we can just run without it
            pass

    @validate_call
    def load(self, fresh=False, **kwargs) -> Self:
        """Resolve this binary's abspath/version; uses the db cache unless fresh=True."""
        from archivebox.config.common import STORAGE_CONFIG
        if fresh:
            binary = super().load(**kwargs)
            self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
        else:
            # get cached binary from db
            try:
                from machine.models import InstalledBinary
                installed_binary = InstalledBinary.objects.get_from_db_or_cache(self)   # type: ignore
                binary = InstalledBinary.load_from_db(installed_binary)
            except Exception:
                # maybe we are not in a DATA dir so there is no db, fallback to reading from fs
                # (e.g. when archivebox version is run outside of a DATA dir)
                binary = super().load(**kwargs)

        return cast(Self, binary)

    @validate_call
    def install(self, **kwargs) -> Self:
        """Install via the configured binproviders, then symlink into LIB_DIR/bin."""
        from archivebox.config.common import STORAGE_CONFIG
        binary = super().install(**kwargs)
        self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
        return binary

    @validate_call
    def load_or_install(self, fresh=False, **kwargs) -> Self:
        """Try load() first; fall back to install() if loading fails or finds no version."""
        from archivebox.config.common import STORAGE_CONFIG
        try:
            binary = self.load(fresh=fresh)
            if binary and binary.version:
                self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
                return binary
        except Exception:
            pass  # fall through to a fresh install attempt
        return self.install(**kwargs)

    @property
    def admin_url(self) -> str:
        # e.g. /admin/environment/config/LdapConfig/
        return f"/admin/environment/binaries/{self.name}/"

    @abx.hookimpl
    def get_BINARIES(self):
        # abx hook: expose this binary instance to the plugin system
        return [self]
class AptBinProvider(AptProvider, BaseBinProvider):
    # Debian/Ubuntu apt package manager
    name: BinProviderName = "apt"

class BrewBinProvider(BrewProvider, BaseBinProvider):
    # macOS/Linux homebrew package manager
    name: BinProviderName = "brew"

class EnvBinProvider(EnvProvider, BaseBinProvider):
    # binaries already present on $PATH in the current environment
    name: BinProviderName = "env"
    # run env-provided binaries as the archivebox user
    euid: Optional[int] = ARCHIVEBOX_USER

# shared module-level singleton instances
apt = AptBinProvider()
brew = BrewBinProvider()
env = EnvBinProvider()

View file

@ -1,204 +0,0 @@
__package__ = 'abx.archivebox'
import json
import os
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
from pathlib import Path
from pydantic import AfterValidator
from pydantic_pkgr import BinName
from django.utils.functional import cached_property
from django.utils import timezone
import abx
from .base_binary import BaseBinary
def assert_no_empty_args(args: List[str]) -> List[str]:
    """Validator: ensure no argument in the list is an empty string; return the list."""
    assert all(len(arg) for arg in args)
    return args

def _assert_is_identifier(s: str) -> str:
    """Validator: ensure *s* is a valid Python identifier; return it unchanged."""
    assert s.isidentifier()
    return s

def _assert_is_self_method(s: str) -> str:
    """Validator: ensure *s* references a method on self; return it unchanged."""
    assert s.startswith('self.')
    return s

# NOTE: pydantic AfterValidator functions must return the (possibly transformed)
# value. The previous lambdas returned a bool, which would have replaced the
# validated string with True/False wherever these annotations are enforced.
ExtractorName = Annotated[str, AfterValidator(_assert_is_identifier)]
HandlerFuncStr = Annotated[str, AfterValidator(_assert_is_self_method)]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
class BaseExtractor:
    """Base class for archiving extractors.

    An extractor runs an external binary against a snapshot's URL and records
    its output files along with timing, health, and provenance metadata.
    """
    name: ExtractorName         # unique identifier, must be a valid python identifier
    binary: BinName             # name of the binary this extractor shells out to

    default_args: CmdArgsList = []
    extra_args: CmdArgsList = []

    def get_output_path(self, snapshot) -> Path:
        # relative output subdir, named after the extractor class (lowercased)
        return Path(self.__class__.__name__.lower())

    def should_extract(self, uri: str, config: dict | None=None) -> bool:
        """Return True if this extractor is able to run (its binary is installed)."""
        try:
            assert self.detect_installed_binary().version
        except Exception:
            # NOTE(review): this `raise` re-raises, making the `return False` below
            # unreachable — a missing binary propagates the exception instead of
            # returning False. Confirm whether the raise was left in for debugging.
            raise
            # could not load binary
            return False

        # output_dir = self.get_output_path(snapshot)
        # if output_dir.glob('*.*'):
        #     return False
        return True

    @abx.hookimpl
    def extract(self, snapshot_id: str) -> Dict[str, Any]:
        """Run the extractor binary against the given snapshot.

        Returns a result dict containing cmd/stdout/stderr/returncode, output
        files, timing, status, and machine/uplink/binary provenance info, or {}
        if should_extract() declined. Also updates health counters on the
        machine, uplink, and installed binary records.
        """
        from core.models import Snapshot
        from archivebox import CONSTANTS

        snapshot = Snapshot.objects.get(id=snapshot_id)

        if not self.should_extract(snapshot.url):
            return {}

        status = 'failed'
        start_ts = timezone.now()
        uplink = self.detect_network_interface()
        installed_binary = self.detect_installed_binary()
        machine = installed_binary.machine
        assert uplink.machine == installed_binary.machine   # it would be *very* weird if this wasn't true

        output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid)
        output_dir.mkdir(parents=True, exist_ok=True)

        # execute the extractor binary with the given args
        # NOTE(review): `self.args` is not defined on BaseExtractor — presumably set
        # by subclasses; if absent this line raises AttributeError. TODO confirm.
        args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
        cmd = [str(installed_binary.abspath), *args]
        proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)

        # collect the output
        end_ts = timezone.now()
        output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
        stdout = proc.stdout.strip()
        stderr = proc.stderr.strip()
        output_json = None
        output_text = stdout
        try:
            # if stdout parses as JSON, report it structured instead of as plain text
            output_json = json.loads(stdout.strip())
            output_text = None
        except json.JSONDecodeError:
            pass

        errors = []
        if proc.returncode == 0:
            status = 'success'
        else:
            errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')

        # increment health stats counters
        if status == 'success':
            machine.record_health_success()
            uplink.record_health_success()
            installed_binary.record_health_success()
        else:
            machine.record_health_failure()
            uplink.record_health_failure()
            installed_binary.record_health_failure()

        return {
            'extractor': self.name,

            'snapshot': {
                'id': snapshot.id,
                'abid': snapshot.abid,
                'url': snapshot.url,
                'created_by_id': snapshot.created_by_id,
            },

            'machine': {
                'id': machine.id,
                'abid': machine.abid,
                'guid': machine.guid,
                'hostname': machine.hostname,
                'hw_in_docker': machine.hw_in_docker,
                'hw_in_vm': machine.hw_in_vm,
                'hw_manufacturer': machine.hw_manufacturer,
                'hw_product': machine.hw_product,
                'hw_uuid': machine.hw_uuid,
                'os_arch': machine.os_arch,
                'os_family': machine.os_family,
                'os_platform': machine.os_platform,
                'os_release': machine.os_release,
                'os_kernel': machine.os_kernel,
            },

            'uplink': {
                'id': uplink.id,
                'abid': uplink.abid,
                'mac_address': uplink.mac_address,
                'ip_public': uplink.ip_public,
                'ip_local': uplink.ip_local,
                'dns_server': uplink.dns_server,
                'hostname': uplink.hostname,
                'iface': uplink.iface,
                'isp': uplink.isp,
                'city': uplink.city,
                'region': uplink.region,
                'country': uplink.country,
            },

            'binary': {
                'id': installed_binary.id,
                'abid': installed_binary.abid,
                'name': installed_binary.name,
                'binprovider': installed_binary.binprovider,
                'abspath': installed_binary.abspath,
                'version': installed_binary.version,
                'sha256': installed_binary.sha256,
            },

            'cmd': cmd,
            'stdout': stdout,
            'stderr': stderr,
            'returncode': proc.returncode,
            'start_ts': start_ts,
            'end_ts': end_ts,
            'status': status,
            'errors': errors,
            'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
            'output_files': output_files,
            'output_json': output_json or {},
            'output_text': output_text or '',
        }

    # TODO: move this to a hookimpl
    def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
        """Execute the extractor binary with *args* in *cwd* (default: current dir)."""
        cwd = cwd or Path(os.getcwd())
        binary = self.load_binary(installed_binary=installed_binary)

        return binary.exec(cmd=args, cwd=cwd)

    @cached_property
    def BINARY(self) -> BaseBinary:
        """Look up the BaseBinary instance matching self.binary among registered binaries."""
        import abx.archivebox.reads
        for binary in abx.archivebox.reads.get_BINARIES().values():
            if binary.name == self.binary:
                return binary
        raise ValueError(f'Binary {self.binary} not found')

    def detect_installed_binary(self):
        """Return the InstalledBinary db record for this extractor's binary."""
        from machine.models import InstalledBinary
        # hydrates binary from DB/cache if record of installed version is recent enough
        # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
        return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)

    def load_binary(self, installed_binary=None) -> BaseBinary:
        """Return a loaded BaseBinary, detecting the installed record if not given."""
        installed_binary = installed_binary or self.detect_installed_binary()
        return installed_binary.load_from_db()

    def detect_network_interface(self):
        """Return the current NetworkInterface db record for provenance tracking."""
        from machine.models import NetworkInterface
        return NetworkInterface.objects.current()

    @abx.hookimpl
    def get_EXTRACTORS(self):
        # abx hook: expose this extractor instance to the plugin system
        return [self]

View file

@ -1,25 +0,0 @@
__package__ = 'abx.archivebox'
import abx
class BaseReplayer:
    """Describes how to render an ArchiveResult in several contexts"""
    # glob-style pattern of URLs this replayer applies to
    url_pattern: str = '*'

    # template paths for the three rendering contexts
    row_template: str = 'plugins/generic_replayer/templates/row.html'
    embed_template: str = 'plugins/generic_replayer/templates/embed.html'
    fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html'

    # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view'
    # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view'
    # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view'
    # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
    # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'

    @abx.hookimpl
    def get_REPLAYERS(self):
        # abx hook: expose this replayer instance to the plugin system
        return [self]

    # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...

View file

@ -1,52 +0,0 @@
__package__ = 'abx.archivebox'
from typing import Dict, Any
from .. import hookspec
from .base_binary import BaseBinary, BaseBinProvider
from .base_configset import BaseConfigSet
from .base_extractor import BaseExtractor
from .base_searchbackend import BaseSearchBackend
@hookspec
def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
    """Hookspec: each plugin returns {plugin_id: plugin_info_dict}."""
    return {}

@hookspec
def get_CONFIG() -> Dict[str, BaseConfigSet]:
    """Hookspec: each plugin returns {config_id: configset}."""
    return {}

@hookspec
def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
    """Hookspec: each plugin returns {extractor_id: extractor}."""
    return {}

@hookspec
def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
    """Hookspec: each plugin returns {backend_id: searchbackend}."""
    return {}
# @hookspec
# def get_REPLAYERS() -> Dict[str, BaseReplayer]:
# return {}
# @hookspec
# def get_ADMINDATAVIEWS():
# return {}
# @hookspec
# def get_QUEUES():
# return {}
##############################################################
# provided by abx.pydantic_pkgr.hookspec:
# @hookspec
# def get_BINARIES() -> Dict[str, BaseBinary]:
# return {}
# @hookspec
# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
# return {}

View file

@ -1,160 +0,0 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
import abx
from .. import pm
if TYPE_CHECKING:
from .base_configset import BaseConfigSet
from .base_binary import BaseBinary, BaseBinProvider
from .base_extractor import BaseExtractor
from .base_searchbackend import BaseSearchBackend
# from .base_replayer import BaseReplayer
# from .base_queue import BaseQueue
# from .base_admindataview import BaseAdminDataView
# API exposed to ArchiveBox code
def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
    """Merged {plugin_id: plugin_info} from every plugin's get_PLUGIN() hookimpl."""
    merged = benedict()
    for plugin_dict in pm.hook.get_PLUGIN():
        merged.update(plugin_dict)
    return merged
def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
    """Assemble full metadata for a single plugin: module info plus its hookimpls."""
    plugin_info = get_PLUGINS().get(plugin_id, {})
    package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
    if not package:
        # plugin did not declare its package: return a stub with no hooks
        return {'id': plugin_id, 'hooks': {}}
    module = importlib.import_module(package)
    hooks = abx.get_plugin_hooks(module.__package__)
    assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)

    # module dunders provide defaults; explicit plugin_info keys win (spread last)
    return benedict({
        'id': plugin_id,
        'label': getattr(module, '__label__', plugin_id),
        'module': module,
        'package': module.__package__,
        'hooks': hooks,
        'version': getattr(module, '__version__', '999.999.999'),
        'author': getattr(module, '__author__', 'Unknown'),
        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
        'dependencies': getattr(module, '__dependencies__', []),
        'source_code': module.__file__,
        **plugin_info,
    })
def get_HOOKS() -> Set[str]:
    """Union of all hook names implemented across all registered plugins."""
    hook_names: Set[str] = set()
    for plugin_id in get_PLUGINS().keys():
        hook_names.update(get_PLUGIN(plugin_id).hooks)
    return hook_names
def get_CONFIGS() -> benedict:    # Dict[str, 'BaseConfigSet']
    """Merged {config_id: configset} from every plugin's get_CONFIG() hookimpl."""
    merged = benedict()
    for plugin_configs in pm.hook.get_CONFIG():
        merged.update(plugin_configs)
    return merged
def get_FLAT_CONFIG() -> Dict[str, Any]:
    """Flatten all configsets into a single {KEY: value} namespace (later sets win)."""
    flat = benedict()
    for configset in get_CONFIGS().values():
        flat.update(configset.model_dump())
    return flat
def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
    """All binproviders: the built-in env/apt/brew plus plugin-contributed ones."""
    # TODO: move these to plugins
    from abx.archivebox.base_binary import apt, brew, env

    merged = benedict({
        'env': env,
        'apt': apt,
        'brew': brew,
    })
    # plugin-provided binproviders override builtins on id collision
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        merged.update(plugin_binproviders)
    return merged
def get_BINARIES() -> Dict[str, 'BaseBinary']:
    """Merged {binary_id: binary} from every plugin's get_BINARIES() hookimpl."""
    merged = benedict()
    for plugin_binaries in pm.hook.get_BINARIES():
        merged.update(plugin_binaries)
    return merged
def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
    """Merged {extractor_id: extractor} from every plugin's get_EXTRACTORS() hookimpl."""
    merged = benedict()
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        merged.update(plugin_extractors)
    return merged
# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
# return benedict({
# replayer.id: replayer
# for plugin_replayers in pm.hook.get_REPLAYERS()
# for replayer in plugin_replayers
# })
# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
# return benedict({
# admin_dataview.id: admin_dataview
# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
# for admin_dataview in plugin_admin_dataviews
# })
# def get_QUEUES() -> Dict[str, 'BaseQueue']:
# return benedict({
# queue.id: queue
# for plugin_queues in pm.hook.get_QUEUES()
# for queue in plugin_queues
# })
def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
    """Merged {backend_id: searchbackend} from every plugin's get_SEARCHBACKENDS() hookimpl."""
    merged = benedict()
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        merged.update(plugin_searchbackends)
    return merged
def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
    """Get all the relevant config for the given scope, in correct precedence order"""
    from django.conf import settings

    base_config: benedict = defaults or settings.CONFIG

    # walk up the ownership chain to fill in any scopes the caller omitted
    snapshot = snapshot or (archiveresult and archiveresult.snapshot)
    crawl = crawl or (snapshot and snapshot.crawl)
    seed = seed or (crawl and crawl.seed)
    persona = persona or (crawl and crawl.persona)

    # lowest to highest precedence; later layers overwrite earlier ones
    layers = (
        base_config,                                       # defaults / config file / environment variables
        persona.config if persona else {},                 # lowest precedence
        seed.config if seed else {},
        crawl.config if crawl else {},
        snapshot.config if snapshot else {},
        archiveresult.config if archiveresult else {},
        extra_config or {},                                # highest precedence
    )
    merged = {}
    for layer in layers:
        merged.update(layer)
    return merged

View file

@ -1 +0,0 @@
__package__ = 'abx.django'

View file

@ -1,101 +0,0 @@
__package__ = 'abx.django'
import itertools
# from benedict import benedict
from .. import pm
def get_INSTALLED_APPS():
    """Django INSTALLED_APPS entries contributed by plugins, in reverse registration order."""
    return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS()))

# def register_INSTALLLED_APPS(INSTALLED_APPS):
#     pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)


def get_MIDDLEWARES():
    """Django MIDDLEWARE entries contributed by plugins, in reverse registration order."""
    return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE()))

# def register_MIDDLEWARES(MIDDLEWARE):
#     pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)


def get_AUTHENTICATION_BACKENDS():
    """Django AUTHENTICATION_BACKENDS contributed by plugins, in reverse registration order."""
    return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS()))

# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
#     pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)


def get_STATICFILES_DIRS():
    """Static-file directories contributed by plugins, in reverse registration order."""
    return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS()))

# def register_STATICFILES_DIRS(STATICFILES_DIRS):
#     pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)


def get_TEMPLATE_DIRS():
    """Template directories contributed by plugins, in reverse registration order."""
    return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS()))

# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
#     pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)


def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
    """Merged dict of django-huey queue configs from all plugins (later plugins override)."""
    HUEY_QUEUES = {}
    for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME):
        HUEY_QUEUES.update(plugin_result)
    return HUEY_QUEUES

# def register_DJANGO_HUEY(DJANGO_HUEY):
#     pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)


def get_ADMIN_DATA_VIEWS_URLS():
    """admin_data_views URL entries contributed by plugins, in reverse registration order."""
    return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS()))

# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
#     pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
# def register_settings(settings):
# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation
# settings_as_obj = benedict(settings, keypath_separator=None)
# # set default values for settings that are used by plugins
# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
# # # call all the hook functions to mutate the settings values in-place
# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
# # calls Plugin.settings(settings) on each registered plugin
# pm.hook.register_settings(settings=settings_as_obj)
# # then finally update the settings globals() object will all the new settings
# # settings.update(settings_as_obj)
def get_urlpatterns():
    """All Django urlpatterns contributed by plugins, flattened into one list."""
    return list(itertools.chain(*pm.hook.urlpatterns()))

def register_urlpatterns(urlpatterns):
    """Give plugins a chance to mutate/extend the root urlpatterns in place."""
    pm.hook.register_urlpatterns(urlpatterns=urlpatterns)

def register_checks():
    """register any django system checks"""
    pm.hook.register_checks()

def register_admin(admin_site):
    """register any django admin models/views with the main django admin site instance"""
    pm.hook.register_admin(admin_site=admin_site)

View file

@ -1,22 +0,0 @@
from pathlib import Path
from pluggy import HookimplMarker
from pluggy import HookspecMarker
# Marker decorators for the "abx" pluggy project namespace:
# @hookspec declares a hook signature, @hookimpl marks an implementation.
spec = hookspec = HookspecMarker("abx")
impl = hookimpl = HookimplMarker("abx")


@hookspec
@hookimpl
def get_system_user() -> str:
    """Return the short username derived from the expanded $HOME directory name."""
    # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases
    # - sudo (EUD != UID != SUID)
    # - running with an autodetected UID based on data dir ownership
    #   but mapping of UID:username is broken because it was created
    #   by a different host system, e.g. 911's $HOME outside of docker
    #   might be /usr/lib/lxd instead of /home/archivebox
    # - running as a user that doesn't have a home directory
    # - home directory is set to a path that doesn't exist, or is inside a dir we cant read
    return Path('~').expanduser().name

View file

@ -1,30 +0,0 @@
import inspect
import pluggy
class PluginManager(pluggy.PluginManager):
    """
    Patch to fix pluggy's PluginManager to work with pydantic models.
    See: https://github.com/pytest-dev/pluggy/pull/536
    """
    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
        """Return hookimpl options for plugin.<name>, or None if it cannot be a hookimpl."""
        # IMPORTANT: @property methods can have side effects, and are never hookimpl
        # if attr is a property, skip it in advance
        plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
        if isinstance(getattr(plugin_class, name, None), property):
            return None

        # pydantic model fields are like attrs and also can never be hookimpls
        plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
        if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
            # pydantic models mess with the class and attr __signature__
            # so inspect.isroutine(...) throws exceptions and cant be used
            return None

        try:
            return super().parse_hookimpl_opts(plugin, name)
        except AttributeError:
            # retry against the class when instance attribute access misbehaves
            return super().parse_hookimpl_opts(type(plugin), name)

# shared plugin manager instance for the "abx" hook namespace
pm = PluginManager("abx")

View file

@ -1 +0,0 @@
__package__ = 'abx.pydantic_pkgr'

View file

@ -1,13 +0,0 @@
from ..hookspec import hookspec
###########################################################################################
@hookspec
def get_BINPROVIDERS():
    """Hookspec: each plugin returns the dict of binprovider instances it provides."""
    return {}

@hookspec
def get_BINARIES():
    """Hookspec: each plugin returns the dict of binary instances it provides."""
    return {}

View file

@ -9,9 +9,6 @@ from pathlib import Path
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
import abx import abx
import abx.archivebox
import abx.archivebox.reads
import abx.django.use
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
@ -26,43 +23,22 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
################################################################################ ################################################################################
PLUGIN_HOOKSPECS = [ PLUGIN_HOOKSPECS = [
'abx.django.hookspec', 'abx_spec_django',
'abx.pydantic_pkgr.hookspec', 'abx_spec_pydantic_pkgr',
'abx.archivebox.hookspec', 'abx_spec_config',
'abx_spec_archivebox',
] ]
abx.register_hookspecs(PLUGIN_HOOKSPECS) abx.register_hookspecs(PLUGIN_HOOKSPECS)
BUILTIN_PLUGIN_DIRS = { SYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
'archivebox': PACKAGE_DIR, USER_PLUGINS = abx.find_plugins_in_dir(DATA_DIR / 'user_plugins')
'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
'plugins_auth': PACKAGE_DIR / 'plugins_auth',
'plugins_search': PACKAGE_DIR / 'plugins_search',
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
}
USER_PLUGIN_DIRS = {
# 'user_plugins': DATA_DIR / 'user_plugins',
}
# Discover ArchiveBox plugins ALL_PLUGINS = {**SYSTEM_PLUGINS, **USER_PLUGINS}
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load ArchiveBox plugins # Load ArchiveBox plugins
PLUGIN_MANAGER = abx.pm abx.load_plugins(ALL_PLUGINS)
abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
PLUGINS = abx.archivebox.reads.get_PLUGINS()
# Load ArchiveBox config from plugins # # Load ArchiveBox config from plugins
CONFIGS = abx.archivebox.reads.get_CONFIGS()
CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
BINARIES = abx.archivebox.reads.get_BINARIES()
EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
################################################################################ ################################################################################
@ -110,7 +86,7 @@ INSTALLED_APPS = [
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins # ArchiveBox plugins
*abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, *abx.as_list(abx.pm.hook.get_INSTALLED_APPS()), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# 3rd-party apps from PyPI that need to be loaded last # 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin 'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@ -135,7 +111,7 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware', 'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware', 'core.middleware.CacheControlMiddleware',
*abx.django.use.get_MIDDLEWARES(), *abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
] ]
@ -148,7 +124,7 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [ AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend', 'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend', 'django.contrib.auth.backends.ModelBackend',
*abx.django.use.get_AUTHENTICATION_BACKENDS(), *abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
] ]
@ -169,7 +145,7 @@ AUTHENTICATION_BACKENDS = [
STATIC_URL = '/static/' STATIC_URL = '/static/'
TEMPLATES_DIR_NAME = 'templates' TEMPLATES_DIR_NAME = 'templates'
CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
STATICFILES_DIRS = [ STATICFILES_DIRS = [
*([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []), *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
# *[ # *[
@ -177,7 +153,7 @@ STATICFILES_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values() # for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'static').is_dir() # if (plugin_dir / 'static').is_dir()
# ], # ],
*abx.django.use.get_STATICFILES_DIRS(), *abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
] ]
@ -188,7 +164,7 @@ TEMPLATE_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values() # for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'templates').is_dir() # if (plugin_dir / 'templates').is_dir()
# ], # ],
*abx.django.use.get_TEMPLATE_DIRS(), *abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME), str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@ -292,7 +268,7 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file
"queues": { "queues": {
HUEY["name"]: HUEY.copy(), HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register() # more registered here at plugin import-time by BaseQueue.register()
**abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME), **abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
}, },
} }
@ -517,7 +493,7 @@ ADMIN_DATA_VIEWS = {
"name": "log", "name": "log",
}, },
}, },
*abx.django.use.get_ADMIN_DATA_VIEWS_URLS(), *abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
], ],
} }
@ -611,7 +587,4 @@ if DEBUG_REQUESTS_TRACKER:
# JET_TOKEN = 'some-api-token-here' # JET_TOKEN = 'some-api-token-here'
abx.django.use.register_checks()
# abx.archivebox.reads.register_all_hooks(globals())
# import ipdb; ipdb.set_trace() # import ipdb; ipdb.set_trace()

View file

@ -1,42 +0,0 @@
__package__ = 'plugins_pkg.npm'
from pathlib import Path
from typing import Optional
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
from archivebox.config import DATA_DIR, CONSTANTS
from abx.archivebox.base_binary import BaseBinProvider
OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'
class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "sys_npm"
npm_prefix: Optional[Path] = None
class LibNpmBinProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "lib_npm"
PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'
def setup(self) -> None:
# update paths from config if they arent the default
from archivebox.config.common import STORAGE_CONFIG
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
super().setup()
SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
LIB_NPM_BINPROVIDER = LibNpmBinProvider()
npm = LIB_NPM_BINPROVIDER

View file

@ -8,8 +8,8 @@ VENDORED_LIBS = {
# sys.path dir: library name # sys.path dir: library name
#'python-atomicwrites': 'atomicwrites', #'python-atomicwrites': 'atomicwrites',
#'django-taggit': 'taggit', #'django-taggit': 'taggit',
'pydantic-pkgr': 'pydantic_pkgr', # 'pydantic-pkgr': 'pydantic_pkgr',
'pocket': 'pocket', # 'pocket': 'pocket',
#'base32-crockford': 'base32_crockford', #'base32-crockford': 'base32_crockford',
} }

@ -1 +0,0 @@
Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5

@ -1 +0,0 @@
Subproject commit a774f24644ee14f14fa2cc3d8e6e0a585ae00fdd

32
click_test.py Normal file
View file

@ -0,0 +1,32 @@
import sys
import click
from rich import print
from archivebox.config.django import setup_django
setup_django()
import abx.archivebox.writes
def parse_stdin_to_args(io=sys.stdin):
for line in io.read().split('\n'):
for url_or_id in line.split(' '):
if url_or_id.strip():
yield url_or_id.strip()
# Gather data from stdin in case using a pipe
if not sys.stdin.isatty():
sys.argv += parse_stdin_to_args(sys.stdin)
@click.command()
@click.argument("snapshot_ids_or_urls", type=str, nargs=-1)
def extract(snapshot_ids_or_urls):
for url_or_snapshot_id in snapshot_ids_or_urls:
print('- EXTRACTING', url_or_snapshot_id, file=sys.stderr)
for result in abx.archivebox.writes.extract(url_or_snapshot_id):
print(result)
if __name__ == "__main__":
extract()

View file

@ -0,0 +1,7 @@
[project]
name = "abx-archivedotorg-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -13,15 +13,15 @@ from pydantic_pkgr import (
bin_abspath, bin_abspath,
) )
import abx.archivebox.reads
from abx.archivebox.base_binary import BaseBinary, env, apt, brew from abx.archivebox.base_binary import BaseBinary, env, apt, brew
# Depends on Other Plugins: from abx_puppeteer_binprovider.binproviders import PUPPETEER_BINPROVIDER
from archivebox.config.common import SHELL_CONFIG from abx_playwright_binprovider.binproviders import PLAYWRIGHT_BINPROVIDER
from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
from .config import CHROME_CONFIG from .config import CHROME_CONFIG
CHROMIUM_BINARY_NAMES_LINUX = [ CHROMIUM_BINARY_NAMES_LINUX = [
"chromium", "chromium",
"chromium-browser", "chromium-browser",
@ -48,12 +48,13 @@ CHROME_BINARY_NAMES_MACOS = [
] ]
CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
APT_DEPENDENCIES = [ CHROME_APT_DEPENDENCIES = [
'apt-transport-https', 'at-spi2-common', 'chromium-browser', 'apt-transport-https', 'at-spi2-common',
'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei', 'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2', 'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1', 'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings', 'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
'chromium-browser',
] ]
@ -95,7 +96,7 @@ class ChromeBinary(BaseBinary):
'packages': ['chromium'], # playwright install chromium 'packages': ['chromium'], # playwright install chromium
}, },
apt.name: { apt.name: {
'packages': APT_DEPENDENCIES, 'packages': CHROME_APT_DEPENDENCIES,
}, },
brew.name: { brew.name: {
'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [], 'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [],
@ -104,10 +105,9 @@ class ChromeBinary(BaseBinary):
@staticmethod @staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None: def symlink_to_lib(binary, bin_dir=None) -> None:
from archivebox.config.common import STORAGE_CONFIG bin_dir = bin_dir or abx.archivebox.reads.get_CONFIGS().STORAGE_CONFIG.LIB_DIR / 'bin'
bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
if not (binary.abspath and os.access(binary.abspath, os.F_OK)): if not (binary.abspath and os.path.isfile(binary.abspath)):
return return
bin_dir.mkdir(parents=True, exist_ok=True) bin_dir.mkdir(parents=True, exist_ok=True)
@ -121,7 +121,7 @@ class ChromeBinary(BaseBinary):
# otherwise on linux we can symlink directly to binary executable # otherwise on linux we can symlink directly to binary executable
symlink.unlink(missing_ok=True) symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath) symlink.symlink_to(binary.abspath)
except Exception as err: except Exception:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it # not actually needed, we can just run without it
pass pass
@ -132,14 +132,17 @@ class ChromeBinary(BaseBinary):
Cleans up any state or runtime files that chrome leaves behind when killed by Cleans up any state or runtime files that chrome leaves behind when killed by
a timeout or other error a timeout or other error
""" """
lock_file = Path("~/.config/chromium/SingletonLock").expanduser() try:
linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK): linux_lock_file.unlink(missing_ok=True)
lock_file.unlink() except Exception:
pass
if CHROME_CONFIG.CHROME_USER_DATA_DIR: if CHROME_CONFIG.CHROME_USER_DATA_DIR:
if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK): try:
lock_file.unlink() (CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True)
except Exception:
pass

View file

@ -0,0 +1,7 @@
[project]
name = "abx-chrome-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-curl-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,24 @@
import abx
from typing import Dict
from pydantic_pkgr import (
AptProvider,
BrewProvider,
EnvProvider,
BinProvider,
)
apt = APT_BINPROVIDER = AptProvider()
brew = BREW_BINPROVIDER = BrewProvider()
env = ENV_BINPROVIDER = EnvProvider()
@abx.hookimpl(tryfirst=True)
def get_BINPROVIDERS() -> Dict[str, BinProvider]:
return {
'apt': APT_BINPROVIDER,
'brew': BREW_BINPROVIDER,
'env': ENV_BINPROVIDER,
}

View file

@ -0,0 +1,18 @@
[project]
name = "abx-plugin-default-binproviders"
version = "2024.10.24"
description = "Default BinProviders for ABX (apt, brew, env)"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_default_binproviders = "abx_plugin_default_binproviders"

View file

@ -0,0 +1,7 @@
[project]
name = "abx-favicon-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-git-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-htmltotext-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

View file

@ -0,0 +1,22 @@
[project]
name = "abx-ldap-auth"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []
[project.entry-points.abx]
ldap = "abx_ldap_auth"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.sdist]
packages = ["."]
[tool.hatch.build.targets.wheel]
packages = ["."]

View file

@ -0,0 +1,7 @@
[project]
name = "abx-mercury-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -1,26 +1,12 @@
__package__ = 'plugins_pkg.npm' __package__ = 'abx_plugin_npm_binprovider'
__version__ = '2024.10.14'
__id__ = 'npm' __id__ = 'npm'
__label__ = 'npm' __label__ = 'NPM'
__author__ = 'ArchiveBox' __author__ = 'ArchiveBox'
__homepage__ = 'https://www.npmjs.com/' __homepage__ = 'https://www.npmjs.com/'
import abx import abx
@abx.hookimpl
def get_PLUGIN():
return {
__id__: {
'id': __id__,
'package': __package__,
'label': __label__,
'version': __version__,
'author': __author__,
'homepage': __homepage__,
}
}
@abx.hookimpl @abx.hookimpl
def get_CONFIG(): def get_CONFIG():
from .config import NPM_CONFIG from .config import NPM_CONFIG

View file

@ -4,14 +4,19 @@ __package__ = 'plugins_pkg.npm'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from benedict import benedict
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
from abx_plugin_default_binproviders import get_BINPROVIDERS
DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
env = DEFAULT_BINPROVIDERS.env
apt = DEFAULT_BINPROVIDERS.apt
brew = DEFAULT_BINPROVIDERS.brew
from abx.archivebox.base_binary import BaseBinary, env, apt, brew class NodeBinary(Binary):
class NodeBinary(BaseBinary):
name: BinName = 'node' name: BinName = 'node'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@ -23,7 +28,7 @@ class NodeBinary(BaseBinary):
NODE_BINARY = NodeBinary() NODE_BINARY = NodeBinary()
class NpmBinary(BaseBinary): class NpmBinary(Binary):
name: BinName = 'npm' name: BinName = 'npm'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@ -35,7 +40,7 @@ class NpmBinary(BaseBinary):
NPM_BINARY = NpmBinary() NPM_BINARY = NpmBinary()
class NpxBinary(BaseBinary): class NpxBinary(Binary):
name: BinName = 'npx' name: BinName = 'npx'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]

View file

@ -0,0 +1,39 @@
import os
from pathlib import Path
from typing import Optional
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
import abx
DEFAULT_LIB_NPM_DIR = Path('/usr/local/share/abx/npm')
OLD_NODE_BIN_PATH = Path(os.getcwd()) / 'node_modules' / '.bin'
NEW_NODE_BIN_PATH = DEFAULT_LIB_NPM_DIR / 'node_modules' / '.bin'
class SystemNpmBinProvider(NpmProvider):
name: BinProviderName = "sys_npm"
npm_prefix: Optional[Path] = None
class LibNpmBinProvider(NpmProvider):
name: BinProviderName = "lib_npm"
PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
npm_prefix: Optional[Path] = DEFAULT_LIB_NPM_DIR
def setup(self) -> None:
# update paths from config at runtime
LIB_DIR = abx.pm.hook.get_CONFIG().LIB_DIR
self.npm_prefix = LIB_DIR / 'npm'
self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
super().setup()
SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
LIB_NPM_BINPROVIDER = LibNpmBinProvider()
npm = LIB_NPM_BINPROVIDER

View file

@ -1,7 +1,4 @@
__package__ = 'plugins_pkg.npm' from abx_spec_config import BaseConfigSet
from abx.archivebox.base_configset import BaseConfigSet
###################### Config ########################## ###################### Config ##########################

View file

@ -0,0 +1,20 @@
[project]
name = "abx-plugin-npm-binprovider"
version = "2024.10.24"
description = "NPM binary provider plugin for ABX"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_npm_binprovider = "abx_plugin_npm_binprovider"

View file

@ -1,33 +1,19 @@
__package__ = 'plugins_pkg.pip' __package__ = 'abx_plugin_pip_binprovider'
__label__ = 'pip' __id__ = 'pip'
__version__ = '2024.10.14' __label__ = 'PIP'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/pypa/pip'
import abx import abx
@abx.hookimpl
def get_PLUGIN():
return {
'pip': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
}
}
@abx.hookimpl @abx.hookimpl
def get_CONFIG(): def get_CONFIG():
from .config import PIP_CONFIG from .config import PIP_CONFIG
return { return {
'pip': PIP_CONFIG __id__: PIP_CONFIG
} }
@abx.hookimpl @abx.hookimpl(tryfirst=True)
def get_BINARIES(): def get_BINARIES():
from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY

View file

@ -1,4 +1,4 @@
__package__ = 'plugins_pkg.pip' __package__ = 'abx_plugin_pip_binprovider'
import sys import sys
from pathlib import Path from pathlib import Path
@ -9,29 +9,30 @@ from pydantic import InstanceOf, Field, model_validator
import django import django
import django.db.backends.sqlite3.base import django.db.backends.sqlite3.base
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, SemVer from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
from archivebox import VERSION
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew
from archivebox.misc.logging import hint
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
###################### Config ########################## ###################### Config ##########################
def get_archivebox_version():
try:
from archivebox import VERSION
return VERSION
except Exception:
return None
class ArchiveboxBinary(BaseBinary): class ArchiveboxBinary(Binary):
name: BinName = 'archivebox' name: BinName = 'archivebox'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
overrides: BinaryOverrides = { overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION}, VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION}, SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
apt.name: {'packages': [], 'version': VERSION}, apt.name: {'packages': [], 'version': get_archivebox_version},
brew.name: {'packages': [], 'version': VERSION}, brew.name: {'packages': [], 'version': get_archivebox_version},
} }
# @validate_call # @validate_call
@ -45,7 +46,7 @@ class ArchiveboxBinary(BaseBinary):
ARCHIVEBOX_BINARY = ArchiveboxBinary() ARCHIVEBOX_BINARY = ArchiveboxBinary()
class PythonBinary(BaseBinary): class PythonBinary(Binary):
name: BinName = 'python' name: BinName = 'python'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@ -71,9 +72,9 @@ LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__)
LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version) LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version)
LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
class SqliteBinary(BaseBinary): class SqliteBinary(Binary):
name: BinName = 'sqlite' name: BinName = 'sqlite'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
overrides: BinaryOverrides = { overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: { VENV_PIP_BINPROVIDER.name: {
"abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None, "abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
@ -93,10 +94,10 @@ class SqliteBinary(BaseBinary):
cursor.execute('SELECT JSON(\'{"a": "b"}\')') cursor.execute('SELECT JSON(\'{"a": "b"}\')')
except django_sqlite3.OperationalError as exc: except django_sqlite3.OperationalError as exc:
print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]') print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]')
hint([ print(
'Upgrade your Python version or install the extension manually:', '[violet]Hint:[/violet] Upgrade your Python version or install the extension manually:\n' +
'https://code.djangoproject.com/wiki/JSON1Extension' ' https://code.djangoproject.com/wiki/JSON1Extension\n'
]) )
return self return self
# @validate_call # @validate_call
@ -114,10 +115,10 @@ LOADED_DJANGO_PATH = Path(django.__file__)
LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3]) LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3])
LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
class DjangoBinary(BaseBinary): class DjangoBinary(Binary):
name: BinName = 'django' name: BinName = 'django'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
overrides: BinaryOverrides = { overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: { VENV_PIP_BINPROVIDER.name: {
"abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None, "abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
@ -139,7 +140,7 @@ class DjangoBinary(BaseBinary):
DJANGO_BINARY = DjangoBinary() DJANGO_BINARY = DjangoBinary()
class PipBinary(BaseBinary): class PipBinary(Binary):
name: BinName = "pip" name: BinName = "pip"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@ -154,7 +155,7 @@ class PipBinary(BaseBinary):
PIP_BINARY = PipBinary() PIP_BINARY = PipBinary()
class PipxBinary(BaseBinary): class PipxBinary(Binary):
name: BinName = "pipx" name: BinName = "pipx"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]

View file

@ -1,21 +1,26 @@
__package__ = 'plugins_pkg.pip'
import os import os
import sys import sys
import site import site
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from benedict import benedict
from pydantic_pkgr import PipProvider, BinName, BinProviderName from pydantic_pkgr import PipProvider, BinName, BinProviderName
from archivebox.config import CONSTANTS import abx
from abx.archivebox.base_binary import BaseBinProvider from abx_plugin_default_binproviders import get_BINPROVIDERS
DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
env = DEFAULT_BINPROVIDERS.env
apt = DEFAULT_BINPROVIDERS.apt
brew = DEFAULT_BINPROVIDERS.brew
###################### Config ########################## ###################### Config ##########################
class SystemPipBinProvider(PipProvider, BaseBinProvider): class SystemPipBinProvider(PipProvider):
name: BinProviderName = "sys_pip" name: BinProviderName = "sys_pip"
INSTALLER_BIN: BinName = "pip" INSTALLER_BIN: BinName = "pip"
@ -25,7 +30,7 @@ class SystemPipBinProvider(PipProvider, BaseBinProvider):
# never modify system pip packages # never modify system pip packages
return 'refusing to install packages globally with system pip, use a venv instead' return 'refusing to install packages globally with system pip, use a venv instead'
class SystemPipxBinProvider(PipProvider, BaseBinProvider): class SystemPipxBinProvider(PipProvider):
name: BinProviderName = "pipx" name: BinProviderName = "pipx"
INSTALLER_BIN: BinName = "pipx" INSTALLER_BIN: BinName = "pipx"
@ -34,7 +39,7 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider):
IS_INSIDE_VENV = sys.prefix != sys.base_prefix IS_INSIDE_VENV = sys.prefix != sys.base_prefix
class VenvPipBinProvider(PipProvider, BaseBinProvider): class VenvPipBinProvider(PipProvider):
name: BinProviderName = "venv_pip" name: BinProviderName = "venv_pip"
INSTALLER_BIN: BinName = "pip" INSTALLER_BIN: BinName = "pip"
@ -45,18 +50,16 @@ class VenvPipBinProvider(PipProvider, BaseBinProvider):
return None return None
class LibPipBinProvider(PipProvider, BaseBinProvider): class LibPipBinProvider(PipProvider):
name: BinProviderName = "lib_pip" name: BinProviderName = "lib_pip"
INSTALLER_BIN: BinName = "pip" INSTALLER_BIN: BinName = "pip"
pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv' pip_venv: Optional[Path] = Path('/usr/local/share/abx/pip/venv')
def setup(self) -> None: def setup(self) -> None:
# update paths from config if they arent the default # update venv path to match most up-to-date LIB_DIR based on runtime config
from archivebox.config.common import STORAGE_CONFIG LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR: self.pip_venv = LIB_DIR / 'pip' / 'venv'
self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv'
super().setup() super().setup()
SYS_PIP_BINPROVIDER = SystemPipBinProvider() SYS_PIP_BINPROVIDER = SystemPipBinProvider()

View file

@ -0,0 +1,22 @@
[project]
name = "abx-plugin-pip-binprovider"
version = "2024.10.24"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
"django>=5.0.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_pip_binprovider = "abx_plugin_pip_binprovider"

View file

@ -1,30 +1,18 @@
__package__ = 'plugins_pkg.playwright' __package__ = 'abx_plugin_playwright_binprovider'
__label__ = 'playwright' __id__ = 'playwright'
__version__ = '2024.10.14' __label__ = 'Playwright'
__author__ = 'ArchiveBox' __author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/microsoft/playwright-python' __homepage__ = 'https://github.com/microsoft/playwright-python'
import abx import abx
@abx.hookimpl
def get_PLUGIN():
return {
'playwright': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
}
}
@abx.hookimpl @abx.hookimpl
def get_CONFIG(): def get_CONFIG():
from .config import PLAYWRIGHT_CONFIG from .config import PLAYWRIGHT_CONFIG
return { return {
'playwright': PLAYWRIGHT_CONFIG __id__: PLAYWRIGHT_CONFIG
} }
@abx.hookimpl @abx.hookimpl

View file

@ -1,20 +1,18 @@
__package__ = 'plugins_pkg.playwright' __package__ = 'abx_plugin_playwright_binprovider'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinName, BinProvider from pydantic_pkgr import BinName, BinProvider, Binary
from abx.archivebox.base_binary import BaseBinary, env
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER from abx_plugin_pip_binprovider.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
from abx_plugin_default_binproviders import env
from .config import PLAYWRIGHT_CONFIG from .config import PLAYWRIGHT_CONFIG
class PlaywrightBinary(Binary):
class PlaywrightBinary(BaseBinary):
name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env] binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env]

View file

@ -1,6 +1,7 @@
__package__ = 'plugins_pkg.playwright' __package__ = 'abx_plugin_playwright_binprovider'
import os import os
import shutil
import platform import platform
from pathlib import Path from pathlib import Path
from typing import List, Optional, Dict, ClassVar from typing import List, Optional, Dict, ClassVar
@ -8,6 +9,7 @@ from typing import List, Optional, Dict, ClassVar
from pydantic import computed_field, Field from pydantic import computed_field, Field
from pydantic_pkgr import ( from pydantic_pkgr import (
BinName, BinName,
BinProvider,
BinProviderName, BinProviderName,
BinProviderOverrides, BinProviderOverrides,
InstallArgs, InstallArgs,
@ -18,11 +20,8 @@ from pydantic_pkgr import (
DEFAULT_ENV_PATH, DEFAULT_ENV_PATH,
) )
from archivebox.config import CONSTANTS import abx
from abx.archivebox.base_binary import BaseBinProvider, env
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER
from .binaries import PLAYWRIGHT_BINARY from .binaries import PLAYWRIGHT_BINARY
@ -31,11 +30,11 @@ MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright")
LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright") LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright")
class PlaywrightBinProvider(BaseBinProvider): class PlaywrightBinProvider(BinProvider):
name: BinProviderName = "playwright" name: BinProviderName = "playwright"
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}"
playwright_browsers_dir: Path = ( playwright_browsers_dir: Path = (
MACOS_PLAYWRIGHT_CACHE_DIR.expanduser() MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
@ -59,12 +58,12 @@ class PlaywrightBinProvider(BaseBinProvider):
return None return None
def setup(self) -> None: def setup(self) -> None:
# update paths from config if they arent the default # update paths from config at runtime
from archivebox.config.common import STORAGE_CONFIG LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized" self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
assert shutil.which('pip'), "Pip bin provider not initialized"
if self.playwright_browsers_dir: if self.playwright_browsers_dir:
self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True) self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True)

View file

@ -1,7 +1,4 @@
__package__ = 'playwright' from abx_spec_config import BaseConfigSet
from abx.archivebox.base_configset import BaseConfigSet
class PlaywrightConfigs(BaseConfigSet): class PlaywrightConfigs(BaseConfigSet):
PLAYWRIGHT_BINARY: str = 'playwright' PLAYWRIGHT_BINARY: str = 'playwright'

View file

@ -0,0 +1,20 @@
[project]
name = "abx-plugin-playwright-binprovider"
version = "2024.10.24"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic>=2.4.2",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-config>=0.1.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_playwright_binprovider = "abx_plugin_playwright_binprovider"

View file

@ -0,0 +1,7 @@
[project]
name = "abx-pocket-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -42,7 +42,8 @@ class PuppeteerBinProvider(BaseBinProvider):
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
def setup(self) -> None: def setup(self) -> None:
# update paths from config # update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time
# we want to avoid depending on archivebox from abx code if at all possible
from archivebox.config.common import STORAGE_CONFIG from archivebox.config.common import STORAGE_CONFIG
self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers' self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin') self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')

View file

@ -0,0 +1,7 @@
[project]
name = "abx-puppeteer-binprovider"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-readability-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-readwise-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

Some files were not shown because too many files have changed in this diff Show more