split archivebox.use into archivebox.reads and archivebox.writes

This commit is contained in:
Nick Sweeting 2024-10-15 01:03:01 -07:00
parent aaf069fab0
commit 80d8a6b667
No known key found for this signature in database
8 changed files with 138 additions and 131 deletions

View file

@ -195,8 +195,8 @@ class BaseExtractor:
@cached_property
def BINARY(self) -> BaseBinary:
    """Return the registered binary whose name equals ``self.binary``.

    The candidates come from ``abx.archivebox.reads.get_BINARIES()``; the
    first one with a matching ``name`` is returned.

    Raises:
        ValueError: if no registered binary has that name.
    """
    # imported inside the method, exactly as the original does
    import abx.archivebox.reads

    registered = abx.archivebox.reads.get_BINARIES().values()
    found = next(
        (candidate for candidate in registered if candidate.name == self.binary),
        None,
    )
    if found is None:
        raise ValueError(f'Binary {self.binary} not found')
    return found

View file

@ -1,10 +1,11 @@
__package__ = 'abx.archivebox' __package__ = 'abx.archivebox'
import importlib import importlib
from typing import Dict, Any, TYPE_CHECKING from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict from benedict import benedict
import abx
from .. import pm from .. import pm
if TYPE_CHECKING: if TYPE_CHECKING:
@ -25,51 +26,36 @@ def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
for plugin_id, plugin in plugin_dict.items() for plugin_id, plugin in plugin_dict.items()
}) })
def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
    """Return the metadata record for a single plugin.

    Looks ``plugin_id`` up in ``get_PLUGINS()``, imports the plugin's package
    and combines module-level metadata (``__label__``, ``__version__``, ...)
    with the plugin's hooks.

    Args:
        plugin_id: key of the plugin in the mapping returned by ``get_PLUGINS()``.

    Returns:
        A ``benedict`` with the keys ``id``, ``label``, ``module``, ``package``,
        ``hooks``, ``version``, ``author``, ``homepage``, ``dependencies`` and
        ``source_code``, overlaid with the entry ``get_PLUGINS()`` holds for
        the plugin. When the plugin is unknown or has no package, a minimal
        ``benedict`` containing only ``id`` and an empty ``hooks`` mapping.

    Raises:
        AssertionError: if the entry has neither an id nor any hooks.
        ImportError: if the plugin's package cannot be imported.
    """
    plugin_info = get_PLUGINS().get(plugin_id, {})

    # accept both the lowercase key and the older uppercase spelling
    package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
    if not package:
        # Same container type as the normal return value, so callers that use
        # attribute access (plugin.hooks, plugin.id) work on the fallback too.
        return benedict({'id': plugin_id, 'hooks': {}})

    module = importlib.import_module(package)
    hooks = abx.get_plugin_hooks(module.__package__)
    assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks), (
        f'Plugin {plugin_id} not found'
    )

    return benedict({
        'id': plugin_id,
        'label': getattr(module, '__label__', plugin_id),
        'module': module,
        'package': module.__package__,
        'hooks': hooks,
        'version': getattr(module, '__version__', '999.999.999'),
        'author': getattr(module, '__author__', 'Unknown'),
        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
        'dependencies': getattr(module, '__dependencies__', []),
        'source_code': module.__file__,
        # spread last: values already registered for the plugin override the
        # module-derived defaults above
        **plugin_info,
    })
def get_HOOKS() -> Set[str]:
    """Return the names of all hooks exposed by any plugin.

    Walks every plugin id in ``get_PLUGINS()``, loads its record with
    ``get_PLUGIN()`` and collects the keys of its ``hooks`` mapping.

    Returns:
        A set of hook names, de-duplicated across plugins.
    """
    return {
        hook_name
        for plugin_id in get_PLUGINS()
        # Subscript instead of `.hooks`: it works whether get_PLUGIN() returns
        # a benedict or a plain dict, and both shapes carry a 'hooks' key.
        for hook_name in get_PLUGIN(plugin_id)['hooks']
    }
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']: def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
return benedict({ return benedict({
@ -78,6 +64,7 @@ def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
for config_id, configset in plugin_configs.items() for config_id, configset in plugin_configs.items()
}) })
def get_FLAT_CONFIG() -> Dict[str, Any]: def get_FLAT_CONFIG() -> Dict[str, Any]:
return benedict({ return benedict({
key: value key: value
@ -141,28 +128,3 @@ def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS() for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
for searchbackend_id,searchbackend in plugin_searchbackends.items() for searchbackend_id,searchbackend in plugin_searchbackends.items()
}) })
###########################
# def extract(url_or_snapshot_id):
# from core.models import Snapshot
# url, snapshot_abid, snapshot_id = None, None, None
# snapshot = None
# if '://' in url_or_snapshot_id:
# url = url_or_snapshot_id
# try:
# snapshot = Snapshot.objects.get(url=url)
# except Snapshot.DoesNotExist:
# snapshot = Snapshot(url=url_or_snapshot_id, timestamp=str(timezone.now().timestamp()), bookmarked_at=timezone.now())
# snapshot.save()
# elif '-' in url_or_snapshot_id:
# snapshot_id = url_or_snapshot_id
# snapshot = Snapshot.objects.get(id=snapshot_id)
# else:
# snapshot_abid = url_or_snapshot_id
# snapshot = Snapshot.objects.get(abid=snapshot_abid)
# return pm.hook.extract(snapshot_id=snapshot.id)

View file

@ -0,0 +1,10 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
import abx
from .. import pm

View file

@ -14,7 +14,7 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import abx.archivebox.use import abx.archivebox.reads
from archivebox.config import CONSTANTS from archivebox.config import CONSTANTS
from archivebox.misc.util import parse_date from archivebox.misc.util import parse_date
@ -85,10 +85,12 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
if '_BINARY' in key or '_VERSION' in key if '_BINARY' in key or '_VERSION' in key
} }
for plugin_id in abx.archivebox.use.get_PLUGINS().keys(): for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
plugin = abx.archivebox.use.get_PLUGIN(plugin_id) plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
if not plugin.hooks.get('get_BINARIES'):
continue
for binary in plugin.BINARIES.values(): for binary in plugin.hooks.get_BINARIES().values():
try: try:
installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary) installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
binary = installed_binary.load_from_db() binary = installed_binary.load_from_db()
@ -97,7 +99,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
rows['Binary Name'].append(ItemLink(binary.name, key=binary.name)) rows['Binary Name'].append(ItemLink(binary.name, key=binary.name))
rows['Found Version'].append(f'{binary.loaded_version}' if binary.loaded_version else '❌ missing') rows['Found Version'].append(f'{binary.loaded_version}' if binary.loaded_version else '❌ missing')
rows['From Plugin'].append(plugin.PACKAGE) rows['From Plugin'].append(plugin.package)
rows['Provided By'].append( rows['Provided By'].append(
', '.join( ', '.join(
f'[{binprovider.name}]' if binprovider.name == getattr(binary.loaded_binprovider, 'name', None) else binprovider.name f'[{binprovider.name}]' if binprovider.name == getattr(binary.loaded_binprovider, 'name', None) else binprovider.name
@ -133,12 +135,16 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
binary = None binary = None
plugin = None plugin = None
for plugin_id in abx.archivebox.use.get_PLUGINS().keys(): for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
loaded_plugin = abx.archivebox.use.get_PLUGIN(plugin_id) loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
for loaded_binary in loaded_plugin.BINARIES.values(): try:
for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
if loaded_binary.name == key: if loaded_binary.name == key:
binary = loaded_binary binary = loaded_binary
plugin = loaded_plugin plugin = loaded_plugin
# break # last write wins
except Exception as e:
print(e)
assert plugin and binary, f'Could not find a binary matching the specified name: {key}' assert plugin and binary, f'Could not find a binary matching the specified name: {key}'
@ -155,7 +161,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"name": binary.name, "name": binary.name,
"description": binary.abspath, "description": binary.abspath,
"fields": { "fields": {
'plugin': plugin.PACKAGE, 'plugin': plugin.package,
'binprovider': binary.loaded_binprovider, 'binprovider': binary.loaded_binprovider,
'abspath': binary.loaded_abspath, 'abspath': binary.loaded_abspath,
'version': binary.loaded_version, 'version': binary.loaded_version,
@ -187,27 +193,52 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Search Backends": [], # "Search Backends": [],
} }
# Substring -> CSS color used when rendering config keys.
# Insertion order matters: get_color() returns the color of the first
# substring (in this order) that occurs in the key.
config_colors = {
    '_BINARY': '#339',
    **dict.fromkeys(('USE_', 'SAVE_'), 'green'),
    '_ARGS': '#33e',
    **dict.fromkeys(('KEY', 'COOKIES', 'AUTH', 'SECRET', 'TOKEN', 'PASSWORD'), 'red'),
    **dict.fromkeys(('TIMEOUT', 'RETRIES', 'MAX', 'MIN'), '#533'),
}


def get_color(key):
    """Return the color for ``key``: first matching substring wins, else 'black'."""
    matching_colors = (
        color
        for pattern, color in config_colors.items()
        if pattern in key
    )
    return next(matching_colors, 'black')
for plugin_id in settings.PLUGINS.keys(): for plugin_id in settings.PLUGINS.keys():
plugin = abx.archivebox.use.get_PLUGIN(plugin_id) plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
rows['Label'].append(mark_safe(f'<a href="{plugin.HOMEPAGE}" target="_blank">{plugin.LABEL}</a>')) rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
rows['Version'].append(str(plugin.VERSION)) rows['Version'].append(str(plugin.version))
rows['Author'].append(str(plugin.AUTHOR)) rows['Author'].append(mark_safe(f'<a href="{plugin.homepage}" target="_blank">{plugin.author}</a>'))
rows['Package'].append(ItemLink(plugin.PACKAGE, key=plugin.PACKAGE)) rows['Package'].append(ItemLink(plugin.package, key=plugin.package))
rows['Source Code'].append(format_html('<code>{}</code>', str(plugin.SOURCE_PATH).replace(str(Path('~').expanduser()), '~'))) rows['Source Code'].append(format_html('<code>{}</code>', str(plugin.source_code).replace(str(Path('~').expanduser()), '~')))
rows['Config'].append(mark_safe(''.join( rows['Config'].append(mark_safe(''.join(
f'<a href="/admin/environment/config/{key}/"><b><code>{key}</code></b>=<code>{value}</code></a><br/>' f'<a href="/admin/environment/config/{key}/"><b><code style="color: {get_color(key)};">{key}</code></b>=<code>{value}</code></a><br/>'
for key, value in plugin.CONFIG.model_dump().items() for configdict in plugin.hooks.get_CONFIG().values()
for key, value in benedict(configdict).items()
))) )))
rows['Binaries'].append(mark_safe(', '.join( rows['Binaries'].append(mark_safe(', '.join(
f'<a href="/admin/environment/binaries/{binary.name}/"><code>{binary.name}</code></a>' f'<a href="/admin/environment/binaries/{binary.name}/"><code>{binary.name}</code></a>'
for binary in plugin.BINARIES.values() for binary in plugin.hooks.get_BINARIES().values()
))) )))
rows['Package Managers'].append(mark_safe(', '.join( rows['Package Managers'].append(mark_safe(', '.join(
f'<a href="/admin/environment/binproviders/{binprovider.name}/"><code>{binprovider.name}</code></a>' f'<a href="/admin/environment/binproviders/{binprovider.name}/"><code>{binprovider.name}</code></a>'
for binprovider in plugin.BINPROVIDERS.values() for binprovider in plugin.hooks.get_BINPROVIDERS().values()
))) )))
# rows['Search Backends'].append(mark_safe(', '.join( # rows['Search Backends'].append(mark_safe(', '.join(
# f'<a href="/admin/environment/searchbackends/{searchbackend.name}/"><code>{searchbackend.name}</code></a>' # f'<a href="/admin/environment/searchbackends/{searchbackend.name}/"><code>{searchbackend.name}</code></a>'
@ -224,30 +255,33 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
plugin = None plugin_id = None
for plugin_id, loaded_plugin in settings.PLUGINS.items0(): for check_plugin_id, loaded_plugin in settings.PLUGINS.items():
if loaded_plugin.PACKAGE == key or plugin_id == key: if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
plugin = loaded_plugin plugin_id = check_plugin_id
break
assert plugin, f'Could not find a plugin matching the specified name: {key}' assert plugin_id, f'Could not find a plugin matching the specified name: {key}'
try: plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
plugin = plugin.load_binaries()
except Exception as e:
print(e)
return ItemContext( return ItemContext(
slug=key, slug=key,
title=key, title=key,
data=[ data=[
{ {
"name": plugin.PACKAGE, "name": plugin.package,
"description": plugin.LABEL, "description": plugin.label,
"fields": { "fields": {
"version": plugin.VERSION, "id": plugin.id,
"author": plugin.AUTHOR, "package": plugin.package,
"homepage": plugin.HOMEPAGE, "label": plugin.label,
"version": plugin.version,
"author": plugin.author,
"homepage": plugin.homepage,
"dependencies": getattr(plugin, 'DEPENDENCIES', []), "dependencies": getattr(plugin, 'DEPENDENCIES', []),
"source_code": plugin.source_code,
"hooks": plugin.hooks,
}, },
"help_texts": { "help_texts": {
# TODO # TODO

View file

@ -10,7 +10,7 @@ from django.utils.crypto import get_random_string
import abx import abx
import abx.archivebox import abx.archivebox
import abx.archivebox.use import abx.archivebox.reads
import abx.django.use import abx.django.use
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
@ -53,17 +53,17 @@ ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load ArchiveBox plugins # Load ArchiveBox plugins
PLUGIN_MANAGER = abx.pm PLUGIN_MANAGER = abx.pm
abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS) abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
PLUGINS = abx.archivebox.use.get_PLUGINS() PLUGINS = abx.archivebox.reads.get_PLUGINS()
# Load ArchiveBox config from plugins # Load ArchiveBox config from plugins
CONFIGS = abx.archivebox.use.get_CONFIGS() CONFIGS = abx.archivebox.reads.get_CONFIGS()
CONFIG = FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG() CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS() BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
BINARIES = abx.archivebox.use.get_BINARIES() BINARIES = abx.archivebox.reads.get_BINARIES()
EXTRACTORS = abx.archivebox.use.get_EXTRACTORS() EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS() SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
# REPLAYERS = abx.archivebox.use.get_REPLAYERS() # REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
# ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS() # ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
################################################################################ ################################################################################
@ -609,6 +609,6 @@ if DEBUG_REQUESTS_TRACKER:
abx.django.use.register_checks() abx.django.use.register_checks()
# abx.archivebox.use.register_all_hooks(globals()) # abx.archivebox.reads.register_all_hooks(globals())
# import ipdb; ipdb.set_trace() # import ipdb; ipdb.set_trace()

View file

@ -503,7 +503,7 @@ def find_config_section(key: str) -> str:
if key in CONSTANTS_CONFIG: if key in CONSTANTS_CONFIG:
return 'CONSTANT' return 'CONSTANT'
matching_sections = [ matching_sections = [
section.id for section in settings.CONFIGS.values() if key in section.model_fields section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields
] ]
section = matching_sections[0] if matching_sections else 'DYNAMIC' section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section return section
@ -560,9 +560,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Aliases": [], # "Aliases": [],
} }
for section in reversed(list(settings.CONFIGS.values())): for section_id, section in reversed(list(settings.CONFIGS.items())):
for key, field in section.model_fields.items(): for key, field in section.model_fields.items():
rows['Section'].append(section.id) # section.replace('_', ' ').title().replace(' Config', '') rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key)) rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', find_config_type(key))) rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
rows['Value'].append(mark_safe(f'<code>{getattr(section, key)}</code>') if key_is_safe(key) else '******** (redacted)') rows['Value'].append(mark_safe(f'<code>{getattr(section, key)}</code>') if key_is_safe(key) else '******** (redacted)')

View file

@ -8,7 +8,8 @@ from django.db import models
from django.utils import timezone from django.utils import timezone
from django.utils.functional import cached_property from django.utils.functional import cached_property
import abx.archivebox.use import abx.archivebox.reads
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
@ -290,7 +291,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
if not hasattr(self, 'machine'): if not hasattr(self, 'machine'):
self.machine = Machine.objects.current() self.machine = Machine.objects.current()
if not self.binprovider: if not self.binprovider:
all_known_binproviders = list(abx.archivebox.use.get_BINPROVIDERS().values()) all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values())
binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True) binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True)
self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
if not self.abspath: if not self.abspath:
@ -304,7 +305,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
@cached_property
def BINARY(self) -> BaseBinary:
    """Return the plugin-defined binary whose name equals ``self.name``.

    Candidates come from ``abx.archivebox.reads.get_BINARIES()``; the first
    one with a matching ``name`` is returned.

    Raises:
        Exception: if no plugin defines a binary with this name.
    """
    for candidate in abx.archivebox.reads.get_BINARIES().values():
        if candidate.name == self.name:
            break
    else:
        # loop finished without a match
        raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
    return candidate
@ -312,7 +313,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
@cached_property
def BINPROVIDER(self) -> BaseBinProvider:
    """Return the registered bin provider whose name equals ``self.binprovider``.

    Candidates come from ``abx.archivebox.reads.get_BINPROVIDERS()``; the
    first one with a matching ``name`` is returned.

    Raises:
        Exception: if no plugin defines a bin provider with that name.
    """
    providers = abx.archivebox.reads.get_BINPROVIDERS().values()
    found = next(
        (provider for provider in providers if provider.name == self.binprovider),
        None,
    )
    if found is None:
        raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
    return found

View file

@ -6,7 +6,7 @@ from typing import List, Union
from django.db.models import QuerySet from django.db.models import QuerySet
from django.conf import settings from django.conf import settings
import abx.archivebox.use import abx.archivebox.reads
from archivebox.index.schema import Link from archivebox.index.schema import Link
from archivebox.misc.util import enforce_types from archivebox.misc.util import enforce_types
@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet):
def import_backend():
    """Return the search backend selected by SEARCH_BACKEND_CONFIG.

    Scans the backends from ``abx.archivebox.reads.get_SEARCHBACKENDS()`` and
    returns the first whose ``name`` equals
    ``SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE``.

    Raises:
        Exception: if no registered backend has the configured name.
    """
    for candidate in abx.archivebox.reads.get_SEARCHBACKENDS().values():
        if candidate.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE:
            break
    else:
        # no backend matched the configured engine name
        raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend')
    return candidate