mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-17 00:24:26 -04:00
add new InstalledBinary model to cache binaries on host machine
This commit is contained in:
parent
c2ed9a19d6
commit
e315905721
9 changed files with 330 additions and 66 deletions
|
@ -14,9 +14,9 @@ from pydantic_pkgr import (
|
||||||
EnvProvider,
|
EnvProvider,
|
||||||
)
|
)
|
||||||
|
|
||||||
import abx
|
|
||||||
|
|
||||||
from archivebox.config import CONSTANTS
|
from archivebox.config import CONSTANTS
|
||||||
|
|
||||||
|
import abx
|
||||||
from .base_hook import BaseHook, HookType
|
from .base_hook import BaseHook, HookType
|
||||||
|
|
||||||
|
|
||||||
|
@ -94,7 +94,15 @@ class BaseBinary(BaseHook, Binary):
|
||||||
return [self]
|
return [self]
|
||||||
|
|
||||||
|
|
||||||
|
class AptBinProvider(AptProvider, BaseBinProvider):
|
||||||
|
name: BinProviderName = "apt"
|
||||||
|
|
||||||
apt = AptProvider()
|
class BrewBinProvider(BrewProvider, BaseBinProvider):
|
||||||
brew = BrewProvider()
|
name: BinProviderName = "brew"
|
||||||
env = EnvProvider()
|
|
||||||
|
class EnvBinProvider(EnvProvider, BaseBinProvider):
|
||||||
|
name: BinProviderName = "env"
|
||||||
|
|
||||||
|
apt = AptBinProvider()
|
||||||
|
brew = BrewBinProvider()
|
||||||
|
env = EnvBinProvider()
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
__package__ = 'abx.archivebox'
|
__package__ = 'abx.archivebox'
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import socket
|
|
||||||
from typing import Optional, List, Literal, Annotated, Dict, Any
|
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from pydantic import model_validator, AfterValidator
|
from pydantic import model_validator, AfterValidator
|
||||||
from pydantic_pkgr import BinName
|
from pydantic_pkgr import BinName
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
import abx
|
import abx
|
||||||
|
|
||||||
|
@ -23,7 +24,7 @@ def no_empty_args(args: List[str]) -> List[str]:
|
||||||
ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
|
ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
|
||||||
|
|
||||||
HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
|
HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
|
||||||
CmdArgsList = Annotated[List[str], AfterValidator(no_empty_args)]
|
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
|
||||||
|
|
||||||
|
|
||||||
class BaseExtractor(BaseHook):
|
class BaseExtractor(BaseHook):
|
||||||
|
@ -53,8 +54,9 @@ class BaseExtractor(BaseHook):
|
||||||
|
|
||||||
def should_extract(self, snapshot) -> bool:
|
def should_extract(self, snapshot) -> bool:
|
||||||
try:
|
try:
|
||||||
assert self.BIN.version
|
assert self.detect_installed_binary().version
|
||||||
except Exception:
|
except Exception:
|
||||||
|
raise
|
||||||
# could not load binary
|
# could not load binary
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -66,19 +68,32 @@ class BaseExtractor(BaseHook):
|
||||||
@abx.hookimpl
|
@abx.hookimpl
|
||||||
def extract(self, snapshot_id: str) -> Dict[str, Any]:
|
def extract(self, snapshot_id: str) -> Dict[str, Any]:
|
||||||
from core.models import Snapshot
|
from core.models import Snapshot
|
||||||
|
from archivebox import CONSTANTS
|
||||||
|
|
||||||
snapshot = Snapshot.objects.get(id=snapshot_id)
|
snapshot = Snapshot.objects.get(id=snapshot_id)
|
||||||
|
|
||||||
if not self.should_extract(snapshot):
|
if not self.should_extract(snapshot):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
from archivebox import CONSTANTS
|
status = 'failed'
|
||||||
|
start_ts = timezone.now()
|
||||||
|
uplink = self.detect_network_interface()
|
||||||
|
installed_binary = self.detect_installed_binary()
|
||||||
|
machine = installed_binary.machine
|
||||||
|
assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true
|
||||||
|
|
||||||
# output_dir = self.get_output_path(snapshot) or CONSTANTS.TMP_DIR
|
# output_dir = self.get_output_path(snapshot) or CONSTANTS.TMP_DIR
|
||||||
output_dir = CONSTANTS.TMP_DIR / 'test'
|
output_dir = CONSTANTS.TMP_DIR / 'test'
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
cmd = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
|
# execute the extractor binary with the given args
|
||||||
proc = self.exec(cmd, cwd=output_dir)
|
args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
|
||||||
|
cmd = [str(installed_binary.abspath), *args]
|
||||||
|
proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)
|
||||||
|
|
||||||
|
# collect the output
|
||||||
|
end_ts = timezone.now()
|
||||||
|
output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
|
||||||
stdout = proc.stdout.strip()
|
stdout = proc.stdout.strip()
|
||||||
stderr = proc.stderr.strip()
|
stderr = proc.stderr.strip()
|
||||||
output_json = None
|
output_json = None
|
||||||
|
@ -90,59 +105,116 @@ class BaseExtractor(BaseHook):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
errors = []
|
errors = []
|
||||||
if proc.returncode != 0:
|
if proc.returncode == 0:
|
||||||
errors.append(f'{self.BIN.name} returned non-zero exit code: {proc.returncode}')
|
status = 'success'
|
||||||
|
else:
|
||||||
|
errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')
|
||||||
|
|
||||||
# pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7
|
# increment health stats counters
|
||||||
binary_str = f'{self.BIN.abspath}@{self.BIN.binprovider.name}:{self.BIN.binprovider.get_packages(self.BIN.name)}=={self.BIN.version}'
|
if status == 'success':
|
||||||
|
machine.record_health_success()
|
||||||
|
uplink.record_health_success()
|
||||||
|
installed_binary.record_health_success()
|
||||||
|
else:
|
||||||
|
machine.record_health_failure()
|
||||||
|
uplink.record_health_failure()
|
||||||
|
installed_binary.record_health_failure()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'extractor': self.name,
|
'extractor': self.name,
|
||||||
|
|
||||||
'snapshot_id': snapshot.id,
|
'snapshot': {
|
||||||
'snapshot_abid': snapshot.abid,
|
'id': snapshot.id,
|
||||||
'snapshot_url': snapshot.url,
|
'abid': snapshot.abid,
|
||||||
'snapshot_created_by_id': snapshot.created_by_id,
|
'url': snapshot.url,
|
||||||
|
'created_by_id': snapshot.created_by_id,
|
||||||
|
},
|
||||||
|
|
||||||
'hostname': socket.gethostname(),
|
'machine': {
|
||||||
|
'id': machine.id,
|
||||||
|
'abid': machine.abid,
|
||||||
|
'guid': machine.guid,
|
||||||
|
'hostname': machine.hostname,
|
||||||
|
'hw_in_docker': machine.hw_in_docker,
|
||||||
|
'hw_in_vm': machine.hw_in_vm,
|
||||||
|
'hw_manufacturer': machine.hw_manufacturer,
|
||||||
|
'hw_product': machine.hw_product,
|
||||||
|
'hw_uuid': machine.hw_uuid,
|
||||||
|
'os_arch': machine.os_arch,
|
||||||
|
'os_family': machine.os_family,
|
||||||
|
'os_platform': machine.os_platform,
|
||||||
|
'os_release': machine.os_release,
|
||||||
|
'os_kernel': machine.os_kernel,
|
||||||
|
},
|
||||||
|
|
||||||
'binary': binary_str,
|
'uplink': {
|
||||||
'binary_name': self.BIN.name,
|
'id': uplink.id,
|
||||||
'binary_provider': self.BIN.binprovider.name,
|
'abid': uplink.abid,
|
||||||
'binary_version': self.BIN.version,
|
'mac_address': uplink.mac_address,
|
||||||
'binary_abspath': self.BIN.abspath,
|
'ip_public': uplink.ip_public,
|
||||||
|
'ip_local': uplink.ip_local,
|
||||||
|
'dns_server': uplink.dns_server,
|
||||||
|
'hostname': uplink.hostname,
|
||||||
|
'iface': uplink.iface,
|
||||||
|
'isp': uplink.isp,
|
||||||
|
'city': uplink.city,
|
||||||
|
'region': uplink.region,
|
||||||
|
'country': uplink.country,
|
||||||
|
},
|
||||||
|
|
||||||
|
'binary': {
|
||||||
|
'id': installed_binary.id,
|
||||||
|
'abid': installed_binary.abid,
|
||||||
|
'name': installed_binary.name,
|
||||||
|
'binprovider': installed_binary.binprovider,
|
||||||
|
'abspath': installed_binary.abspath,
|
||||||
|
'version': installed_binary.version,
|
||||||
|
'sha256': installed_binary.sha256,
|
||||||
|
},
|
||||||
|
|
||||||
'cmd': cmd,
|
'cmd': cmd,
|
||||||
'stdout': stdout,
|
'stdout': stdout,
|
||||||
'stderr': stderr,
|
'stderr': stderr,
|
||||||
'returncode': proc.returncode,
|
'returncode': proc.returncode,
|
||||||
|
'start_ts': start_ts,
|
||||||
|
'end_ts': end_ts,
|
||||||
|
|
||||||
'status': 'succeeded' if proc.returncode == 0 else 'failed',
|
'status': status,
|
||||||
'errors': errors,
|
'errors': errors,
|
||||||
'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
|
'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
|
||||||
'output_files': list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*')),
|
'output_files': output_files,
|
||||||
'output_json': output_json or {},
|
'output_json': output_json or {},
|
||||||
'output_text': output_text or '',
|
'output_text': output_text or '',
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: move this to a hookimpl
|
# TODO: move this to a hookimpl
|
||||||
def exec(self, args: CmdArgsList, cwd: Optional[Path]=None, binary=None):
|
def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
|
||||||
cwd = cwd or Path('.')
|
cwd = cwd or Path('.')
|
||||||
binary = (binary or self.BINARY).load()
|
binary = self.load_binary(installed_binary=installed_binary)
|
||||||
|
|
||||||
return binary.exec(cmd=args, cwd=cwd)
|
return binary.exec(cmd=args, cwd=cwd)
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def BINARY(self) -> BaseBinary:
|
def BINARY(self) -> BaseBinary:
|
||||||
from django.conf import settings
|
import abx.archivebox.use
|
||||||
for binary in settings.BINARIES.values():
|
for binary in abx.archivebox.use.get_BINARIES().values():
|
||||||
if binary.name == self.binary:
|
if binary.name == self.binary:
|
||||||
return binary
|
return binary
|
||||||
raise ValueError(f'Binary {self.binary} not found')
|
raise ValueError(f'Binary {self.binary} not found')
|
||||||
|
|
||||||
@cached_property
|
def detect_installed_binary(self):
|
||||||
def BIN(self) -> BaseBinary:
|
from machine.models import InstalledBinary
|
||||||
return self.BINARY.load()
|
# hydrates binary from DB/cache if record of installed version is recent enough
|
||||||
|
# otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
|
||||||
|
return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)
|
||||||
|
|
||||||
|
def load_binary(self, installed_binary=None) -> BaseBinary:
|
||||||
|
installed_binary = installed_binary or self.detect_installed_binary()
|
||||||
|
return installed_binary.load_from_db()
|
||||||
|
|
||||||
|
def detect_network_interface(self):
|
||||||
|
from machine.models import NetworkInterface
|
||||||
|
return NetworkInterface.objects.current()
|
||||||
|
|
||||||
@abx.hookimpl
|
@abx.hookimpl
|
||||||
def get_EXTRACTORS(self):
|
def get_EXTRACTORS(self):
|
||||||
|
|
|
@ -46,9 +46,13 @@ def get_FLAT_CONFIG() -> Dict[str, Any]:
|
||||||
})
|
})
|
||||||
|
|
||||||
def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
|
def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
|
||||||
|
# TODO: move these to plugins
|
||||||
|
from abx.archivebox.base_binary import apt, brew, env
|
||||||
|
builtin_binproviders = [apt, brew, env]
|
||||||
|
|
||||||
return benedict({
|
return benedict({
|
||||||
binprovider.id: binprovider
|
binprovider.id: binprovider
|
||||||
for plugin_binproviders in pm.hook.get_BINPROVIDERS()
|
for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()]
|
||||||
for binprovider in plugin_binproviders
|
for binprovider in plugin_binproviders
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,6 @@ class ConfigPlugin(BasePlugin):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
PLUGIN = ConfigPlugin()
|
PLUGIN = ConfigPlugin()
|
||||||
DJANGO_APP = PLUGIN.AppConfig
|
DJANGO_APP = PLUGIN.AppConfig
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,8 @@ from admin_data_views.utils import render_with_table_view, render_with_item_view
|
||||||
from archivebox.config import CONSTANTS
|
from archivebox.config import CONSTANTS
|
||||||
from archivebox.misc.util import parse_date
|
from archivebox.misc.util import parse_date
|
||||||
|
|
||||||
|
from machine.models import InstalledBinary
|
||||||
|
|
||||||
|
|
||||||
def obj_to_yaml(obj: Any, indent: int=0) -> str:
|
def obj_to_yaml(obj: Any, indent: int=0) -> str:
|
||||||
indent_str = " " * indent
|
indent_str = " " * indent
|
||||||
|
@ -64,7 +66,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||||
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||||
|
|
||||||
rows = {
|
rows = {
|
||||||
"Binary": [],
|
"Binary Name": [],
|
||||||
"Found Version": [],
|
"Found Version": [],
|
||||||
"From Plugin": [],
|
"From Plugin": [],
|
||||||
"Provided By": [],
|
"Provided By": [],
|
||||||
|
@ -83,11 +85,12 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||||
for plugin in settings.PLUGINS.values():
|
for plugin in settings.PLUGINS.values():
|
||||||
for binary in plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
|
for binary in plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
|
||||||
try:
|
try:
|
||||||
binary = binary.load()
|
installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
|
||||||
|
binary = installed_binary.load_from_db()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
rows['Binary'].append(ItemLink(binary.name, key=binary.name))
|
rows['Binary Name'].append(ItemLink(binary.name, key=binary.name))
|
||||||
rows['Found Version'].append(f'✅ {binary.loaded_version}' if binary.loaded_version else '❌ missing')
|
rows['Found Version'].append(f'✅ {binary.loaded_version}' if binary.loaded_version else '❌ missing')
|
||||||
rows['From Plugin'].append(plugin.plugin_module)
|
rows['From Plugin'].append(plugin.plugin_module)
|
||||||
rows['Provided By'].append(
|
rows['Provided By'].append(
|
||||||
|
|
|
@ -29,7 +29,7 @@ from core.mixins import SearchResultsAdminMixin
|
||||||
from api.models import APIToken
|
from api.models import APIToken
|
||||||
from abid_utils.admin import ABIDModelAdmin
|
from abid_utils.admin import ABIDModelAdmin
|
||||||
from queues.tasks import bg_archive_links, bg_add
|
from queues.tasks import bg_archive_links, bg_add
|
||||||
from machine.models import Machine, NetworkInterface
|
from machine.models import Machine, NetworkInterface, InstalledBinary
|
||||||
|
|
||||||
from index.html import snapshot_icons
|
from index.html import snapshot_icons
|
||||||
from logging_util import printable_filesize
|
from logging_util import printable_filesize
|
||||||
|
@ -829,3 +829,29 @@ class NetworkInterfaceAdmin(ABIDModelAdmin):
|
||||||
iface.machine.abid,
|
iface.machine.abid,
|
||||||
iface.machine.hostname,
|
iface.machine.hostname,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@admin.register(InstalledBinary, site=archivebox_admin)
|
||||||
|
class InstalledBinaryAdmin(ABIDModelAdmin):
|
||||||
|
list_display = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
|
||||||
|
sort_fields = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
|
||||||
|
search_fields = ('abid', 'machine__abid', 'name', 'binprovider', 'version', 'abspath', 'sha256')
|
||||||
|
|
||||||
|
readonly_fields = ('created_at', 'modified_at', 'abid_info')
|
||||||
|
fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
|
||||||
|
|
||||||
|
list_filter = ('name', 'binprovider', 'machine_id')
|
||||||
|
ordering = ['-created_at']
|
||||||
|
list_per_page = 100
|
||||||
|
actions = ["delete_selected"]
|
||||||
|
|
||||||
|
@admin.display(
|
||||||
|
description='Machine',
|
||||||
|
ordering='machine__abid',
|
||||||
|
)
|
||||||
|
def machine_info(self, installed_binary):
|
||||||
|
return format_html(
|
||||||
|
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> {}</a>',
|
||||||
|
installed_binary.machine.id,
|
||||||
|
installed_binary.machine.abid,
|
||||||
|
installed_binary.machine.hostname,
|
||||||
|
)
|
||||||
|
|
|
@ -22,6 +22,7 @@ from archivebox.config import CONSTANTS
|
||||||
|
|
||||||
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
|
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
|
||||||
from queues.tasks import bg_archive_snapshot
|
from queues.tasks import bg_archive_snapshot
|
||||||
|
from machine.models import Machine, NetworkInterface
|
||||||
|
|
||||||
from archivebox.misc.system import get_dir_size
|
from archivebox.misc.system import get_dir_size
|
||||||
from archivebox.misc.util import parse_date, base_url
|
from archivebox.misc.util import parse_date, base_url
|
||||||
|
@ -545,6 +546,9 @@ class ArchiveResult(ABIDModel):
|
||||||
end_ts = models.DateTimeField()
|
end_ts = models.DateTimeField()
|
||||||
status = models.CharField(max_length=16, choices=STATUS_CHOICES)
|
status = models.CharField(max_length=16, choices=STATUS_CHOICES)
|
||||||
|
|
||||||
|
# the network interface that was used to download this result
|
||||||
|
# uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
|
||||||
|
|
||||||
objects = ArchiveResultManager()
|
objects = ArchiveResultManager()
|
||||||
|
|
||||||
class Meta(TypedModelMeta):
|
class Meta(TypedModelMeta):
|
||||||
|
@ -556,6 +560,10 @@ class ArchiveResult(ABIDModel):
|
||||||
# return f'[{self.abid}] 📅 {self.start_ts.strftime("%Y-%m-%d %H:%M")} 📄 {self.extractor} {self.snapshot.url}'
|
# return f'[{self.abid}] 📅 {self.start_ts.strftime("%Y-%m-%d %H:%M")} 📄 {self.extractor} {self.snapshot.url}'
|
||||||
return self.extractor
|
return self.extractor
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def machine(self):
|
||||||
|
return self.iface.machine if self.iface else None
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def snapshot_dir(self):
|
def snapshot_dir(self):
|
||||||
return Path(self.snapshot.link_dir)
|
return Path(self.snapshot.link_dir)
|
||||||
|
|
|
@ -480,7 +480,7 @@ ADMIN_DATA_VIEWS = {
|
||||||
{
|
{
|
||||||
"route": "binaries/",
|
"route": "binaries/",
|
||||||
"view": "archivebox.config.views.binaries_list_view",
|
"view": "archivebox.config.views.binaries_list_view",
|
||||||
"name": "Binaries",
|
"name": "Dependencies",
|
||||||
"items": {
|
"items": {
|
||||||
"route": "<str:key>/",
|
"route": "<str:key>/",
|
||||||
"view": "archivebox.config.views.binary_detail_view",
|
"view": "archivebox.config.views.binary_detail_view",
|
||||||
|
|
|
@ -124,44 +124,188 @@ class NetworkInterface(ABIDModel):
|
||||||
dns_server = models.GenericIPAddressField(default=None, null=False, editable=False) # e.g. 8.8.8.8 or 2001:0db8:85a3:0000:0000:8a2e:0370:7334
|
dns_server = models.GenericIPAddressField(default=None, null=False, editable=False) # e.g. 8.8.8.8 or 2001:0db8:85a3:0000:0000:8a2e:0370:7334
|
||||||
|
|
||||||
# MUTABLE PROPERTIES
|
# MUTABLE PROPERTIES
|
||||||
iface = models.CharField(max_length=15, default=None, null=False) # e.g. en0
|
|
||||||
hostname = models.CharField(max_length=63, default=None, null=False) # e.g. somehost.sub.example.com
|
hostname = models.CharField(max_length=63, default=None, null=False) # e.g. somehost.sub.example.com
|
||||||
|
iface = models.CharField(max_length=15, default=None, null=False) # e.g. en0
|
||||||
isp = models.CharField(max_length=63, default=None, null=False) # e.g. AS-SONICTELECOM
|
isp = models.CharField(max_length=63, default=None, null=False) # e.g. AS-SONICTELECOM
|
||||||
city = models.CharField(max_length=63, default=None, null=False) # e.g. Berkeley
|
city = models.CharField(max_length=63, default=None, null=False) # e.g. Berkeley
|
||||||
region = models.CharField(max_length=63, default=None, null=False) # e.g. California
|
region = models.CharField(max_length=63, default=None, null=False) # e.g. California
|
||||||
country = models.CharField(max_length=63, default=None, null=False) # e.g. United States
|
country = models.CharField(max_length=63, default=None, null=False) # e.g. United States
|
||||||
|
|
||||||
objects = NetworkInterfaceManager()
|
# STATS COUNTERS (from ModelWithHealthStats)
|
||||||
|
# num_uses_failed = models.PositiveIntegerField(default=0)
|
||||||
|
# num_uses_succeeded = models.PositiveIntegerField(default=0)
|
||||||
|
|
||||||
|
objects: NetworkInterfaceManager = NetworkInterfaceManager()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
unique_together = (
|
unique_together = (
|
||||||
|
# if *any* of these change, it's considered a different interface
|
||||||
|
# because we might get different downloaded content as a result,
|
||||||
|
# this forces us to store an audit trail whenever these things change
|
||||||
('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),
|
('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# class InstalledBinary(ABIDModel):
|
class InstalledBinaryManager(models.Manager):
|
||||||
# abid_prefix = 'bin_'
|
def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary':
|
||||||
# abid_ts_src = 'self.machine.created_at'
|
"""Get or create an InstalledBinary record for a Binary on the local machine"""
|
||||||
# abid_uri_src = 'self.machine.guid'
|
|
||||||
# abid_subtype_src = 'self.binprovider'
|
|
||||||
# abid_rand_src = 'self.id'
|
|
||||||
# abid_drift_allowed = False
|
|
||||||
|
|
||||||
# id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
|
global CURRENT_BINARIES
|
||||||
# abid = ABIDField(prefix=abid_prefix)
|
cached_binary = CURRENT_BINARIES.get(binary.id)
|
||||||
|
if cached_binary:
|
||||||
|
expires_at = cached_binary.modified_at + timedelta(seconds=INSTALLED_BINARY_RECHECK_INTERVAL)
|
||||||
|
if timezone.now() < expires_at:
|
||||||
|
is_loaded = binary.abspath and binary.version and binary.sha256
|
||||||
|
if is_loaded:
|
||||||
|
# if the caller already did the (expensive) job of loading the binary from the filesystem
|
||||||
|
# then their in-memory version is certainly more up-to-date than any potential cached version
|
||||||
|
# use this opportunity to invalidate the cache in case anything has changed
|
||||||
|
is_different_from_cache = (
|
||||||
|
binary.abspath != cached_binary.abspath
|
||||||
|
or binary.version != cached_binary.version
|
||||||
|
or binary.sha256 != cached_binary.sha256
|
||||||
|
)
|
||||||
|
if is_different_from_cache:
|
||||||
|
CURRENT_BINARIES.pop(binary.id)
|
||||||
|
else:
|
||||||
|
return cached_binary
|
||||||
|
else:
|
||||||
|
# if they have not yet loaded the binary
|
||||||
|
# but our cache is recent enough and not expired, assume cached version is good enough
|
||||||
|
# it will automatically reload when the cache expires
|
||||||
|
# cached_binary will be stale/bad for up to 30min if binary was updated/removed on host system
|
||||||
|
return cached_binary
|
||||||
|
else:
|
||||||
|
# cached binary is too old, reload it from scratch
|
||||||
|
CURRENT_BINARIES.pop(binary.id)
|
||||||
|
|
||||||
# created_at = AutoDateTimeField(default=None, null=False, db_index=True)
|
if not binary.abspath or not binary.version or not binary.sha256:
|
||||||
# modified_at = models.DateTimeField(auto_now=True)
|
# if binary was not yet loaded from filesystem, do it now
|
||||||
|
# this is expensive, we have to find its abspath, version, and sha256, but it's necessary
|
||||||
|
# to make sure we have a good, up-to-date record of it in the DB & in-memory cache
|
||||||
|
binary = binary.load()
|
||||||
|
|
||||||
# machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False)
|
assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
|
||||||
# binprovider = models.CharField(max_length=255, default=None, null=False)
|
|
||||||
|
|
||||||
# name = models.CharField(max_length=255, default=None, null=False)
|
CURRENT_BINARIES[binary.id], _created = self.update_or_create(
|
||||||
# version = models.CharField(max_length=255, default=None, null=False)
|
machine=Machine.objects.current(),
|
||||||
# abspath = models.CharField(max_length=255, default=None, null=False)
|
name=binary.name,
|
||||||
# sha256 = models.CharField(max_length=255, default=None, null=False)
|
binprovider=binary.loaded_binprovider.name,
|
||||||
|
version=str(binary.loaded_version),
|
||||||
|
abspath=str(binary.loaded_abspath),
|
||||||
|
sha256=str(binary.loaded_sha256),
|
||||||
|
)
|
||||||
|
cached_binary = CURRENT_BINARIES[binary.id]
|
||||||
|
cached_binary.save() # populate ABID
|
||||||
|
|
||||||
# class Meta:
|
# if we get this far make sure DB record matches in-memory cache
|
||||||
# unique_together = (
|
assert str(cached_binary.binprovider) == str(binary.loaded_binprovider.name)
|
||||||
# ('machine', 'binprovider', 'version', 'abspath', 'sha256'),
|
assert str(cached_binary.abspath) == str(binary.loaded_abspath)
|
||||||
# )
|
assert str(cached_binary.version) == str(binary.loaded_version)
|
||||||
|
assert str(cached_binary.sha256) == str(binary.loaded_sha256)
|
||||||
|
|
||||||
|
return cached_binary
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class InstalledBinary(ABIDModel, ModelWithHealthStats):
|
||||||
|
abid_prefix = 'bin_'
|
||||||
|
abid_ts_src = 'self.machine.created_at'
|
||||||
|
abid_uri_src = 'self.machine.guid'
|
||||||
|
abid_subtype_src = 'self.binprovider'
|
||||||
|
abid_rand_src = 'self.id'
|
||||||
|
abid_drift_allowed = False
|
||||||
|
|
||||||
|
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
|
||||||
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
|
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
|
||||||
|
modified_at = models.DateTimeField(auto_now=True)
|
||||||
|
|
||||||
|
# IMMUTABLE PROPERTIES
|
||||||
|
machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False, blank=True)
|
||||||
|
name = models.CharField(max_length=63, default=None, null=False, blank=True)
|
||||||
|
binprovider = models.CharField(max_length=31, default=None, null=False, blank=True)
|
||||||
|
abspath = models.CharField(max_length=255, default=None, null=False, blank=True)
|
||||||
|
version = models.CharField(max_length=32, default=None, null=False, blank=True)
|
||||||
|
sha256 = models.CharField(max_length=64, default=None, null=False, blank=True)
|
||||||
|
|
||||||
|
# MUTABLE PROPERTIES
|
||||||
|
# is_pinned = models.BooleanField(default=False) # i.e. should this binary supersede other binaries with the same name on the host?
|
||||||
|
# is_valid = models.BooleanField(default=True) # i.e. is this binary still available on the host?
|
||||||
|
|
||||||
|
# STATS COUNTERS (from ModelWithHealthStats)
|
||||||
|
# num_uses_failed = models.PositiveIntegerField(default=0)
|
||||||
|
# num_uses_succeeded = models.PositiveIntegerField(default=0)
|
||||||
|
|
||||||
|
objects: InstalledBinaryManager = InstalledBinaryManager()
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
verbose_name = 'Installed Binary'
|
||||||
|
verbose_name_plural = 'Installed Binaries'
|
||||||
|
unique_together = (
|
||||||
|
('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256'),
|
||||||
|
)
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'{self.name}@{self.binprovider}+{self.abspath}@{self.version}'
|
||||||
|
|
||||||
|
def clean(self, *args, **kwargs) -> None:
|
||||||
|
assert self.name or self.abspath
|
||||||
|
self.name = str(self.name or self.abspath)
|
||||||
|
assert self.name
|
||||||
|
|
||||||
|
if not hasattr(self, 'machine'):
|
||||||
|
self.machine = Machine.objects.current()
|
||||||
|
if not self.binprovider:
|
||||||
|
all_known_binproviders = list(abx.archivebox.use.get_BINPROVIDERS().values())
|
||||||
|
binary = Binary(name=self.name, binproviders=all_known_binproviders).load()
|
||||||
|
self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
|
||||||
|
if not self.abspath:
|
||||||
|
self.abspath = self.BINPROVIDER.get_abspath(self.name)
|
||||||
|
if not self.version:
|
||||||
|
self.version = self.BINPROVIDER.get_version(self.name, abspath=self.abspath)
|
||||||
|
if not self.sha256:
|
||||||
|
self.sha256 = self.BINPROVIDER.get_sha256(self.name, abspath=self.abspath)
|
||||||
|
|
||||||
|
super().clean(*args, **kwargs)
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def BINARY(self) -> BaseBinary:
|
||||||
|
for binary in abx.archivebox.use.get_BINARIES().values():
|
||||||
|
if binary.name == self.name:
|
||||||
|
return binary
|
||||||
|
raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
|
||||||
|
# TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def BINPROVIDER(self) -> BaseBinProvider:
|
||||||
|
for binprovider in abx.archivebox.use.get_BINPROVIDERS().values():
|
||||||
|
if binprovider.name == self.binprovider:
|
||||||
|
return binprovider
|
||||||
|
raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
|
||||||
|
|
||||||
|
# maybe not a good idea to provide this? Binary in DB is a record of the binary's config
|
||||||
|
# whereas a loaded binary is a not-yet saved instance that may not have the same config
|
||||||
|
# why would we want to load a binary record from the db when it could be freshly loaded?
|
||||||
|
def load_from_db(self) -> BaseBinary:
|
||||||
|
# TODO: implement defaults arg in pydantic_pkgr
|
||||||
|
# return self.BINARY.load(defaults={
|
||||||
|
# 'binprovider': self.BINPROVIDER,
|
||||||
|
# 'abspath': Path(self.abspath),
|
||||||
|
# 'version': self.version,
|
||||||
|
# 'sha256': self.sha256,
|
||||||
|
# })
|
||||||
|
|
||||||
|
return BaseBinary.model_validate({
|
||||||
|
**self.BINARY.model_dump(),
|
||||||
|
'abspath': self.abspath and Path(self.abspath),
|
||||||
|
'version': self.version,
|
||||||
|
'sha256': self.sha256,
|
||||||
|
'loaded_binprovider': self.BINPROVIDER,
|
||||||
|
'binproviders_supported': self.BINARY.binproviders_supported,
|
||||||
|
'provider_overrides': self.BINARY.provider_overrides,
|
||||||
|
})
|
||||||
|
|
||||||
|
def load_fresh(self) -> BaseBinary:
|
||||||
|
return self.BINARY.load()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue