mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-15 23:54:27 -04:00
new vastly simplified plugin spec without pydantic
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
This commit is contained in:
parent
abf75f49f4
commit
01ba6d49d3
115 changed files with 2466 additions and 2301 deletions
|
@ -0,0 +1,51 @@
|
|||
__package__ = 'plugins_extractor.singlefile'
|
||||
__label__ = 'singlefile'
|
||||
__version__ = '2024.10.14'
|
||||
__author__ = 'Nick Sweeting'
|
||||
__homepage__ = 'https://github.com/gildas-lormeau/singlefile'
|
||||
__dependencies__ = ['npm']
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
|
||||
def get_PLUGIN():
|
||||
return {
|
||||
'singlefile': {
|
||||
'PACKAGE': __package__,
|
||||
'LABEL': __label__,
|
||||
'VERSION': __version__,
|
||||
'AUTHOR': __author__,
|
||||
'HOMEPAGE': __homepage__,
|
||||
'DEPENDENCIES': __dependencies__,
|
||||
}
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
def get_CONFIG():
|
||||
from .config import SINGLEFILE_CONFIG
|
||||
|
||||
return {
|
||||
'singlefile': SINGLEFILE_CONFIG
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
def get_BINARIES():
|
||||
from .binaries import SINGLEFILE_BINARY
|
||||
|
||||
return {
|
||||
'singlefile': SINGLEFILE_BINARY,
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
def get_EXTRACTORS():
|
||||
from .extractors import SINGLEFILE_EXTRACTOR
|
||||
|
||||
return {
|
||||
'singlefile': SINGLEFILE_EXTRACTOR,
|
||||
}
|
||||
|
||||
# @abx.hookimpl
|
||||
# def get_INSTALLED_APPS():
|
||||
# # needed to load ./models.py
|
||||
# return [__package__]
|
|
@ -1,110 +0,0 @@
|
|||
__package__ = 'archivebox.plugins_extractor.singlefile'
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
# from typing_extensions import Self
|
||||
|
||||
# Depends on other PyPI/vendor packages:
|
||||
from pydantic import InstanceOf, Field
|
||||
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath
|
||||
|
||||
# Depends on other Django apps:
|
||||
from abx.archivebox.base_plugin import BasePlugin
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
from abx.archivebox.base_binary import BaseBinary, env
|
||||
from abx.archivebox.base_extractor import BaseExtractor
|
||||
from abx.archivebox.base_queue import BaseQueue
|
||||
from abx.archivebox.base_hook import BaseHook
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
class SinglefileConfig(BaseConfigSet):
|
||||
SAVE_SINGLEFILE: bool = True
|
||||
|
||||
SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)
|
||||
SINGLEFILE_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
|
||||
SINGLEFILE_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
|
||||
SINGLEFILE_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE)
|
||||
|
||||
SINGLEFILE_BINARY: str = Field(default='single-file')
|
||||
SINGLEFILE_EXTRA_ARGS: List[str] = []
|
||||
|
||||
|
||||
SINGLEFILE_CONFIG = SinglefileConfig()
|
||||
|
||||
|
||||
SINGLEFILE_MIN_VERSION = '1.1.54'
|
||||
SINGLEFILE_MAX_VERSION = '1.1.60'
|
||||
|
||||
|
||||
class SinglefileBinary(BaseBinary):
|
||||
name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
|
||||
|
||||
overrides: BinaryOverrides = {
|
||||
LIB_NPM_BINPROVIDER.name: {
|
||||
"abspath": lambda:
|
||||
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=LIB_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH),
|
||||
"packages": [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
|
||||
},
|
||||
SYS_NPM_BINPROVIDER.name: {
|
||||
"abspath": lambda:
|
||||
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=SYS_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file", PATH=SYS_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file-node.js", PATH=SYS_NPM_BINPROVIDER.PATH),
|
||||
"packages": [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
|
||||
"install": lambda: None,
|
||||
},
|
||||
env.name: {
|
||||
'abspath': lambda:
|
||||
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
|
||||
or bin_abspath('single-file', PATH=env.PATH)
|
||||
or bin_abspath('single-file-node.js', PATH=env.PATH),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
SINGLEFILE_BINARY = SinglefileBinary()
|
||||
|
||||
PLUGIN_BINARIES = [SINGLEFILE_BINARY]
|
||||
|
||||
class SinglefileExtractor(BaseExtractor):
|
||||
name: str = 'singlefile'
|
||||
binary: BinName = SINGLEFILE_BINARY.name
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return Path(snapshot.link_dir) / 'singlefile.html'
|
||||
|
||||
|
||||
SINGLEFILE_BINARY = SinglefileBinary()
|
||||
SINGLEFILE_EXTRACTOR = SinglefileExtractor()
|
||||
|
||||
class SinglefileQueue(BaseQueue):
|
||||
name: str = 'singlefile'
|
||||
|
||||
binaries: List[InstanceOf[BaseBinary]] = [SINGLEFILE_BINARY]
|
||||
|
||||
SINGLEFILE_QUEUE = SinglefileQueue()
|
||||
|
||||
class SinglefilePlugin(BasePlugin):
|
||||
app_label: str ='singlefile'
|
||||
verbose_name: str = 'SingleFile'
|
||||
|
||||
hooks: List[InstanceOf[BaseHook]] = [
|
||||
SINGLEFILE_CONFIG,
|
||||
SINGLEFILE_BINARY,
|
||||
SINGLEFILE_EXTRACTOR,
|
||||
SINGLEFILE_QUEUE,
|
||||
]
|
||||
|
||||
|
||||
|
||||
PLUGIN = SinglefilePlugin()
|
||||
# PLUGIN.register(settings)
|
||||
DJANGO_APP = PLUGIN.AppConfig
|
48
archivebox/plugins_extractor/singlefile/binaries.py
Normal file
48
archivebox/plugins_extractor/singlefile/binaries.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
__package__ = 'plugins_extractor.singlefile'
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import InstanceOf
|
||||
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinary, env
|
||||
|
||||
from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||
|
||||
from .config import SINGLEFILE_CONFIG
|
||||
|
||||
|
||||
SINGLEFILE_MIN_VERSION = '1.1.54'
|
||||
SINGLEFILE_MAX_VERSION = '1.1.60'
|
||||
|
||||
|
||||
class SinglefileBinary(BaseBinary):
|
||||
name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
|
||||
|
||||
overrides: BinaryOverrides = {
|
||||
LIB_NPM_BINPROVIDER.name: {
|
||||
"abspath": lambda:
|
||||
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=LIB_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH),
|
||||
"packages": [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
|
||||
},
|
||||
SYS_NPM_BINPROVIDER.name: {
|
||||
"abspath": lambda:
|
||||
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=SYS_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file", PATH=SYS_NPM_BINPROVIDER.PATH)
|
||||
or bin_abspath("single-file-node.js", PATH=SYS_NPM_BINPROVIDER.PATH),
|
||||
"packages": [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
|
||||
"install": lambda: None,
|
||||
},
|
||||
env.name: {
|
||||
'abspath': lambda:
|
||||
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
|
||||
or bin_abspath('single-file', PATH=env.PATH)
|
||||
or bin_abspath('single-file-node.js', PATH=env.PATH),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
SINGLEFILE_BINARY = SinglefileBinary()
|
25
archivebox/plugins_extractor/singlefile/config.py
Normal file
25
archivebox/plugins_extractor/singlefile/config.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
__package__ = 'plugins_extractor.singlefile'
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
|
||||
|
||||
class SinglefileConfig(BaseConfigSet):
|
||||
SAVE_SINGLEFILE: bool = True
|
||||
|
||||
SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)
|
||||
SINGLEFILE_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
|
||||
SINGLEFILE_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
|
||||
SINGLEFILE_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE)
|
||||
|
||||
SINGLEFILE_BINARY: str = Field(default='single-file')
|
||||
SINGLEFILE_EXTRA_ARGS: List[str] = []
|
||||
|
||||
|
||||
SINGLEFILE_CONFIG = SinglefileConfig()
|
19
archivebox/plugins_extractor/singlefile/extractors.py
Normal file
19
archivebox/plugins_extractor/singlefile/extractors.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
__package__ = 'plugins_extractor.singlefile'
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic_pkgr import BinName
|
||||
from abx.archivebox.base_extractor import BaseExtractor
|
||||
|
||||
from .binaries import SINGLEFILE_BINARY
|
||||
|
||||
|
||||
class SinglefileExtractor(BaseExtractor):
|
||||
name: str = 'singlefile'
|
||||
binary: BinName = SINGLEFILE_BINARY.name
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return Path(snapshot.link_dir) / 'singlefile.html'
|
||||
|
||||
|
||||
SINGLEFILE_EXTRACTOR = SinglefileExtractor()
|
|
@ -1,26 +0,0 @@
|
|||
# Generated by Django 5.1.1 on 2024-09-10 05:05
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
('core', '0074_alter_snapshot_downloaded_at'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='SinglefileResult',
|
||||
fields=[
|
||||
],
|
||||
options={
|
||||
'proxy': True,
|
||||
'indexes': [],
|
||||
'constraints': [],
|
||||
},
|
||||
bases=('core.archiveresult',),
|
||||
),
|
||||
]
|
|
@ -1,40 +0,0 @@
|
|||
__package__ = 'archivebox.queues'
|
||||
|
||||
import time
|
||||
|
||||
from django.core.cache import cache
|
||||
|
||||
from huey import crontab
|
||||
from django_huey import db_task, on_startup, db_periodic_task
|
||||
from huey_monitor.models import TaskModel
|
||||
from huey_monitor.tqdm import ProcessInfo
|
||||
|
||||
@db_task(queue="singlefile", context=True)
|
||||
def extract(url, out_dir, config, task=None, parent_task_id=None):
|
||||
if task and parent_task_id:
|
||||
TaskModel.objects.set_parent_task(main_task_id=parent_task_id, sub_task_id=task.id)
|
||||
|
||||
process_info = ProcessInfo(task, desc="extract_singlefile", parent_task_id=parent_task_id, total=1)
|
||||
|
||||
time.sleep(5)
|
||||
|
||||
process_info.update(n=1)
|
||||
return {'output': 'singlefile.html', 'status': 'succeeded'}
|
||||
|
||||
|
||||
# @on_startup(queue='singlefile')
|
||||
# def start_singlefile_queue():
|
||||
# print("[+] Starting singlefile worker...")
|
||||
# update_version.call_local()
|
||||
|
||||
|
||||
# @db_periodic_task(crontab(minute='*/5'), queue='singlefile')
|
||||
# def update_version():
|
||||
# print('[*] Updating singlefile version... 5 minute interval')
|
||||
# from django.conf import settings
|
||||
|
||||
# bin = settings.BINARIES.SinglefileBinary.load()
|
||||
# if bin.version:
|
||||
# cache.set(f"bin:abspath:{bin.name}", bin.abspath)
|
||||
# cache.set(f"bin:version:{bin.name}:{bin.abspath}", bin.version)
|
||||
# print('[√] Updated singlefile version:', bin.version, bin.abspath)
|
Loading…
Add table
Add a link
Reference in a new issue