rename plugins dirs

This commit is contained in:
Nick Sweeting 2024-09-24 01:34:27 -07:00
parent 8713091e73
commit e8f1264954
No known key found for this signature in database
28 changed files with 31 additions and 31 deletions

View file

View file

@ -0,0 +1,164 @@
import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import (
BinProvider,
BinName,
BinProviderName,
ProviderLookupDict,
bin_abspath,
)
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
CHROMIUM_BINARY_NAMES_LINUX = [
"chromium",
"chromium-browser",
"chromium-browser-beta",
"chromium-browser-unstable",
"chromium-browser-canary",
"chromium-browser-dev",
]
CHROMIUM_BINARY_NAMES_MACOS = ["/Applications/Chromium.app/Contents/MacOS/Chromium"]
CHROMIUM_BINARY_NAMES = CHROMIUM_BINARY_NAMES_LINUX + CHROMIUM_BINARY_NAMES_MACOS
CHROME_BINARY_NAMES_LINUX = [
"google-chrome",
"google-chrome-stable",
"google-chrome-beta",
"google-chrome-canary",
"google-chrome-unstable",
"google-chrome-dev",
"chrome"
]
CHROME_BINARY_NAMES_MACOS = [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
]
CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
abspath = bin_abspath(bin_name, PATH=env.PATH)
if abspath:
return abspath
return None
def create_macos_app_symlink(target: Path, shortcut: Path):
"""
on macOS, some binaries are inside of .app, so we need to
create a tiny bash script instead of a symlink
(so that ../ parent relationships are relative to original .app instead of callsite dir)
"""
# TODO: should we enforce this? is it useful in any other situation?
# if platform.system().lower() != 'darwin':
# raise Exception(...)
shortcut.write_text(f"""#!/usr/bin/env bash\nexec '{target}' "$@"\n""")
shortcut.chmod(0o777) # make sure its executable by everyone
###################### Config ##########################
class ChromeDependencyConfigs(BaseConfigSet):
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
CHROME_BINARY: str = Field(default='chrome')
CHROME_ARGS: Optional[List[str]] = Field(default=None)
CHROME_EXTRA_ARGS: List[str] = []
CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# def load(self) -> Self:
# # for each field in the model, load its value
# # load from each source in order of precedence (lowest to highest):
# # - schema default
# # - ArchiveBox.conf INI file
# # - environment variables
# # - command-line arguments
# LOADED_VALUES: Dict[str, Any] = {}
# for field_name, field in self.__fields__.items():
# def_value = field.default_factory() if field.default_factory else field.default
# ini_value = settings.INI_CONFIG.get_value(field_name)
# env_value = settings.ENV_CONFIG.get_value(field_name)
# cli_value = settings.CLI_CONFIG.get_value(field_name)
# run_value = settings.RUN_CONFIG.get_value(field_name)
# value = run_value or cli_value or env_value or ini_value or def_value
class ChromeConfigs(ChromeDependencyConfigs):
# section: ConfigSectionName = 'ALL_CONFIGS'
pass
DEFAULT_GLOBAL_CONFIG = {
}
CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
class ChromeBinary(BaseBinary):
name: BinName = CHROME_CONFIG.CHROME_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
env.name: {
'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH), # /usr/bin/google-chrome-stable
},
PUPPETEER_BINPROVIDER.name: {
'packages': lambda: ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
},
PLAYWRIGHT_BINPROVIDER.name: {
'packages': lambda: ['chromium'], # playwright install chromium
},
}
@staticmethod
def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
if not (binary.abspath and binary.abspath.exists()):
return
bin_dir.mkdir(parents=True, exist_ok=True)
symlink = bin_dir / binary.name
if platform.system().lower() == 'darwin':
# if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
create_macos_app_symlink(binary.abspath, symlink)
else:
# otherwise on linux we can symlink directly to binary executable
symlink.symlink_to(binary.abspath)
CHROME_BINARY = ChromeBinary()
PLUGIN_BINARIES = [CHROME_BINARY]
class ChromePlugin(BasePlugin):
app_label: str = 'chrome'
verbose_name: str = 'Chrome Browser'
hooks: List[InstanceOf[BaseHook]] = [
CHROME_CONFIG,
CHROME_BINARY,
]
PLUGIN = ChromePlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -0,0 +1,153 @@
__package__ = 'archivebox.plugins_extractor.singlefile'
from pathlib import Path
from typing import List, Dict, Optional, ClassVar
# from typing_extensions import Self
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env
from plugantic.base_extractor import BaseExtractor
from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.base.apps import ARCHIVING_CONFIG
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################
class SinglefileToggleConfigs(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_TOGGLES'
SAVE_SINGLEFILE: bool = True
class SinglefileOptionsConfigs(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_OPTIONS'
SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)
SINGLEFILE_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
SINGLEFILE_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
SINGLEFILE_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE)
class SinglefileDependencyConfigs(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
SINGLEFILE_BINARY: str = Field(default='wget')
SINGLEFILE_ARGS: Optional[List[str]] = Field(default=None)
SINGLEFILE_EXTRA_ARGS: List[str] = []
SINGLEFILE_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
class SinglefileConfigs(SinglefileToggleConfigs, SinglefileOptionsConfigs, SinglefileDependencyConfigs):
# section: ClassVar[ConfigSectionName] = 'ALL_CONFIGS'
pass
DEFAULT_GLOBAL_CONFIG = {
'CHECK_SSL_VALIDITY': False,
'SAVE_SINGLEFILE': True,
'TIMEOUT': 120,
}
SINGLEFILE_CONFIG = SinglefileConfigs(**DEFAULT_GLOBAL_CONFIG)
SINGLEFILE_MIN_VERSION = '1.1.54'
SINGLEFILE_MAX_VERSION = '1.1.60'
class SinglefileBinary(BaseBinary):
name: BinName = 'single-file'
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
env.name: {
'abspath': lambda:
bin_abspath('single-file', PATH=env.PATH) or bin_abspath('single-file-node.js', PATH=env.PATH),
},
LIB_NPM_BINPROVIDER.name: {
"abspath": lambda:
bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH) or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH),
"packages": lambda:
[f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
},
SYS_NPM_BINPROVIDER.name: {
"packages": lambda:
[], # prevent modifying system global npm packages
},
}
@validate_call
def install(self, binprovider_name: Optional[BinProviderName]=None) -> ShallowBinary:
# force install to only use lib/npm provider, we never want to modify global NPM packages
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name)
@validate_call
def load_or_install(self, binprovider_name: Optional[BinProviderName] = None) -> ShallowBinary:
# force install to only use lib/npm provider, we never want to modify global NPM packages
try:
return self.load()
except Exception:
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name)
# ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = PLUGANTIC_DIR / 'ansible' / 'install_singlefile.yml'
# singlefile_bin = run_playbook(install_playbook, data_dir=settings.CONFIG.OUTPUT_DIR, quiet=quiet).BINARIES.singlefile
# return self.__class__.model_validate(
# {
# **self.model_dump(),
# "loaded_abspath": singlefile_bin.abspath,
# "loaded_version": singlefile_bin.version,
# "loaded_binprovider": env,
# "binproviders_supported": self.binproviders_supported,
# }
# )
SINGLEFILE_BINARY = SinglefileBinary()
PLUGIN_BINARIES = [SINGLEFILE_BINARY]
class SinglefileExtractor(BaseExtractor):
name: str = 'singlefile'
binary: BinName = SINGLEFILE_BINARY.name
def get_output_path(self, snapshot) -> Path:
return Path(snapshot.link_dir) / 'singlefile.html'
SINGLEFILE_BINARY = SinglefileBinary()
SINGLEFILE_EXTRACTOR = SinglefileExtractor()
class SinglefileQueue(BaseQueue):
name: str = 'singlefile'
binaries: List[InstanceOf[BaseBinary]] = [SINGLEFILE_BINARY]
SINGLEFILE_QUEUE = SinglefileQueue()
class SinglefilePlugin(BasePlugin):
app_label: str ='singlefile'
verbose_name: str = 'SingleFile'
hooks: List[InstanceOf[BaseHook]] = [
SINGLEFILE_CONFIG,
SINGLEFILE_BINARY,
SINGLEFILE_EXTRACTOR,
SINGLEFILE_QUEUE,
]
PLUGIN = SinglefilePlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -0,0 +1,26 @@
# Generated by Django 5.1.1 on 2024-09-10 05:05
from django.db import migrations
class Migration(migrations.Migration):
initial = True
dependencies = [
('core', '0074_alter_snapshot_downloaded_at'),
]
operations = [
migrations.CreateModel(
name='SinglefileResult',
fields=[
],
options={
'proxy': True,
'indexes': [],
'constraints': [],
},
bases=('core.archiveresult',),
),
]

View file

@ -0,0 +1,14 @@
from django.db import models
from core.models import ArchiveResult
class SinglefileResultManager(models.Manager):
def get_queryset(self):
return super().get_queryset().filter(extractor='singlefile')
class SinglefileResult(ArchiveResult):
objects = SinglefileResultManager()
class Meta:
proxy = True

View file

@ -0,0 +1,40 @@
__package__ = 'archivebox.queues'
import time
from django.core.cache import cache
from huey import crontab
from django_huey import db_task, on_startup, db_periodic_task
from huey_monitor.models import TaskModel
from huey_monitor.tqdm import ProcessInfo
@db_task(queue="singlefile", context=True)
def extract(url, out_dir, config, task=None, parent_task_id=None):
if task and parent_task_id:
TaskModel.objects.set_parent_task(main_task_id=parent_task_id, sub_task_id=task.id)
process_info = ProcessInfo(task, desc="extract_singlefile", parent_task_id=parent_task_id, total=1)
time.sleep(5)
process_info.update(n=1)
return {'output': 'singlefile.html', 'status': 'succeeded'}
# @on_startup(queue='singlefile')
# def start_singlefile_queue():
# print("[+] Starting singlefile worker...")
# update_version.call_local()
# @db_periodic_task(crontab(minute='*/5'), queue='singlefile')
# def update_version():
# print('[*] Updating singlefile version... 5 minute interval')
# from django.conf import settings
# bin = settings.BINARIES.SinglefileBinary.load()
# if bin.version:
# cache.set(f"bin:abspath:{bin.name}", bin.abspath)
# cache.set(f"bin:version:{bin.name}:{bin.abspath}", bin.version)
# print('[√] Updated singlefile version:', bin.version, bin.abspath)

View file

@ -0,0 +1,80 @@
from typing import List, Dict, ClassVar
from subprocess import run, PIPE
from pydantic import InstanceOf, Field
from django.conf import settings
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, apt, brew
from plugantic.base_hook import BaseHook
from plugins_pkg.pip.apps import pip
###################### Config ##########################
class YtdlpDependencyConfigs(BaseConfigSet):
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_YTDLP: bool = True
YTDLP_BINARY: str = Field(default='yt-dlp')
DEFAULT_GLOBAL_CONFIG = {}
YTDLP_CONFIG = YtdlpDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
class YtdlpBinary(BaseBinary):
name: BinName = YTDLP_CONFIG.YTDLP_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [pip, apt, brew, env]
class FfmpegBinary(BaseBinary):
name: BinName = 'ffmpeg'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
'env': {
# 'abspath': lambda: shutil.which('ffmpeg', PATH=env.PATH),
# 'version': lambda: run(['ffmpeg', '-version'], stdout=PIPE, stderr=PIPE, text=True).stdout,
},
'apt': {
# 'abspath': lambda: shutil.which('ffmpeg', PATH=apt.PATH),
'version': lambda: run(['apt', 'show', 'ffmpeg'], stdout=PIPE, stderr=PIPE, text=True).stdout,
},
'brew': {
# 'abspath': lambda: shutil.which('ffmpeg', PATH=brew.PATH),
'version': lambda: run(['brew', 'info', 'ffmpeg', '--quiet'], stdout=PIPE, stderr=PIPE, text=True).stdout,
},
}
# def get_ffmpeg_version(self) -> Optional[str]:
# return self.exec(cmd=['-version']).stdout
YTDLP_BINARY = YtdlpBinary()
FFMPEG_BINARY = FfmpegBinary()
# class YtdlpExtractor(BaseExtractor):
# name: str = 'ytdlp'
# binary: str = 'ytdlp'
class YtdlpPlugin(BasePlugin):
app_label: str = 'ytdlp'
verbose_name: str = 'YT-DLP'
docs_url: str = 'https://github.com/yt-dlp/yt-dlp'
hooks: List[InstanceOf[BaseHook]] = [
YTDLP_CONFIG,
YTDLP_BINARY,
FFMPEG_BINARY,
]
PLUGIN = YtdlpPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig