split plugin dirs, created new cleaner import path for plugin config in settings.py

This commit is contained in:
Nick Sweeting 2024-09-24 01:25:55 -07:00
parent 1a58967e8c
commit a9a97c013d
No known key found for this signature in database
39 changed files with 469 additions and 199 deletions

View file

View file

@ -0,0 +1,86 @@
__package__ = 'archivebox.pkg_plugins.npm'
from pathlib import Path
from typing import List, Optional
from django.conf import settings
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook
from ...config import CONFIG
###################### Config ##########################
class NpmDependencyConfigs(BaseConfigSet):
    """Config set for the npm plugin; no options are declared yet."""

    # Candidate options, currently disabled:
    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
    # USE_NPM: bool = True
    # NPM_BINARY: str = Field(default='npm')
    # NPM_ARGS: Optional[List[str]] = Field(default=None)
    # NPM_EXTRA_ARGS: List[str] = []
    # NPM_DEFAULT_ARGS: List[str] = []
# No global overrides are defined; the config set is built from its own defaults.
DEFAULT_GLOBAL_CONFIG = {}
NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
class SystemNpmProvider(NpmProvider, BaseBinProvider):
    """NpmProvider registered under the name ``sys_npm`` with no ``npm_prefix`` set."""

    name: BinProviderName = 'sys_npm'
    PATH: PATHStr = str(CONFIG.NODE_BIN_PATH)

    # presumably None selects npm's global/system prefix — confirm against NpmProvider
    npm_prefix: Optional[Path] = None
class LibNpmProvider(NpmProvider, BaseBinProvider):
    """NpmProvider registered as ``lib_npm`` whose prefix is ``<LIB_DIR>/npm``."""

    name: BinProviderName = 'lib_npm'
    PATH: PATHStr = str(CONFIG.NODE_BIN_PATH)

    # prefix directory taken from the Django settings CONFIG object
    npm_prefix: Optional[Path] = settings.CONFIG.LIB_DIR / "npm"
# Module-level provider instances; both are listed in NpmPlugin.hooks below.
SYS_NPM_BINPROVIDER = SystemNpmProvider()
LIB_NPM_BINPROVIDER = LibNpmProvider()
# `npm` is an alias for the lib_npm provider instance.
npm = LIB_NPM_BINPROVIDER
class NpmBinary(BaseBinary):
    """Binary definition for ``npm``, looked up via the apt, brew and env providers."""

    name: BinName = "npm"
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        apt,
        brew,
        env,
    ]
# Module-level NpmBinary instance, listed in NpmPlugin.hooks.
NPM_BINARY = NpmBinary()
class NodeBinary(BaseBinary):
    """Binary definition for ``node``, looked up via the apt, brew and env providers."""

    name: BinName = "node"
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        apt,
        brew,
        env,
    ]
# Module-level NodeBinary instance, listed in NpmPlugin.hooks.
NODE_BINARY = NodeBinary()
class NpmPlugin(BasePlugin):
    """Plugin bundling the npm config set, both npm providers, and the node/npm binaries."""

    app_label: str = "npm"
    verbose_name: str = "NPM"

    hooks: List[InstanceOf[BaseHook]] = [
        # config
        NPM_CONFIG,
        # bin providers
        SYS_NPM_BINPROVIDER,
        LIB_NPM_BINPROVIDER,
        # binaries
        NODE_BINARY,
        NPM_BINARY,
    ]
# Instantiate the plugin, call its register() with the Django settings object,
# and expose the plugin's AppConfig attribute as DJANGO_APP.
PLUGIN = NpmPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

View file

@ -0,0 +1,198 @@
import os
import sys
import inspect
from pathlib import Path
from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field
import django
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from django.core.checks import Error, Tags
from django.conf import settings
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_check import BaseCheck
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook
###################### Config ##########################
class PipDependencyConfigs(BaseConfigSet):
    """Config set for the pip plugin, filed under the ``DEPENDENCY_CONFIG`` section."""

    section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'

    USE_PIP: bool = True
    PIP_BINARY: str = Field(default="pip")
    PIP_ARGS: Optional[List[str]] = Field(default=None)
    PIP_EXTRA_ARGS: List[str] = []
    PIP_DEFAULT_ARGS: List[str] = []
# No global overrides are defined; the config set is built from its own defaults.
DEFAULT_GLOBAL_CONFIG = {}
PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
class SystemPipBinProvider(PipProvider, BaseBinProvider):
    """PipProvider named ``sys_pip`` that never installs anything."""

    name: BinProviderName = 'sys_pip'
    INSTALLER_BIN: BinName = 'pip'

    pip_venv: Optional[Path] = None          # global pip scope

    def on_install(self, bin_name: str, **kwargs):
        """Return a refusal message instead of installing ``bin_name``.

        System pip packages are never modified; no command is run and no
        exception is raised.
        """
        refusal = 'refusing to install packages globally with system pip, use a venv instead'
        return refusal
class SystemPipxBinProvider(PipProvider, BaseBinProvider):
    """PipProvider named ``pipx`` that uses ``pipx`` as its installer binary."""

    name: BinProviderName = 'pipx'
    INSTALLER_BIN: BinName = 'pipx'

    pip_venv: Optional[Path] = None          # global pipx scope
class VenvPipBinProvider(PipProvider, BaseBinProvider):
    """PipProvider named ``venv_pip`` bound to the virtualenv in ``$VIRTUAL_ENV``."""

    name: BinProviderName = 'venv_pip'
    INSTALLER_BIN: BinName = 'pip'

    # Evaluated once at import time; an unset or empty $VIRTUAL_ENV falls back
    # to the placeholder path below.
    pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV") or '/tmp/NotInsideAVenv')
class LibPipBinProvider(PipProvider, BaseBinProvider):
    """PipProvider named ``lib_pip`` whose venv is ``<OUTPUT_DIR>/lib/pip/venv``."""

    name: BinProviderName = 'lib_pip'
    INSTALLER_BIN: BinName = 'pip'

    pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / "lib" / "pip" / "venv"
# Module-level provider instances; all four are listed in PipPlugin.hooks below.
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
VENV_PIP_BINPROVIDER = VenvPipBinProvider()
LIB_PIP_BINPROVIDER = LibPipBinProvider()
# `pip` is an alias for the lib_pip provider instance.
pip = LIB_PIP_BINPROVIDER
class PythonBinary(BaseBinary):
    """Binary definition for ``python``.

    For the ``sys_pip`` provider the path and version are taken from the
    running interpreter (``sys.executable`` / ``sys.version_info``).
    """

    name: BinName = "python"

    binproviders_supported: List[InstanceOf[BinProvider]] = [
        VENV_PIP_BINPROVIDER,
        SYS_PIP_BINPROVIDER,
        apt,
        brew,
        env,
    ]
    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        SYS_PIP_BINPROVIDER.name: {
            "abspath": lambda: sys.executable,
            "version": lambda: '{}.{}.{}'.format(*sys.version_info[:3]),
        },
    }
# Module-level PythonBinary instance, listed in PipPlugin.hooks.
PYTHON_BINARY = PythonBinary()
class SqliteBinary(BaseBinary):
    """Binary definition for ``sqlite``, described by the sqlite3 module Django imports.

    Both supported providers get the same overrides: the path is the file that
    defines Django's sqlite3 ``Database`` module and the version is that
    module's ``version`` attribute wrapped in ``SemVer``.
    """

    name: BinName = "sqlite"
    binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])

    # One separate override dict per provider, built in the same key order as
    # binproviders_supported.
    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        provider.name: {
            "abspath": lambda: Path(inspect.getfile(django_sqlite3)),
            "version": lambda: SemVer(django_sqlite3.version),
        }
        for provider in (VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER)
    }
# Module-level SqliteBinary instance, listed in PipPlugin.hooks.
SQLITE_BINARY = SqliteBinary()
class DjangoBinary(BaseBinary):
    """Binary definition for ``django``, described by the imported ``django`` package.

    Both supported providers get the same overrides: the path is the file the
    ``django`` module was loaded from and the version is the first three
    items of ``django.VERSION``.
    """

    name: BinName = "django"
    binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])

    # One separate override dict per provider, built in the same key order as
    # binproviders_supported.
    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        provider.name: {
            "abspath": lambda: inspect.getfile(django),
            "version": lambda: django.VERSION[:3],
        }
        for provider in (VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER)
    }
# Module-level DjangoBinary instance, listed in PipPlugin.hooks.
DJANGO_BINARY = DjangoBinary()
class PipBinary(BaseBinary):
    """Binary definition for ``pip``; the lib_pip provider is listed first."""

    name: BinName = 'pip'
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        LIB_PIP_BINPROVIDER,
        VENV_PIP_BINPROVIDER,
        SYS_PIP_BINPROVIDER,
        apt,
        brew,
        env,
    ]
# Module-level PipBinary instance, listed in PipPlugin.hooks.
PIP_BINARY = PipBinary()
class CheckUserIsNotRoot(BaseCheck):
    """Check that reports an error when the settings describe the root user."""

    label: str = 'CheckUserIsNotRoot'
    tag: str = Tags.database

    @staticmethod
    def check(settings, logger) -> List[Warning]:
        """Return a list holding one ``Error`` if running as root, else an empty list.

        The user counts as root when ``settings.USER`` equals ``'root'`` or
        ``settings.PUID`` equals ``0``; missing attributes are treated as
        "not root". The success message is logged only when the check passes.

        NOTE(review): the return annotation names the builtin ``Warning`` while
        the list actually holds ``django.core.checks.Error`` objects; it is left
        unchanged here to keep the signature stable.
        """
        errors = []
        is_root = getattr(settings, "USER", None) == 'root' or getattr(settings, "PUID", None) == 0
        if is_root:
            errors.append(
                Error(
                    "Cannot run as root!",
                    id="core.S001",
                    hint=f'Run ArchiveBox as a non-root user with a UID greater than 500. (currently running as UID {os.getuid()}).',
                )
            )
        else:
            # Only claim success when no error was recorded.
            logger.debug('[√] UID is not root')
        return errors
class CheckPipEnvironment(BaseCheck):
    """Check that the lib_pip provider can be set up and exposes an installer binary."""

    label: str = "CheckPipEnvironment"
    tag: str = Tags.database

    @staticmethod
    def check(settings, logger) -> List[Warning]:
        """Run ``LIB_PIP_BINPROVIDER.setup()`` and report an error if it left no installer.

        Returns a list holding one ``Error`` (id ``pip.P001``) when
        ``LIB_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH`` is falsy after setup,
        otherwise an empty list. The success message is logged only when the
        check passes.

        NOTE(review): the return annotation names the builtin ``Warning`` while
        the list actually holds ``django.core.checks.Error`` objects; it is left
        unchanged here to keep the signature stable.
        """
        errors = []

        LIB_PIP_BINPROVIDER.setup()
        if not LIB_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH:
            errors.append(
                Error(
                    "Failed to setup data/lib/pip virtualenv for runtime dependencies!",
                    id="pip.P001",
                    hint="Make sure the data dir is writable and make sure python3-pip and python3-venv are installed & available on the host.",
                )
            )
        else:
            # Only claim success when no error was recorded.
            logger.debug("[√] CheckPipEnvironment: data/lib/pip virtualenv is setup properly")

        return errors
# Module-level check instances, both listed in PipPlugin.hooks below.
USER_IS_NOT_ROOT_CHECK = CheckUserIsNotRoot()
PIP_ENVIRONMENT_CHECK = CheckPipEnvironment()
class PipPlugin(BasePlugin):
    """Plugin bundling the pip config set, pip providers, Python-side binaries and checks."""

    app_label: str = "pip"
    verbose_name: str = "PIP"

    hooks: List[InstanceOf[BaseHook]] = [
        # config
        PIP_CONFIG,
        # bin providers
        SYS_PIP_BINPROVIDER,
        PIPX_PIP_BINPROVIDER,
        VENV_PIP_BINPROVIDER,
        LIB_PIP_BINPROVIDER,
        # binaries
        PIP_BINARY,
        PYTHON_BINARY,
        SQLITE_BINARY,
        DJANGO_BINARY,
        # checks
        USER_IS_NOT_ROOT_CHECK,
        PIP_ENVIRONMENT_CHECK,
    ]
# Instantiate the plugin, call its register() with the Django settings object,
# and expose the plugin's AppConfig attribute as DJANGO_APP.
PLUGIN = PipPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -0,0 +1,181 @@
import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, computed_field, Field
from pydantic_pkgr import (
BinName,
BinProvider,
BinProviderName,
ProviderLookupDict,
InstallArgs,
PATHStr,
HostBinPath,
bin_abspath,
OPERATING_SYSTEM,
DEFAULT_ENV_PATH,
)
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
from pkg_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
###################### Config ##########################
class PlaywrightConfigs(BaseConfigSet):
    """Config set for the playwright plugin; no options are declared yet."""

    # Candidate options, currently disabled:
    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
    # PLAYWRIGHT_BINARY: str = Field(default='wget')
    # PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None)
    # PLAYWRIGHT_EXTRA_ARGS: List[str] = []
    # PLAYWRIGHT_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# No global overrides are defined; the config set is built from its own defaults.
DEFAULT_GLOBAL_CONFIG = {}
PLAYWRIGHT_CONFIG = PlaywrightConfigs(**DEFAULT_GLOBAL_CONFIG)

# <OUTPUT_DIR>/lib/browsers
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / 'lib' / 'browsers'
class PlaywrightBinary(BaseBinary):
    """Binary definition for the ``playwright`` CLI, looked up via the pip providers and env."""

    name: BinName = 'playwright'
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        LIB_PIP_BINPROVIDER,
        VENV_PIP_BINPROVIDER,
        SYS_PIP_BINPROVIDER,
        env,
    ]
# Module-level PlaywrightBinary instance; PlaywrightBinProvider below uses it as its installer.
PLAYWRIGHT_BINARY = PlaywrightBinary()
class PlaywrightBinProvider(BaseBinProvider):
    """BinProvider that installs browsers with ``playwright install`` and finds them on disk.

    Only the ``chrome`` bin name is supported; ``packages_handler`` maps it to
    the ``chromium`` package. Resolved browser paths are memoised in the
    class-level ``_browser_abspaths`` dict.
    """

    name: BinProviderName = "playwright"
    INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
    PATH: PATHStr = f"{settings.CONFIG.BIN_DIR}:{DEFAULT_ENV_PATH}"

    # The "puppeteer_" prefix on these two fields is kept because field names
    # are part of this model's interface.
    puppeteer_browsers_dir: Optional[Path] = (
        Path("~/Library/Caches/ms-playwright").expanduser()
        if OPERATING_SYSTEM == "darwin" else
        Path("~/.cache/ms-playwright").expanduser()
    )
    puppeteer_install_args: List[str] = ["install"]  # --with-deps

    packages_handler: ProviderLookupDict = Field(default={
        "chrome": lambda: ["chromium"],
    }, exclude=True)

    # bin name -> resolved browser path; a ClassVar, so shared by all instances
    _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}

    @computed_field
    @property
    def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
        """Absolute path of the ``playwright`` CLI as reported by ``PLAYWRIGHT_BINARY.load()``."""
        return PLAYWRIGHT_BINARY.load().abspath

    def setup(self) -> None:
        """Assert the system pip provider has an installer and create the browsers dir."""
        assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"

        if self.puppeteer_browsers_dir:
            self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)

    def installed_browser_bins(self, browser_name: str = "*") -> List[Path]:
        """Return the sorted paths under the browsers dir matching ``browser_name``.

        ``'chrome'`` is translated to ``'chromium'``; the default ``'*'`` matches
        any browser directory. Returns an empty list when no browsers dir is
        configured.
        """
        if self.puppeteer_browsers_dir is None:
            # The field is Optional; nothing to search without a directory.
            return []

        if browser_name == 'chrome':
            browser_name = 'chromium'

        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
        if platform.system().lower() == "darwin":
            # e.g. ~/Library/Caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium
            return sorted(
                self.puppeteer_browsers_dir.glob(
                    f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*"
                )
            )

        # e.g. ~/.cache/ms-playwright/chromium-1097/chrome-linux/chrome
        return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}-*/*-linux/*"))

    def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
        """Return the path of the installed browser for ``bin_name``, or None if not found.

        Lookup order: the in-memory cache, then the browsers dir on disk, then
        ``google-chrome-stable`` on the env provider's PATH. A hit is cached.

        Raises:
            AssertionError: if ``bin_name`` is not ``"chrome"``.
        """
        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."

        # already loaded, return abspath from cache
        if bin_name in self._browser_abspaths:
            return self._browser_abspaths[bin_name]

        # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
        # NOTE(review): installed_browser_bins() is called with its default "*"
        # pattern, so only the substring filter below narrows the candidates.
        matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
        if matching_bins:
            newest_bin = matching_bins[-1]  # already sorted alphabetically, last should theoretically be highest version number
            self._browser_abspaths[bin_name] = newest_bin
            return self._browser_abspaths[bin_name]

        # playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well
        abspath = bin_abspath('google-chrome-stable', PATH=env.PATH)
        if abspath:
            self._browser_abspaths[bin_name] = abspath
            return self._browser_abspaths[bin_name]

        return None

    def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
        """Run ``playwright install <packages>`` and return its combined stderr + stdout.

        Args:
            bin_name: must be ``"chrome"``.
            packages: packages to install; defaults to ``self.on_get_packages(bin_name)``.

        Raises:
            AssertionError: if ``bin_name`` is not ``"chrome"``.
            Exception: if the installer binary is missing or exits non-zero.
        """
        self.setup()
        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."

        if not self.INSTALLER_BIN_ABSPATH:
            raise Exception(
                f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
            )
        packages = packages or self.on_get_packages(bin_name)

        install_args = [*self.puppeteer_install_args]

        proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])

        if proc.returncode != 0:
            print(proc.stdout.strip())
            print(proc.stderr.strip())
            raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {bin_name}: {packages}")

        # The last stdout line is parsed as "<name>@<version> <abspath>", e.g.
        # chrome@129.0.6668.58 /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
        output_info = proc.stdout.strip().split("\n")[-1]
        browser_abspath = Path(output_info.split(" ", 1)[-1])

        # Cache only a path that really is a file. If stdout was empty or had a
        # different shape, on_get_abspath() will search the filesystem instead
        # of returning a bogus cached entry.
        if browser_abspath.is_file():
            self._browser_abspaths[bin_name] = browser_abspath

        return proc.stderr.strip() + "\n" + proc.stdout.strip()
# Module-level PlaywrightBinProvider instance, listed in PlaywrightPlugin.hooks.
PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider()
class PlaywrightPlugin(BasePlugin):
    """Plugin bundling the playwright config set, bin provider and binary."""

    app_label: str = "playwright"
    verbose_name: str = "Playwright (PIP)"

    hooks: List[InstanceOf[BaseHook]] = [
        PLAYWRIGHT_CONFIG,          # config
        PLAYWRIGHT_BINPROVIDER,     # bin provider
        PLAYWRIGHT_BINARY,          # binary
    ]
# Instantiate the plugin, call its register() with the Django settings object,
# and expose the plugin's AppConfig attribute as DJANGO_APP.
PLUGIN = PlaywrightPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -0,0 +1,169 @@
import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import (
BinProvider,
BinName,
BinProviderName,
ProviderLookupDict,
InstallArgs,
PATHStr,
HostBinPath,
)
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
# Depends on Other Plugins:
from pkg_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
###################### Config ##########################
class PuppeteerConfigs(BaseConfigSet):
    """Config set for the puppeteer plugin; no options are declared yet."""

    # Candidate options, currently disabled:
    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
    # PUPPETEER_BINARY: str = Field(default='wget')
    # PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
    # PUPPETEER_EXTRA_ARGS: List[str] = []
    # PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# No global overrides are defined; the config set is built from its own defaults.
DEFAULT_GLOBAL_CONFIG = {}
PUPPETEER_CONFIG = PuppeteerConfigs(**DEFAULT_GLOBAL_CONFIG)

# <OUTPUT_DIR>/lib/browsers — install target used by PuppeteerBinProvider below
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / 'lib' / 'browsers'
class PuppeteerBinary(BaseBinary):
    """Binary definition for ``puppeteer``, looked up via the npm providers and env."""

    name: BinName = 'puppeteer'
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        LIB_NPM_BINPROVIDER,
        SYS_NPM_BINPROVIDER,
        env,
    ]
# Module-level PuppeteerBinary instance, listed in PuppeteerPlugin.hooks.
PUPPETEER_BINARY = PuppeteerBinary()
class PuppeteerBinProvider(BaseBinProvider):
    """BinProvider that installs browsers with ``npx @puppeteer/browsers install``.

    Only the ``chrome`` bin name is supported; ``packages_handler`` maps it to
    ``chrome@stable``. Browsers are installed into ``LIB_DIR_BROWSERS`` and
    resolved paths are memoised in the class-level ``_browser_abspaths`` dict.
    """

    name: BinProviderName = "puppeteer"
    INSTALLER_BIN: BinName = "npx"
    PATH: PATHStr = str(settings.CONFIG.BIN_DIR)

    puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
    puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]

    packages_handler: ProviderLookupDict = Field(default={
        "chrome": lambda:
            ['chrome@stable'],
    }, exclude=True)

    # bin name -> resolved browser path; a ClassVar, so shared by all instances
    _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}

    def setup(self) -> None:
        """Assert the system npm provider has an installer and create the browsers dir."""
        assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"

        if self.puppeteer_browsers_dir:
            self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)

    def installed_browser_bins(self, browser_name: str='*') -> List[Path]:
        """Return the sorted browser executables under the browsers dir for ``browser_name``.

        The default ``'*'`` matches any browser directory. Returns an empty
        list when no browsers dir is configured.
        """
        if self.puppeteer_browsers_dir is None:
            # The field is Optional; nothing to search without a directory.
            return []

        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
        if platform.system().lower() == 'darwin':
            # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
            return sorted(self.puppeteer_browsers_dir.glob(f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))

        # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
        return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome"))

    def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
        """Return the path of the installed browser for ``bin_name``, or None if not found.

        The in-memory cache is consulted first, then the browsers dir on disk.
        A filesystem hit is cached.

        Raises:
            AssertionError: if ``bin_name`` is not ``'chrome'``.
        """
        assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'

        # already loaded, return abspath from cache
        if bin_name in self._browser_abspaths:
            return self._browser_abspaths[bin_name]

        # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
        matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
        if matching_bins:
            newest_bin = matching_bins[-1]  # already sorted alphabetically, last should theoretically be highest version number
            self._browser_abspaths[bin_name] = newest_bin
            return self._browser_abspaths[bin_name]

        return None

    def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
        """Run ``npx @puppeteer/browsers install <packages>`` and return its stderr + stdout.

        Args:
            bin_name: must be ``'chrome'``.
            packages: packages to install; defaults to ``self.on_get_packages(bin_name)``.

        Raises:
            AssertionError: if ``bin_name`` is not ``'chrome'``.
            Exception: if the installer binary is missing or exits non-zero.
        """
        self.setup()
        assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'

        if not self.INSTALLER_BIN_ABSPATH:
            raise Exception(
                f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
            )
        packages = packages or self.on_get_packages(bin_name)

        install_args = [*self.puppeteer_install_args]

        proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])

        if proc.returncode != 0:
            print(proc.stdout.strip())
            print(proc.stderr.strip())
            raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {bin_name}: {packages}")

        # The last stdout line is parsed as "<name>@<version> <abspath>", e.g.
        # chrome@129.0.6668.58 /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
        output_info = proc.stdout.strip().split('\n')[-1]
        browser_abspath = Path(output_info.split(' ', 1)[-1])

        # Cache only a path that really is a file. If stdout was empty or had a
        # different shape, on_get_abspath() will search the filesystem instead
        # of returning a bogus cached entry.
        if browser_abspath.is_file():
            self._browser_abspaths[bin_name] = browser_abspath

        return proc.stderr.strip() + "\n" + proc.stdout.strip()
# Module-level PuppeteerBinProvider instance, listed in PuppeteerPlugin.hooks.
PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
# ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = self.plugin_dir / 'install_puppeteer.yml'
# chrome_bin = run_playbook(install_playbook, data_dir=settings.CONFIG.OUTPUT_DIR, quiet=quiet).BINARIES.chrome
# return self.__class__.model_validate(
# {
# **self.model_dump(),
# "loaded_abspath": chrome_bin.symlink,
# "loaded_version": chrome_bin.version,
# "loaded_binprovider": env,
# "binproviders_supported": self.binproviders_supported,
# }
# )
class PuppeteerPlugin(BasePlugin):
    """Plugin bundling the puppeteer config set, bin provider and binary."""

    app_label: str = "puppeteer"
    verbose_name: str = "Puppeteer (NPM)"

    hooks: List[InstanceOf[BaseHook]] = [
        PUPPETEER_CONFIG,          # config
        PUPPETEER_BINPROVIDER,     # bin provider
        PUPPETEER_BINARY,          # binary
    ]
# Instantiate the plugin, call its register() with the Django settings object,
# and expose the plugin's AppConfig attribute as DJANGO_APP.
PLUGIN = PuppeteerPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig