diff --git a/archivebox/builtin_plugins/chrome/__init__.py b/archivebox/builtin_plugins/chrome/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/builtin_plugins/chrome/apps.py b/archivebox/builtin_plugins/chrome/apps.py new file mode 100644 index 00000000..d24e968b --- /dev/null +++ b/archivebox/builtin_plugins/chrome/apps.py @@ -0,0 +1,132 @@ +import platform +from pathlib import Path +from typing import List, Optional, Dict + +from django.conf import settings + +# Depends on other PyPI/vendor packages: +from pydantic import InstanceOf, Field +from pydantic_pkgr import ( + BinProvider, + BinName, + BinProviderName, + ProviderLookupDict, + bin_abspath, +) + +# Depends on other Django apps: +from plugantic.base_plugin import BasePlugin +from plugantic.base_configset import BaseConfigSet, ConfigSectionName +from plugantic.base_binary import BaseBinary, env +# from plugantic.base_extractor import BaseExtractor +# from plugantic.base_queue import BaseQueue +from plugantic.base_hook import BaseHook + +# Depends on Other Plugins: +from builtin_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER +from builtin_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER + + +CHROMIUM_BINARY_NAMES = [ + "chromium", + "chromium-browser", + "chromium-browser-beta", + "chromium-browser-unstable", + "chromium-browser-canary", + "chromium-browser-dev", + "/Applications/Chromium.app/Contents/MacOS/Chromium", +] +CHROME_BINARY_NAMES = [ + "google-chrome", + "google-chrome-stable", + "google-chrome-beta", + "google-chrome-canary", + "google-chrome-unstable", + "google-chrome-dev", + # 'chrome', + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", +] + + +def autodetect_system_chrome_install(PATH=None) -> Optional[Path]: + for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES: + abspath = bin_abspath(bin_name, PATH=env.PATH) + if abspath: + return abspath + return None + +###################### Config ########################## + + +class ChromeDependencyConfigs(BaseConfigSet): + section: ConfigSectionName = 'DEPENDENCY_CONFIG' + + CHROME_BINARY: str = Field(default='wget') + CHROME_ARGS: Optional[List[str]] = Field(default=None) + CHROME_EXTRA_ARGS: List[str] = [] + CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] + +class ChromeConfigs(ChromeDependencyConfigs): + # section: ConfigSectionName = 'ALL_CONFIGS' + pass + +DEFAULT_GLOBAL_CONFIG = { +} + +CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG) + + +class ChromeBinary(BaseBinary): + name: BinName = 'chrome' + binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER] + + provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { + env.name: { + 'abspath': lambda: + autodetect_system_chrome_install(PATH=env.PATH), + }, + PUPPETEER_BINPROVIDER.name: { + 'packages': lambda: + ['chrome@stable'], + }, + PLAYWRIGHT_BINPROVIDER.name: { + 'packages': lambda: + ['chromium'], + }, + } + + @staticmethod + def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None: + if not (binary.abspath and binary.abspath.exists()): + return + bin_dir.mkdir(parents=True, exist_ok=True) + symlink = bin_dir / binary.name + + if platform.system().lower() == 'darwin': + # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink + symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""") + symlink.chmod(0o777) # make sure its executable by everyone + else: + # otherwise on linux we can symlink directly to binary executable + symlink.symlink_to(binary.abspath) + + +CHROME_BINARY = ChromeBinary() + +PLUGIN_BINARIES = [CHROME_BINARY] + +class ChromePlugin(BasePlugin): + app_label: str ='puppeteer' + verbose_name: str = 'Chrome & Playwright' + + hooks: List[InstanceOf[BaseHook]] = [ + CHROME_CONFIG, + CHROME_BINARY, + ] + + + +PLUGIN = ChromePlugin() +PLUGIN.register(settings) +DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/builtin_plugins/npm/apps.py b/archivebox/builtin_plugins/npm/apps.py index c6c43660..44e54428 100644 --- a/archivebox/builtin_plugins/npm/apps.py +++ b/archivebox/builtin_plugins/npm/apps.py @@ -4,12 +4,12 @@ from pathlib import Path from typing import List, Optional from django.conf import settings -from pydantic import InstanceOf, Field +from pydantic import InstanceOf from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName +from plugantic.base_configset import BaseConfigSet from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew from plugantic.base_hook import BaseHook @@ -20,13 +20,14 @@ from ...config import CONFIG class NpmDependencyConfigs(BaseConfigSet): - section: ConfigSectionName = 'DEPENDENCY_CONFIG' + # section: ConfigSectionName = 'DEPENDENCY_CONFIG' - USE_NPM: bool = True - NPM_BINARY: str = Field(default='npm') - NPM_ARGS: Optional[List[str]] = Field(default=None) - NPM_EXTRA_ARGS: List[str] = [] - NPM_DEFAULT_ARGS: List[str] = [] + # USE_NPM: bool = True + # NPM_BINARY: str = Field(default='npm') + # NPM_ARGS: Optional[List[str]] = Field(default=None) + # NPM_EXTRA_ARGS: List[str] = [] + # NPM_DEFAULT_ARGS: List[str] = [] + pass DEFAULT_GLOBAL_CONFIG = { @@ -35,7 +36,7 @@ NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG) class SystemNpmProvider(NpmProvider, BaseBinProvider): - name: BinProviderName = "npm" + name: BinProviderName = "sys_npm" PATH: PATHStr = str(CONFIG.NODE_BIN_PATH) npm_prefix: Optional[Path] = None diff --git a/archivebox/builtin_plugins/pip/apps.py b/archivebox/builtin_plugins/pip/apps.py index a0b661c7..b339f247 100644 --- a/archivebox/builtin_plugins/pip/apps.py +++ b/archivebox/builtin_plugins/pip/apps.py @@ -30,6 +30,7 @@ class PipDependencyConfigs(BaseConfigSet): PIP_ARGS: Optional[List[str]] = Field(default=None) PIP_EXTRA_ARGS: List[str] = [] PIP_DEFAULT_ARGS: List[str] = [] + DEFAULT_GLOBAL_CONFIG = { @@ -37,15 +38,27 @@ DEFAULT_GLOBAL_CONFIG = { PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG) class SystemPipBinProvider(PipProvider, BaseBinProvider): - name: BinProviderName = "pip" + name: BinProviderName = "sys_pip" INSTALLER_BIN: BinName = "pip" pip_venv: Optional[Path] = None # global pip scope + def on_install(self, bin_name: str, **kwargs): + # never modify system pip packages + return 'refusing to install packages globally with system pip, use a venv instead' class SystemPipxBinProvider(PipProvider, BaseBinProvider): name: BinProviderName = "pipx" INSTALLER_BIN: BinName = "pipx" + + pip_venv: Optional[Path] = None # global pipx scope + + +class VenvPipBinProvider(PipProvider, BaseBinProvider): + name: BinProviderName = "venv_pip" + INSTALLER_BIN: BinName = "pip" + + pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV", None) or '/tmp/NotInsideAVenv') class LibPipBinProvider(PipProvider, BaseBinProvider): @@ -55,7 +68,8 @@ class LibPipBinProvider(PipProvider, BaseBinProvider): pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / 'lib' / 'pip' / 'venv' SYS_PIP_BINPROVIDER = SystemPipBinProvider() -SYS_PIPX_BINPROVIDER = SystemPipxBinProvider() +PIPX_PIP_BINPROVIDER = SystemPipxBinProvider() +VENV_PIP_BINPROVIDER = VenvPipBinProvider() LIB_PIP_BINPROVIDER = LibPipBinProvider() pip = LIB_PIP_BINPROVIDER @@ -64,7 +78,7 @@ pip = LIB_PIP_BINPROVIDER class PythonBinary(BaseBinary): name: BinName = 'python' - binproviders_supported: List[InstanceOf[BinProvider]] = [SYS_PIP_BINPROVIDER, apt, brew, env] + binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { SYS_PIP_BINPROVIDER.name: { 'abspath': lambda: @@ -78,13 +92,15 @@ PYTHON_BINARY = PythonBinary() class SqliteBinary(BaseBinary): name: BinName = 'sqlite' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER]) - provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { + binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) + provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { + VENV_PIP_BINPROVIDER.name: { + "abspath": lambda: Path(inspect.getfile(django_sqlite3)), + "version": lambda: SemVer(django_sqlite3.version), + }, SYS_PIP_BINPROVIDER.name: { - 'abspath': lambda: - Path(inspect.getfile(django_sqlite3)), - 'version': lambda: - SemVer(django_sqlite3.version), + "abspath": lambda: Path(inspect.getfile(django_sqlite3)), + "version": lambda: SemVer(django_sqlite3.version), }, } @@ -94,13 +110,15 @@ SQLITE_BINARY = SqliteBinary() class DjangoBinary(BaseBinary): name: BinName = 'django' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER]) - provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { + binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) + provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { + VENV_PIP_BINPROVIDER.name: { + "abspath": lambda: inspect.getfile(django), + "version": lambda: django.VERSION[:3], + }, SYS_PIP_BINPROVIDER.name: { - 'abspath': lambda: - inspect.getfile(django), - 'version': lambda: - django.VERSION[:3], + "abspath": lambda: inspect.getfile(django), + "version": lambda: django.VERSION[:3], }, } @@ -108,7 +126,7 @@ DJANGO_BINARY = DjangoBinary() class PipBinary(BaseBinary): name: BinName = "pip" - binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] + binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] PIP_BINARY = PipBinary() @@ -164,7 +182,8 @@ class PipPlugin(BasePlugin): hooks: List[InstanceOf[BaseHook]] = [ PIP_CONFIG, SYS_PIP_BINPROVIDER, - SYS_PIPX_BINPROVIDER, + PIPX_PIP_BINPROVIDER, + VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, PIP_BINARY, PYTHON_BINARY, diff --git a/archivebox/builtin_plugins/playwright/__init__.py b/archivebox/builtin_plugins/playwright/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/builtin_plugins/playwright/apps.py b/archivebox/builtin_plugins/playwright/apps.py new file mode 100644 index 00000000..f7bf1351 --- /dev/null +++ b/archivebox/builtin_plugins/playwright/apps.py @@ -0,0 +1,182 @@ +import platform +from pathlib import Path +from typing import List, Optional, Dict, ClassVar + +from django.conf import settings + +# Depends on other PyPI/vendor packages: +from pydantic import InstanceOf, computed_field, Field +from pydantic_pkgr import ( + BinName, + BinProvider, + BinProviderName, + ProviderLookupDict, + InstallArgs, + PATHStr, + HostBinPath, + bin_abspath, + OPERATING_SYSTEM, + DEFAULT_ENV_PATH, +) + +# Depends on other Django apps: +from plugantic.base_plugin import BasePlugin +from plugantic.base_configset import BaseConfigSet +from plugantic.base_binary import BaseBinary, BaseBinProvider, env +# from plugantic.base_extractor import BaseExtractor +# from plugantic.base_queue import BaseQueue +from plugantic.base_hook import BaseHook + +# Depends on Other Plugins: +from builtin_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER + + +###################### Config ########################## + + +class PlaywrightConfigs(BaseConfigSet): + # section: ConfigSectionName = 'DEPENDENCY_CONFIG' + + # PLAYWRIGHT_BINARY: str = Field(default='wget') + # PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None) + # PLAYWRIGHT_EXTRA_ARGS: List[str] = [] + # PLAYWRIGHT_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] + pass + +DEFAULT_GLOBAL_CONFIG = { +} + +PLAYWRIGHT_CONFIG = PlaywrightConfigs(**DEFAULT_GLOBAL_CONFIG) + +LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers" + + + +class PlaywrightBinary(BaseBinary): + name: BinName = "playwright" + + binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env] + + + +PLAYWRIGHT_BINARY = PlaywrightBinary() + + +class PlaywrightBinProvider(BaseBinProvider): + name: BinProviderName = "playwright" + INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name + + PATH: PATHStr = f"{settings.CONFIG.BIN_DIR}:{DEFAULT_ENV_PATH}" + + puppeteer_browsers_dir: Optional[Path] = ( + Path("~/Library/Caches/ms-playwright").expanduser() + if OPERATING_SYSTEM == "darwin" else + Path("~/.cache/ms-playwright").expanduser() + ) + puppeteer_install_args: List[str] = ["install"] # --with-deps + + packages_handler: ProviderLookupDict = Field(default={ + "chrome": lambda: ["chromium"], + }, exclude=True) + + _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} + + @computed_field + @property + def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None: + return PLAYWRIGHT_BINARY.load().abspath + + def setup(self) -> None: + assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized" + + if self.puppeteer_browsers_dir: + self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True) + + def installed_browser_bins(self, browser_name: str = "*") -> List[Path]: + if browser_name == 'chrome': + browser_name = 'chromium' + + # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary + if platform.system().lower() == "darwin": + # ~/Library/caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium + return sorted( + self.puppeteer_browsers_dir.glob( + f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*" + ) + ) + + # ~/Library/caches/ms-playwright/chromium-1097/chrome-linux/chromium + return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}-*/*-linux/*")) + + def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]: + assert bin_name == "chrome", "Only chrome is supported using the @puppeteer/browsers install method currently." + + # already loaded, return abspath from cache + if bin_name in self._browser_abspaths: + return self._browser_abspaths[bin_name] + + # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries + matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)] + if matching_bins: + newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number + self._browser_abspaths[bin_name] = newest_bin + return self._browser_abspaths[bin_name] + + # playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well + abspath = bin_abspath('google-chrome-stable', PATH=env.PATH) + if abspath: + self._browser_abspaths[bin_name] = abspath + return self._browser_abspaths[bin_name] + + return None + + def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str: + """playwright install chrome""" + self.setup() + assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently." + + if not self.INSTALLER_BIN_ABSPATH: + raise Exception( + f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)" + ) + packages = packages or self.on_get_packages(bin_name) + + # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}') + + install_args = [*self.puppeteer_install_args] + + proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages]) + + if proc.returncode != 0: + print(proc.stdout.strip()) + print(proc.stderr.strip()) + raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {packages}: {packages}") + + # chrome@129.0.6668.58 /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing + output_info = proc.stdout.strip().split("\n")[-1] + browser_abspath = output_info.split(" ", 1)[-1] + # browser_version = output_info.split('@', 1)[-1].split(' ', 1)[0] + + self._browser_abspaths[bin_name] = Path(browser_abspath) + + return proc.stderr.strip() + "\n" + proc.stdout.strip() + +PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider() + + + +class PlaywrightPlugin(BasePlugin): + app_label: str = 'playwright' + verbose_name: str = 'Playwright' + + hooks: List[InstanceOf[BaseHook]] = [ + PLAYWRIGHT_CONFIG, + PLAYWRIGHT_BINPROVIDER, + PLAYWRIGHT_BINARY, + ] + + + +PLUGIN = PlaywrightPlugin() +PLUGIN.register(settings) +DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/builtin_plugins/puppeteer/apps.py b/archivebox/builtin_plugins/puppeteer/apps.py index ac3465d5..a7e84e7a 100644 --- a/archivebox/builtin_plugins/puppeteer/apps.py +++ b/archivebox/builtin_plugins/puppeteer/apps.py @@ -6,33 +6,38 @@ from django.conf import settings # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field -from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, InstallArgs, HostBinPath, bin_abspath +from pydantic_pkgr import ( + BinProvider, + BinName, + BinProviderName, + ProviderLookupDict, + InstallArgs, + PATHStr, + HostBinPath, +) # Depends on other Django apps: from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName +from plugantic.base_configset import BaseConfigSet from plugantic.base_binary import BaseBinary, BaseBinProvider, env # from plugantic.base_extractor import BaseExtractor # from plugantic.base_queue import BaseQueue from plugantic.base_hook import BaseHook # Depends on Other Plugins: -from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER +from builtin_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER ###################### Config ########################## -class PuppeteerDependencyConfigs(BaseConfigSet): - section: ConfigSectionName = 'DEPENDENCY_CONFIG' +class PuppeteerConfigs(BaseConfigSet): + # section: ConfigSectionName = 'DEPENDENCY_CONFIG' - PUPPETEER_BINARY: str = Field(default='wget') - PUPPETEER_ARGS: Optional[List[str]] = Field(default=None) - PUPPETEER_EXTRA_ARGS: List[str] = [] - PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] - -class PuppeteerConfigs(PuppeteerDependencyConfigs): - # section: ConfigSectionName = 'ALL_CONFIGS' + # PUPPETEER_BINARY: str = Field(default='wget') + # PUPPETEER_ARGS: Optional[List[str]] = Field(default=None) + # PUPPETEER_EXTRA_ARGS: List[str] = [] + # PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] pass DEFAULT_GLOBAL_CONFIG = { @@ -42,17 +47,29 @@ PUPPETEER_CONFIG = PuppeteerConfigs(**DEFAULT_GLOBAL_CONFIG) LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers" + +class PuppeteerBinary(BaseBinary): + name: BinName = "puppeteer" + + binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] + + +PUPPETEER_BINARY = PuppeteerBinary() + + class PuppeteerBinProvider(BaseBinProvider): name: BinProviderName = "puppeteer" INSTALLER_BIN: BinName = "npx" + + PATH: PATHStr = str(settings.CONFIG.BIN_DIR) puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)] - # packages_handler: ProviderLookupDict = { - # "chrome": lambda: - # ['chrome@stable'], - # } + packages_handler: ProviderLookupDict = Field(default={ + "chrome": lambda: + ['chrome@stable'], + }, exclude=True) _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} @@ -61,6 +78,15 @@ class PuppeteerBinProvider(BaseBinProvider): if self.puppeteer_browsers_dir: self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True) + + def installed_browser_bins(self, browser_name: str='*') -> List[Path]: + # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary + if platform.system().lower() == 'darwin': + # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing + return sorted(self.puppeteer_browsers_dir.glob(f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing')) + + # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome + return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome")) def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]: assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.' @@ -70,21 +96,13 @@ class PuppeteerBinProvider(BaseBinProvider): return self._browser_abspaths[bin_name] # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries - browsers_present = [d.name for d in self.puppeteer_browsers_dir.glob("*")] - if bin_name in browsers_present: - candidates = [] - # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary - if platform.system().lower() == 'darwin': - # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing - candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing')) - else: - # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome - candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/linux*/chrome*/chrome')) - if candidates: - self._browser_abspaths[bin_name] = candidates[-1] - return self._browser_abspaths[bin_name] + matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)] + if matching_bins: + newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number + self._browser_abspaths[bin_name] = newest_bin + return self._browser_abspaths[bin_name] - return super().on_get_abspath(bin_name, **context) + return None def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str: """npx @puppeteer/browsers install chrome@stable""" @@ -119,64 +137,6 @@ class PuppeteerBinProvider(BaseBinProvider): PUPPETEER_BINPROVIDER = PuppeteerBinProvider() -CHROMIUM_BINARY_NAMES = [ - 'chromium', - 'chromium-browser', - 'chromium-browser-beta', - 'chromium-browser-unstable', - 'chromium-browser-canary', - 'chromium-browser-dev' - '/Applications/Chromium.app/Contents/MacOS/Chromium', -] -CHROME_BINARY_NAMES = [ - 'google-chrome', - 'google-chrome-stable', - 'google-chrome-beta', - 'google-chrome-canary', - 'google-chrome-unstable', - 'google-chrome-dev', - # 'chrome', - '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', - '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary', -] - -def autodetect_system_chrome_install(PATH=None): - for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES: - abspath = bin_abspath(bin_name, PATH=env.PATH) - if abspath: - return abspath - return None - -class ChromeBinary(BaseBinary): - name: BinName = 'chrome' - binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env] - - provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { - env.name: { - 'abspath': lambda: - autodetect_system_chrome_install(PATH=env.PATH), - }, - PUPPETEER_BINPROVIDER.name: { - 'packages': lambda: - ['chrome@stable'], - } - } - - @staticmethod - def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None: - if not (binary.abspath and binary.abspath.exists()): - return - bin_dir.mkdir(parents=True, exist_ok=True) - symlink = bin_dir / binary.name - - if platform.system().lower() == 'darwin': - # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink - symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""") - symlink.chmod(0o777) # make sure its executable by everyone - else: - # otherwise on linux we can symlink directly to binary executable - symlink.symlink_to(binary.abspath) - # ALTERNATIVE INSTALL METHOD using Ansible: # install_playbook = self.plugin_dir / 'install_puppeteer.yml' @@ -192,18 +152,14 @@ class ChromeBinary(BaseBinary): # ) -CHROME_BINARY = ChromeBinary() - -PLUGIN_BINARIES = [CHROME_BINARY] - class PuppeteerPlugin(BasePlugin): app_label: str ='puppeteer' - verbose_name: str = 'SingleFile' + verbose_name: str = 'Puppeteer & Playwright' hooks: List[InstanceOf[BaseHook]] = [ PUPPETEER_CONFIG, PUPPETEER_BINPROVIDER, - CHROME_BINARY, + PUPPETEER_BINARY, ] diff --git a/archivebox/plugantic/base_hook.py b/archivebox/plugantic/base_hook.py index 3622c3bb..12de56f5 100644 --- a/archivebox/plugantic/base_hook.py +++ b/archivebox/plugantic/base_hook.py @@ -4,7 +4,7 @@ import inspect from huey.api import TaskWrapper from pathlib import Path -from typing import List, Literal +from typing import List, Literal, ClassVar from pydantic import BaseModel, ConfigDict, Field, computed_field diff --git a/archivebox/plugantic/views.py b/archivebox/plugantic/views.py index de14043b..79146a88 100644 --- a/archivebox/plugantic/views.py +++ b/archivebox/plugantic/views.py @@ -70,7 +70,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: "Provided By": [], "Found Abspath": [], "Related Configuration": [], - "Overrides": [], + # "Overrides": [], # "Description": [], } @@ -109,7 +109,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: ))) # if not binary.provider_overrides: # import ipdb; ipdb.set_trace() - rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200]) + # rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200]) # rows['Description'].append(binary.description) return TableContext( diff --git a/pyproject.toml b/pyproject.toml index 1362a1f7..3cbcf733 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dependencies = [ ############# VENDORED LIBS ###################### # these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian) # archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py - "pydantic-pkgr>=0.3.0", + "pydantic-pkgr>=0.3.2", "atomicwrites==1.4.1", "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", "django-taggit==1.3.0",