mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00

Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
128 lines
5.5 KiB
Python
128 lines
5.5 KiB
Python
# Explicitly set the dotted package name for this module.
__package__ = 'plugins_pkg.puppeteer'
|
|
|
|
import os
|
|
import platform
|
|
from pathlib import Path
|
|
from typing import List, Optional, Dict, ClassVar
|
|
|
|
from pydantic import Field
|
|
from pydantic_pkgr import (
|
|
BinName,
|
|
BinProviderName,
|
|
BinProviderOverrides,
|
|
InstallArgs,
|
|
PATHStr,
|
|
HostBinPath,
|
|
)
|
|
|
|
from archivebox.config import CONSTANTS
|
|
from archivebox.config.permissions import ARCHIVEBOX_USER
|
|
|
|
from abx.archivebox.base_binary import BaseBinProvider
|
|
|
|
from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER
|
|
|
|
|
|
# Directory that `@puppeteer/browsers install --path ...` installs into, and
# that PuppeteerBinProvider searches for already-installed browser binaries.
LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
|
|
|
|
|
|
class PuppeteerBinProvider(BaseBinProvider):
    """Bin provider that installs browsers via `npx @puppeteer/browsers install`.

    Only `chrome` is currently supported. Installed binaries are looked up on
    the filesystem under `puppeteer_browsers_dir`, and resolved paths are
    cached in the class-level `_browser_abspaths` dict.
    """

    name: BinProviderName = "puppeteer"
    INSTALLER_BIN: BinName = "npx"

    PATH: PATHStr = str(CONSTANTS.LIB_BIN_DIR)

    # NOTE(review): presumably the uid that install commands run as -- confirm against BaseBinProvider
    euid: Optional[int] = ARCHIVEBOX_USER

    # root dir that @puppeteer/browsers installs into (passed below via --path)
    puppeteer_browsers_dir: Path = LIB_DIR_BROWSERS
    # arguments passed to npx before the package spec(s), e.g. chrome@stable
    puppeteer_install_args: List[str] = ['--yes', "@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]

    # maps each bin name to a callable returning the package specs to install for it
    packages_handler: BinProviderOverrides = Field(default={
        "chrome": lambda:
            ['chrome@stable'],
    }, exclude=True)

    # cache of bin_name -> resolved binary path, shared by all instances (ClassVar)
    _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}

    def setup(self) -> None:
        """Check that the NPM bin provider is loaded and create the browsers dir.

        Raises:
            AssertionError: if SYS_NPM_BINPROVIDER has no INSTALLER_BIN_ABSPATH.
        """
        assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"

        if self.puppeteer_browsers_dir:
            self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)

    def installed_browser_bins(self, browser_name: str='*') -> List[Path]:
        """Return installed browser binaries under `puppeteer_browsers_dir`.

        Args:
            browser_name: name of the browser subdirectory to search, or a glob
                pattern ('*' by default, matching every browser).

        Returns:
            Matching binary paths sorted so that numeric path components
            compare as integers; the highest version is therefore last
            (e.g. 129.0.6668.100 sorts after 129.0.6668.58).
        """
        import re  # local import: only needed for the version-aware sort key below

        def version_aware_key(path: Path) -> list:
            # re.split with a capturing group yields alternating tokens:
            # even indexes are text, odd indexes are the captured digit runs.
            # Comparing digit runs as ints fixes plain string sorting, which
            # would put "...6668.100" before "...6668.58".
            tokens = re.split(r'(\d+)', str(path))
            return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]

        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
        if platform.system().lower() == 'darwin':
            # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
            pattern = f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'
        else:
            # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
            # /data/lib/aarch64-linux/browsers/chrome/linux-129.0.6668.100/chrome-linux64/chrome
            pattern = f"{browser_name}/linux*/chrome*/chrome"

        return sorted(self.puppeteer_browsers_dir.glob(pattern), key=version_aware_key)

    def default_abspath_handler(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
        """Return the path of the newest installed `bin_name` binary, or None.

        The result is cached in `_browser_abspaths`, so the filesystem is only
        searched until a binary has been found once.

        Raises:
            AssertionError: if `bin_name` is anything other than 'chrome'.
        """
        assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'

        # already loaded, return abspath from cache
        if bin_name in self._browser_abspaths:
            return self._browser_abspaths[bin_name]

        # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
        matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
        if not matching_bins:
            return None

        # installed_browser_bins() sorts version-aware, so the last entry is the highest version
        newest_bin = matching_bins[-1]
        self._browser_abspaths[bin_name] = newest_bin
        return newest_bin

    def default_install_handler(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
        """npx @puppeteer/browsers install chrome@stable

        Args:
            bin_name: binary to install; only 'chrome' is supported.
            packages: package specs to pass to the installer; defaults to
                `self.get_packages(bin_name)`.

        Returns:
            The path of the installed binary (also stored in the cache) when
            it can be parsed from the installer output and is an executable
            file; otherwise the installer's combined stderr + stdout text.

        Raises:
            AssertionError: if `bin_name` is not 'chrome', if the NPM provider
                is not initialized, or if no packages are specified.
            Exception: if `npx` is not available or the installer exits non-zero.
        """
        self.setup()
        assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'

        if not self.INSTALLER_BIN_ABSPATH:
            raise Exception(
                f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
            )
        packages = packages or self.get_packages(bin_name)
        assert packages, f"No packages specified for installation of {bin_name}"

        # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')

        proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*self.puppeteer_install_args, *packages])

        if proc.returncode != 0:
            print(proc.stdout.strip())
            print(proc.stderr.strip())
            raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {bin_name}: {packages}")

        # Example installer output lines:
        # chrome@129.0.6668.91 /tmp/test3/lib/x86_64-linux/browsers/chrome/linux-129.0.6668.91/chrome-linux64/chrome
        # chrome@129.0.6668.58 /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
        # /data/lib/aarch64-linux/browsers/chrome/linux-129.0.6668.100/chrome-linux64/chrome
        relpath = proc.stdout.strip().split(str(self.puppeteer_browsers_dir))[-1].split('\n', 1)[0]
        # relpath begins with the "/" that followed the browsers dir in the output.
        # Joining an absolute path with `/` discards the left operand, so strip the
        # leading separator to keep the result inside puppeteer_browsers_dir.
        abspath = self.puppeteer_browsers_dir / relpath.lstrip('/')

        if os.path.isfile(abspath) and os.access(abspath, os.X_OK):
            self._browser_abspaths[bin_name] = abspath
            return abspath

        # could not determine the binary path from the output; hand the raw output back to the caller
        return (proc.stderr.strip() + "\n" + proc.stdout.strip()).strip()
|
|
|
|
# Module-level PuppeteerBinProvider instance, created at import time with default field values.
PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
|
|
|
|
|
|
# ALTERNATIVE INSTALL METHOD using Ansible:
|
|
# install_playbook = self.plugin_dir / 'install_puppeteer.yml'
|
|
# chrome_bin = run_playbook(install_playbook, data_dir=DATA_DIR, quiet=quiet).BINARIES.chrome
|
|
# return self.__class__.model_validate(
|
|
# {
|
|
# **self.model_dump(),
|
|
# "loaded_abspath": chrome_bin.symlink,
|
|
# "loaded_version": chrome_bin.version,
|
|
# "loaded_binprovider": env,
|
|
# "binproviders_supported": self.binproviders_supported,
|
|
# }
|
|
# )
|