split puppeteer plugin into Puppeteer, Playwright, and Chrome

This commit is contained in:
Nick Sweeting 2024-09-21 04:00:54 -07:00
parent 33fd7fe439
commit 541cd6c5a1
No known key found for this signature in database
10 changed files with 414 additions and 124 deletions

View file

@ -0,0 +1,132 @@
import platform
from pathlib import Path
from typing import List, Optional, Dict
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import (
BinProvider,
BinName,
BinProviderName,
ProviderLookupDict,
bin_abspath,
)
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
# Depends on Other Plugins:
from builtin_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER
from builtin_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER
# Candidate names for a system-wide Chromium install: bare executable names
# for each release channel, followed by the macOS app-bundle binary path.
CHROMIUM_BINARY_NAMES = [
    "chromium",
    *(
        f"chromium-browser{channel}"
        for channel in ("", "-beta", "-unstable", "-canary", "-dev")
    ),
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
]

# Candidate names for a system-wide Google Chrome install, same layout as above.
# The bare name 'chrome' is deliberately not part of this list (it was already
# commented out in the list this replaces).
CHROME_BINARY_NAMES = [
    *(
        f"google-chrome{channel}"
        for channel in ("", "-stable", "-beta", "-canary", "-unstable", "-dev")
    ),
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
]
def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
    """Return the first Chrome/Chromium binary that ``bin_abspath`` can resolve.

    Candidates are probed in list order: every entry of CHROME_BINARY_NAMES
    first, then every entry of CHROMIUM_BINARY_NAMES.

    Args:
        PATH: search path handed to ``bin_abspath``. When omitted (or empty)
            the ``env`` provider's PATH is used, so existing callers that pass
            nothing or pass ``env.PATH`` see no change.

    Returns:
        The first truthy result of ``bin_abspath``, or None when no candidate
        resolves.
    """
    # Previously the PATH argument was accepted but silently ignored.
    search_path = PATH or env.PATH
    for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
        abspath = bin_abspath(bin_name, PATH=search_path)
        if abspath:
            return abspath
    return None
###################### Config ##########################
class ChromeDependencyConfigs(BaseConfigSet):
    """Config fields describing the Chrome binary dependency and its CLI args."""

    section: ConfigSectionName = 'DEPENDENCY_CONFIG'

    # Was 'wget' (left over from the config set this one was copied from);
    # 'chrome' matches the name declared by ChromeBinary in this file.
    CHROME_BINARY: str = Field(default='chrome')
    CHROME_ARGS: Optional[List[str]] = Field(default=None)
    CHROME_EXTRA_ARGS: List[str] = []
    # NOTE(review): '{TIMEOUT-10}' is not something plain str.format can
    # evaluate, and the flag looks inherited from the wget config - confirm
    # where (and whether) this template is substituted before relying on it.
    CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
class ChromeConfigs(ChromeDependencyConfigs):
    """Complete Chrome config set; currently adds nothing to its parent."""
    # section: ConfigSectionName = 'ALL_CONFIGS'
# No overrides are supplied here, so the config instance is built from the
# defaults declared on the config classes.
DEFAULT_GLOBAL_CONFIG = {}

CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
class ChromeBinary(BaseBinary):
    """The ``chrome`` browser binary and the providers able to supply it."""

    name: BinName = 'chrome'
    binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]

    # Per-provider lookup overrides, keyed by provider name.
    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        env.name: {
            # a system install can live under many names, so probe all of them
            'abspath': lambda:
                autodetect_system_chrome_install(PATH=env.PATH),
        },
        PUPPETEER_BINPROVIDER.name: {
            'packages': lambda:
                ['chrome@stable'],
        },
        PLAYWRIGHT_BINPROVIDER.name: {
            'packages': lambda:
                ['chromium'],
        },
    }

    @staticmethod
    def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
        """Expose ``binary`` inside ``bin_dir`` under ``binary.name``.

        Does nothing when ``binary.abspath`` is unset or does not exist.
        On macOS a small bash wrapper script is written; on other platforms
        a symlink to the executable is created. Any entry left at the target
        location by an earlier call is replaced.

        Args:
            binary: object exposing ``abspath`` (a Path or falsy) and ``name``.
            bin_dir: directory to place the link/wrapper in; created if missing.
        """
        if not (binary.abspath and binary.abspath.exists()):
            return

        bin_dir.mkdir(parents=True, exist_ok=True)
        symlink = bin_dir / binary.name

        # Drop whatever a previous run left here first: symlink_to() raises
        # FileExistsError on an existing entry, and write_text() would follow
        # a stale symlink and overwrite the file it points at.
        symlink.unlink(missing_ok=True)

        if platform.system().lower() == 'darwin':
            # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
            symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""")
            # NOTE(review): 0o777 also makes the wrapper world-writable; 0o755
            # would still be executable by everyone - confirm before tightening.
            symlink.chmod(0o777)   # make sure its executable by everyone
        else:
            # otherwise on linux we can symlink directly to binary executable
            symlink.symlink_to(binary.abspath)
# Shared ChromeBinary instance, plus the list of binaries this module provides.
CHROME_BINARY = ChromeBinary()
PLUGIN_BINARIES = [
    CHROME_BINARY,
]
class ChromePlugin(BasePlugin):
    """Plugin that contributes the Chrome config set and the chrome binary."""

    # Was 'puppeteer' / 'Chrome & Playwright', copied from the puppeteer
    # plugin. The puppeteer plugin already uses the 'puppeteer' label, and
    # this plugin now lives in its own module, so it gets its own identity.
    app_label: str = 'chrome'
    verbose_name: str = 'Chrome'

    hooks: List[InstanceOf[BaseHook]] = [
        CHROME_CONFIG,
        CHROME_BINARY,
    ]
# Build the plugin, register it against the Django settings object, and
# expose its AppConfig under the DJANGO_APP name.
PLUGIN = ChromePlugin()
PLUGIN.register(settings)

DJANGO_APP = PLUGIN.AppConfig

View file

@ -4,12 +4,12 @@ from pathlib import Path
from typing import List, Optional from typing import List, Optional
from django.conf import settings from django.conf import settings
from pydantic import InstanceOf, Field from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
from plugantic.base_plugin import BasePlugin from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook from plugantic.base_hook import BaseHook
@ -20,13 +20,14 @@ from ...config import CONFIG
class NpmDependencyConfigs(BaseConfigSet): class NpmDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG' # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
USE_NPM: bool = True # USE_NPM: bool = True
NPM_BINARY: str = Field(default='npm') # NPM_BINARY: str = Field(default='npm')
NPM_ARGS: Optional[List[str]] = Field(default=None) # NPM_ARGS: Optional[List[str]] = Field(default=None)
NPM_EXTRA_ARGS: List[str] = [] # NPM_EXTRA_ARGS: List[str] = []
NPM_DEFAULT_ARGS: List[str] = [] # NPM_DEFAULT_ARGS: List[str] = []
pass
DEFAULT_GLOBAL_CONFIG = { DEFAULT_GLOBAL_CONFIG = {
@ -35,7 +36,7 @@ NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
class SystemNpmProvider(NpmProvider, BaseBinProvider): class SystemNpmProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "npm" name: BinProviderName = "sys_npm"
PATH: PATHStr = str(CONFIG.NODE_BIN_PATH) PATH: PATHStr = str(CONFIG.NODE_BIN_PATH)
npm_prefix: Optional[Path] = None npm_prefix: Optional[Path] = None

View file

@ -30,6 +30,7 @@ class PipDependencyConfigs(BaseConfigSet):
PIP_ARGS: Optional[List[str]] = Field(default=None) PIP_ARGS: Optional[List[str]] = Field(default=None)
PIP_EXTRA_ARGS: List[str] = [] PIP_EXTRA_ARGS: List[str] = []
PIP_DEFAULT_ARGS: List[str] = [] PIP_DEFAULT_ARGS: List[str] = []
DEFAULT_GLOBAL_CONFIG = { DEFAULT_GLOBAL_CONFIG = {
@ -37,15 +38,27 @@ DEFAULT_GLOBAL_CONFIG = {
PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG) PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
class SystemPipBinProvider(PipProvider, BaseBinProvider): class SystemPipBinProvider(PipProvider, BaseBinProvider):
name: BinProviderName = "pip" name: BinProviderName = "sys_pip"
INSTALLER_BIN: BinName = "pip" INSTALLER_BIN: BinName = "pip"
pip_venv: Optional[Path] = None # global pip scope pip_venv: Optional[Path] = None # global pip scope
def on_install(self, bin_name: str, **kwargs):
# never modify system pip packages
return 'refusing to install packages globally with system pip, use a venv instead'
class SystemPipxBinProvider(PipProvider, BaseBinProvider): class SystemPipxBinProvider(PipProvider, BaseBinProvider):
name: BinProviderName = "pipx" name: BinProviderName = "pipx"
INSTALLER_BIN: BinName = "pipx" INSTALLER_BIN: BinName = "pipx"
pip_venv: Optional[Path] = None # global pipx scope
class VenvPipBinProvider(PipProvider, BaseBinProvider):
name: BinProviderName = "venv_pip"
INSTALLER_BIN: BinName = "pip"
pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV", None) or '/tmp/NotInsideAVenv')
class LibPipBinProvider(PipProvider, BaseBinProvider): class LibPipBinProvider(PipProvider, BaseBinProvider):
@ -55,7 +68,8 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / 'lib' / 'pip' / 'venv' pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / 'lib' / 'pip' / 'venv'
SYS_PIP_BINPROVIDER = SystemPipBinProvider() SYS_PIP_BINPROVIDER = SystemPipBinProvider()
SYS_PIPX_BINPROVIDER = SystemPipxBinProvider() PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
VENV_PIP_BINPROVIDER = VenvPipBinProvider()
LIB_PIP_BINPROVIDER = LibPipBinProvider() LIB_PIP_BINPROVIDER = LibPipBinProvider()
pip = LIB_PIP_BINPROVIDER pip = LIB_PIP_BINPROVIDER
@ -64,7 +78,7 @@ pip = LIB_PIP_BINPROVIDER
class PythonBinary(BaseBinary): class PythonBinary(BaseBinary):
name: BinName = 'python' name: BinName = 'python'
binproviders_supported: List[InstanceOf[BinProvider]] = [SYS_PIP_BINPROVIDER, apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
SYS_PIP_BINPROVIDER.name: { SYS_PIP_BINPROVIDER.name: {
'abspath': lambda: 'abspath': lambda:
@ -78,13 +92,15 @@ PYTHON_BINARY = PythonBinary()
class SqliteBinary(BaseBinary): class SqliteBinary(BaseBinary):
name: BinName = 'sqlite' name: BinName = 'sqlite'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER]) binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
VENV_PIP_BINPROVIDER.name: {
"abspath": lambda: Path(inspect.getfile(django_sqlite3)),
"version": lambda: SemVer(django_sqlite3.version),
},
SYS_PIP_BINPROVIDER.name: { SYS_PIP_BINPROVIDER.name: {
'abspath': lambda: "abspath": lambda: Path(inspect.getfile(django_sqlite3)),
Path(inspect.getfile(django_sqlite3)), "version": lambda: SemVer(django_sqlite3.version),
'version': lambda:
SemVer(django_sqlite3.version),
}, },
} }
@ -94,13 +110,15 @@ SQLITE_BINARY = SqliteBinary()
class DjangoBinary(BaseBinary): class DjangoBinary(BaseBinary):
name: BinName = 'django' name: BinName = 'django'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER]) binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
VENV_PIP_BINPROVIDER.name: {
"abspath": lambda: inspect.getfile(django),
"version": lambda: django.VERSION[:3],
},
SYS_PIP_BINPROVIDER.name: { SYS_PIP_BINPROVIDER.name: {
'abspath': lambda: "abspath": lambda: inspect.getfile(django),
inspect.getfile(django), "version": lambda: django.VERSION[:3],
'version': lambda:
django.VERSION[:3],
}, },
} }
@ -108,7 +126,7 @@ DJANGO_BINARY = DjangoBinary()
class PipBinary(BaseBinary): class PipBinary(BaseBinary):
name: BinName = "pip" name: BinName = "pip"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
PIP_BINARY = PipBinary() PIP_BINARY = PipBinary()
@ -164,7 +182,8 @@ class PipPlugin(BasePlugin):
hooks: List[InstanceOf[BaseHook]] = [ hooks: List[InstanceOf[BaseHook]] = [
PIP_CONFIG, PIP_CONFIG,
SYS_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER,
SYS_PIPX_BINPROVIDER, PIPX_PIP_BINPROVIDER,
VENV_PIP_BINPROVIDER,
LIB_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER,
PIP_BINARY, PIP_BINARY,
PYTHON_BINARY, PYTHON_BINARY,

View file

@ -0,0 +1,182 @@
import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, computed_field, Field
from pydantic_pkgr import (
BinName,
BinProvider,
BinProviderName,
ProviderLookupDict,
InstallArgs,
PATHStr,
HostBinPath,
bin_abspath,
OPERATING_SYSTEM,
DEFAULT_ENV_PATH,
)
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
# Depends on Other Plugins:
from builtin_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
###################### Config ##########################
class PlaywrightConfigs(BaseConfigSet):
    """Config set for the Playwright plugin; declares no active fields yet."""
    # Candidate fields, all currently disabled:
    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
    # PLAYWRIGHT_BINARY: str = Field(default='wget')
    # PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None)
    # PLAYWRIGHT_EXTRA_ARGS: List[str] = []
    # PLAYWRIGHT_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# No overrides are supplied here; the config instance is built from defaults.
DEFAULT_GLOBAL_CONFIG = {}

PLAYWRIGHT_CONFIG = PlaywrightConfigs(**DEFAULT_GLOBAL_CONFIG)

# <OUTPUT_DIR>/lib/browsers
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR.joinpath("lib", "browsers")
class PlaywrightBinary(BaseBinary):
    """The ``playwright`` CLI, supplied by one of the pip providers or ``env``."""

    name: BinName = "playwright"
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        LIB_PIP_BINPROVIDER,
        VENV_PIP_BINPROVIDER,
        SYS_PIP_BINPROVIDER,
        env,
    ]
# Shared instance; PlaywrightBinProvider reads its name and loaded abspath.
PLAYWRIGHT_BINARY = PlaywrightBinary()
class PlaywrightBinProvider(BaseBinProvider):
    """Bin provider that installs and locates browsers via the ``playwright`` CLI."""

    name: BinProviderName = "playwright"
    INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
    PATH: PATHStr = f"{settings.CONFIG.BIN_DIR}:{DEFAULT_ENV_PATH}"

    # NOTE(review): the ``puppeteer_*`` field names look carried over from the
    # puppeteer provider. They are kept because renaming a model field would
    # change this class's public interface.
    puppeteer_browsers_dir: Optional[Path] = (
        Path("~/Library/Caches/ms-playwright").expanduser()
        if OPERATING_SYSTEM == "darwin" else
        Path("~/.cache/ms-playwright").expanduser()
    )
    puppeteer_install_args: List[str] = ["install"]  # --with-deps

    packages_handler: ProviderLookupDict = Field(default={
        "chrome": lambda: ["chromium"],
    }, exclude=True)

    # Class-level cache of resolved browser paths, keyed by bin_name.
    _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}

    @computed_field
    @property
    def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
        """Abspath of the playwright CLI, as reported by ``PLAYWRIGHT_BINARY.load()``."""
        return PLAYWRIGHT_BINARY.load().abspath

    def setup(self) -> None:
        """Check that the system pip provider has an installer and create the browsers dir.

        Raises:
            AssertionError: if ``SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH`` is falsy.
        """
        assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"

        if self.puppeteer_browsers_dir:
            self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)

    def installed_browser_bins(self, browser_name: str = "*") -> List[Path]:
        """List browser executables found under ``puppeteer_browsers_dir``.

        Args:
            browser_name: browser folder prefix to look for, or ``"*"`` for
                any. ``"chrome"`` is treated as ``"chromium"``.

        Returns:
            Matching paths grouped by browser folder prefix and ordered by
            ascending numeric build number, so the last entry for a browser
            is its highest build. Empty when no browsers dir is configured.
        """
        browsers_dir = self.puppeteer_browsers_dir
        if not browsers_dir:
            return []

        if browser_name == 'chrome':
            browser_name = 'chromium'

        def build_order(abspath: Path):
            # <browsers_dir>/<browser>-<build>/... ; compare <build> as a number
            # so that e.g. build 999 sorts before build 1097.
            prefix, _, build = abspath.relative_to(browsers_dir).parts[0].rpartition("-")
            return (prefix, int(build) if build.isdecimal() else -1, str(abspath))

        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
        if platform.system().lower() == "darwin":
            # ~/Library/caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium
            found = browsers_dir.glob(f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*")
        else:
            # ~/.cache/ms-playwright/chromium-1097/chrome-linux/chrome
            # The glob matches every entry inside the *-linux folder, not only
            # the browser itself, so keep just the browser executable.
            found = (
                abspath
                for abspath in browsers_dir.glob(f"{browser_name}-*/*-linux/*")
                if abspath.name in ("chrome", "chromium") and abspath.is_file()
            )
        return sorted(found, key=build_order)

    def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
        """Resolve the abspath of ``bin_name`` (only ``"chrome"`` is accepted).

        Lookup order: the class-level cache, then the browsers installed under
        ``puppeteer_browsers_dir``, then ``google-chrome-stable`` on
        ``env.PATH``. A successful lookup is stored in the cache.

        Returns:
            The resolved path, or None when nothing was found.

        Raises:
            AssertionError: if ``bin_name`` is not ``"chrome"``.
        """
        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."

        # already loaded, return abspath from cache
        if bin_name in self._browser_abspaths:
            return self._browser_abspaths[bin_name]

        # first time loading, search the filesystem for installs of this
        # specific browser (not every browser playwright may have downloaded)
        installed = self.installed_browser_bins(bin_name)
        if installed:
            # sorted by build number, so the last entry is the newest build
            self._browser_abspaths[bin_name] = installed[-1]
            return self._browser_abspaths[bin_name]

        # playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well
        abspath = bin_abspath('google-chrome-stable', PATH=env.PATH)
        if abspath:
            self._browser_abspaths[bin_name] = abspath
            return self._browser_abspaths[bin_name]

        return None

    def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
        """Run ``playwright install <packages>`` for ``bin_name`` (only ``"chrome"``).

        Args:
            bin_name: binary to install; must be ``"chrome"``.
            packages: package names passed to the installer; defaults to
                ``self.on_get_packages(bin_name)``.

        Returns:
            The installer's stripped stderr and stdout joined by a newline.

        Raises:
            AssertionError: from ``setup()`` or if ``bin_name`` is not ``"chrome"``.
            Exception: if the playwright CLI is unavailable or exits non-zero.
        """
        self.setup()
        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."

        if not self.INSTALLER_BIN_ABSPATH:
            raise Exception(
                f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
            )
        packages = packages or self.on_get_packages(bin_name)

        # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')

        install_args = [*self.puppeteer_install_args]

        proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])

        if proc.returncode != 0:
            print(proc.stdout.strip())
            print(proc.stderr.strip())
            raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {bin_name}: {packages}")

        # The stdout parsing that used to live here expected the
        # `chrome@<version> <abspath>` line printed by @puppeteer/browsers.
        # Applied to this installer's output it could cache an arbitrary
        # string - or Path('') when stdout is empty - as the browser path.
        # Instead, forget any stale entry and rediscover the browser on disk.
        self._browser_abspaths.pop(bin_name, None)
        self.on_get_abspath(bin_name)

        return proc.stderr.strip() + "\n" + proc.stdout.strip()
# Shared provider instance, listed in the plugin hooks of this module.
PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider()
class PlaywrightPlugin(BasePlugin):
    """Plugin bundling the playwright config set, bin provider and binary."""

    app_label: str = 'playwright'
    verbose_name: str = 'Playwright'

    hooks: List[InstanceOf[BaseHook]] = [PLAYWRIGHT_CONFIG, PLAYWRIGHT_BINPROVIDER, PLAYWRIGHT_BINARY]
# Build the plugin, register it against the Django settings object, and
# expose its AppConfig under the DJANGO_APP name.
PLUGIN = PlaywrightPlugin()
PLUGIN.register(settings)

DJANGO_APP = PLUGIN.AppConfig

View file

@ -6,33 +6,38 @@ from django.conf import settings
# Depends on other PyPI/vendor packages: # Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, InstallArgs, HostBinPath, bin_abspath from pydantic_pkgr import (
BinProvider,
BinName,
BinProviderName,
ProviderLookupDict,
InstallArgs,
PATHStr,
HostBinPath,
)
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env from plugantic.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor # from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue # from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook from plugantic.base_hook import BaseHook
# Depends on Other Plugins: # Depends on Other Plugins:
from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER from builtin_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
###################### Config ########################## ###################### Config ##########################
class PuppeteerDependencyConfigs(BaseConfigSet): class PuppeteerConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG' # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
PUPPETEER_BINARY: str = Field(default='wget') # PUPPETEER_BINARY: str = Field(default='wget')
PUPPETEER_ARGS: Optional[List[str]] = Field(default=None) # PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
PUPPETEER_EXTRA_ARGS: List[str] = [] # PUPPETEER_EXTRA_ARGS: List[str] = []
PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] # PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
class PuppeteerConfigs(PuppeteerDependencyConfigs):
# section: ConfigSectionName = 'ALL_CONFIGS'
pass pass
DEFAULT_GLOBAL_CONFIG = { DEFAULT_GLOBAL_CONFIG = {
@ -42,17 +47,29 @@ PUPPETEER_CONFIG = PuppeteerConfigs(**DEFAULT_GLOBAL_CONFIG)
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers" LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers"
class PuppeteerBinary(BaseBinary):
name: BinName = "puppeteer"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
PUPPETEER_BINARY = PuppeteerBinary()
class PuppeteerBinProvider(BaseBinProvider): class PuppeteerBinProvider(BaseBinProvider):
name: BinProviderName = "puppeteer" name: BinProviderName = "puppeteer"
INSTALLER_BIN: BinName = "npx" INSTALLER_BIN: BinName = "npx"
PATH: PATHStr = str(settings.CONFIG.BIN_DIR)
puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)] puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
# packages_handler: ProviderLookupDict = { packages_handler: ProviderLookupDict = Field(default={
# "chrome": lambda: "chrome": lambda:
# ['chrome@stable'], ['chrome@stable'],
# } }, exclude=True)
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
@ -61,6 +78,15 @@ class PuppeteerBinProvider(BaseBinProvider):
if self.puppeteer_browsers_dir: if self.puppeteer_browsers_dir:
self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True) self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)
def installed_browser_bins(self, browser_name: str='*') -> List[Path]:
# if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
if platform.system().lower() == 'darwin':
# /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
return sorted(self.puppeteer_browsers_dir.glob(f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
# /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome"))
def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]: def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.' assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
@ -70,21 +96,13 @@ class PuppeteerBinProvider(BaseBinProvider):
return self._browser_abspaths[bin_name] return self._browser_abspaths[bin_name]
# first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
browsers_present = [d.name for d in self.puppeteer_browsers_dir.glob("*")] matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
if bin_name in browsers_present: if matching_bins:
candidates = [] newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number
# if on macOS, browser binary is inside a .app, otherwise it's just a plain binary self._browser_abspaths[bin_name] = newest_bin
if platform.system().lower() == 'darwin': return self._browser_abspaths[bin_name]
# /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
else:
# /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/linux*/chrome*/chrome'))
if candidates:
self._browser_abspaths[bin_name] = candidates[-1]
return self._browser_abspaths[bin_name]
return super().on_get_abspath(bin_name, **context) return None
def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str: def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
"""npx @puppeteer/browsers install chrome@stable""" """npx @puppeteer/browsers install chrome@stable"""
@ -119,64 +137,6 @@ class PuppeteerBinProvider(BaseBinProvider):
PUPPETEER_BINPROVIDER = PuppeteerBinProvider() PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
CHROMIUM_BINARY_NAMES = [
'chromium',
'chromium-browser',
'chromium-browser-beta',
'chromium-browser-unstable',
'chromium-browser-canary',
'chromium-browser-dev'
'/Applications/Chromium.app/Contents/MacOS/Chromium',
]
CHROME_BINARY_NAMES = [
'google-chrome',
'google-chrome-stable',
'google-chrome-beta',
'google-chrome-canary',
'google-chrome-unstable',
'google-chrome-dev',
# 'chrome',
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
'/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
]
def autodetect_system_chrome_install(PATH=None):
for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
abspath = bin_abspath(bin_name, PATH=env.PATH)
if abspath:
return abspath
return None
class ChromeBinary(BaseBinary):
name: BinName = 'chrome'
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
env.name: {
'abspath': lambda:
autodetect_system_chrome_install(PATH=env.PATH),
},
PUPPETEER_BINPROVIDER.name: {
'packages': lambda:
['chrome@stable'],
}
}
@staticmethod
def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
if not (binary.abspath and binary.abspath.exists()):
return
bin_dir.mkdir(parents=True, exist_ok=True)
symlink = bin_dir / binary.name
if platform.system().lower() == 'darwin':
# if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""")
symlink.chmod(0o777) # make sure its executable by everyone
else:
# otherwise on linux we can symlink directly to binary executable
symlink.symlink_to(binary.abspath)
# ALTERNATIVE INSTALL METHOD using Ansible: # ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = self.plugin_dir / 'install_puppeteer.yml' # install_playbook = self.plugin_dir / 'install_puppeteer.yml'
@ -192,18 +152,14 @@ class ChromeBinary(BaseBinary):
# ) # )
CHROME_BINARY = ChromeBinary()
PLUGIN_BINARIES = [CHROME_BINARY]
class PuppeteerPlugin(BasePlugin): class PuppeteerPlugin(BasePlugin):
app_label: str ='puppeteer' app_label: str ='puppeteer'
verbose_name: str = 'SingleFile' verbose_name: str = 'Puppeteer & Playwright'
hooks: List[InstanceOf[BaseHook]] = [ hooks: List[InstanceOf[BaseHook]] = [
PUPPETEER_CONFIG, PUPPETEER_CONFIG,
PUPPETEER_BINPROVIDER, PUPPETEER_BINPROVIDER,
CHROME_BINARY, PUPPETEER_BINARY,
] ]

View file

@ -4,7 +4,7 @@ import inspect
from huey.api import TaskWrapper from huey.api import TaskWrapper
from pathlib import Path from pathlib import Path
from typing import List, Literal from typing import List, Literal, ClassVar
from pydantic import BaseModel, ConfigDict, Field, computed_field from pydantic import BaseModel, ConfigDict, Field, computed_field

View file

@ -70,7 +70,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
"Provided By": [], "Provided By": [],
"Found Abspath": [], "Found Abspath": [],
"Related Configuration": [], "Related Configuration": [],
"Overrides": [], # "Overrides": [],
# "Description": [], # "Description": [],
} }
@ -109,7 +109,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
))) )))
# if not binary.provider_overrides: # if not binary.provider_overrides:
# import ipdb; ipdb.set_trace() # import ipdb; ipdb.set_trace()
rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200]) # rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
# rows['Description'].append(binary.description) # rows['Description'].append(binary.description)
return TableContext( return TableContext(

View file

@ -77,7 +77,7 @@ dependencies = [
############# VENDORED LIBS ###################### ############# VENDORED LIBS ######################
# these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian) # these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian)
# archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py # archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py
"pydantic-pkgr>=0.3.0", "pydantic-pkgr>=0.3.2",
"atomicwrites==1.4.1", "atomicwrites==1.4.1",
"pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
"django-taggit==1.3.0", "django-taggit==1.3.0",