mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 22:54:27 -04:00
split puppeteer plugin into Puppeteer, Playwright, and Chrome
This commit is contained in:
parent
33fd7fe439
commit
541cd6c5a1
10 changed files with 414 additions and 124 deletions
0
archivebox/builtin_plugins/chrome/__init__.py
Normal file
0
archivebox/builtin_plugins/chrome/__init__.py
Normal file
132
archivebox/builtin_plugins/chrome/apps.py
Normal file
132
archivebox/builtin_plugins/chrome/apps.py
Normal file
|
@ -0,0 +1,132 @@
|
||||||
|
import platform
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Dict
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
# Depends on other PyPI/vendor packages:
|
||||||
|
from pydantic import InstanceOf, Field
|
||||||
|
from pydantic_pkgr import (
|
||||||
|
BinProvider,
|
||||||
|
BinName,
|
||||||
|
BinProviderName,
|
||||||
|
ProviderLookupDict,
|
||||||
|
bin_abspath,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Depends on other Django apps:
|
||||||
|
from plugantic.base_plugin import BasePlugin
|
||||||
|
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
|
||||||
|
from plugantic.base_binary import BaseBinary, env
|
||||||
|
# from plugantic.base_extractor import BaseExtractor
|
||||||
|
# from plugantic.base_queue import BaseQueue
|
||||||
|
from plugantic.base_hook import BaseHook
|
||||||
|
|
||||||
|
# Depends on Other Plugins:
|
||||||
|
from builtin_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER
|
||||||
|
from builtin_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER
|
||||||
|
|
||||||
|
|
||||||
|
# Candidate names / absolute paths for Chromium builds, in lookup order.
# The absolute entry is the standard macOS .app bundle location.
CHROMIUM_BINARY_NAMES = [
    "chromium",
    "chromium-browser",
    "chromium-browser-beta",
    "chromium-browser-unstable",
    "chromium-browser-canary",
    "chromium-browser-dev",
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
]

# Candidate names / absolute paths for Google Chrome builds, in lookup order.
CHROME_BINARY_NAMES = [
    "google-chrome",
    "google-chrome-stable",
    "google-chrome-beta",
    "google-chrome-canary",
    "google-chrome-unstable",
    "google-chrome-dev",
    # 'chrome',
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
]


def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
    """Return the first Chrome/Chromium binary that bin_abspath() can resolve.

    Google Chrome candidates are tried before Chromium candidates.

    Args:
        PATH: search path handed to bin_abspath(). Falls back to env.PATH
            when omitted or empty.

    Returns:
        Whatever bin_abspath() returned for the first candidate it resolved,
        or None when no candidate resolves.
    """
    # Previously the PATH argument was silently ignored and env.PATH was always used.
    search_path = PATH or env.PATH

    for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
        abspath = bin_abspath(bin_name, PATH=search_path)
        if abspath:
            return abspath
    return None
|
||||||
|
|
||||||
|
###################### Config ##########################
|
||||||
|
|
||||||
|
|
||||||
|
# Config options for the Chrome binary dependency, filed under the
# 'DEPENDENCY_CONFIG' section.
class ChromeDependencyConfigs(BaseConfigSet):
    section: ConfigSectionName = 'DEPENDENCY_CONFIG'

    # Was 'wget' (copy-pasted from the wget plugin); the Chrome plugin's binary is named 'chrome'.
    CHROME_BINARY: str = Field(default='chrome')
    CHROME_ARGS: Optional[List[str]] = Field(default=None)
    CHROME_EXTRA_ARGS: List[str] = []
    # NOTE(review): this default also looks inherited from the wget config -- confirm
    # that '--timeout={TIMEOUT-10}' is a flag/template Chrome actually understands.
    CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||||
|
|
||||||
|
# Combined config set for the Chrome plugin. It currently adds no fields of its
# own on top of ChromeDependencyConfigs.
class ChromeConfigs(ChromeDependencyConfigs):
    # section: ConfigSectionName = 'ALL_CONFIGS'
    pass


# No plugin-level overrides yet: the config set is built purely from field defaults.
DEFAULT_GLOBAL_CONFIG = {}

CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
|
||||||
|
|
||||||
|
|
||||||
|
# Binary hook describing the `chrome` executable and the providers that can
# supply it (puppeteer, the plain environment, playwright).
class ChromeBinary(BaseBinary):
    name: BinName = 'chrome'
    binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]

    # Per-provider lookup overrides, keyed by provider name.
    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        env.name: {
            # the binary may be installed under any of several chrome/chromium names
            'abspath': lambda:
                autodetect_system_chrome_install(PATH=env.PATH),
        },
        PUPPETEER_BINPROVIDER.name: {
            'packages': lambda:
                ['chrome@stable'],
        },
        PLAYWRIGHT_BINPROVIDER.name: {
            'packages': lambda:
                ['chromium'],
        },
    }

    @staticmethod
    def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
        """Expose the resolved browser binary as `<bin_dir>/<binary.name>`.

        Does nothing when `binary.abspath` is unset or does not exist.
        On macOS a small bash launcher script is written instead of a symlink;
        elsewhere a plain symlink to the executable is created.

        Args:
            binary: loaded binary object; `abspath` and `name` are read from it.
            bin_dir: directory to place the launcher/symlink in (created if missing).
        """
        if not (binary.abspath and binary.abspath.exists()):
            return

        bin_dir.mkdir(parents=True, exist_ok=True)
        symlink = bin_dir / binary.name

        # Remove any entry left over from a previous run first. Without this,
        # symlink_to() raises FileExistsError on re-run, and on macOS write_text()
        # would follow an old symlink and overwrite the real browser executable.
        # unlink() acts on the link itself, so broken symlinks are removed too.
        symlink.unlink(missing_ok=True)

        if platform.system().lower() == 'darwin':
            # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
            symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""")
            # NOTE(review): 0o777 also makes the launcher world-writable; 0o755 would
            # still be executable by everyone -- confirm whether write access is needed.
            symlink.chmod(0o777)  # make sure its executable by everyone
        else:
            # otherwise on linux we can symlink directly to binary executable
            symlink.symlink_to(binary.abspath)


CHROME_BINARY = ChromeBinary()

PLUGIN_BINARIES = [CHROME_BINARY]
|
||||||
|
|
||||||
|
# Plugin bundling the Chrome config set and the Chrome binary hook.
class ChromePlugin(BasePlugin):
    # Was 'puppeteer' (copy-pasted from the puppeteer plugin this file was split
    # out of). PuppeteerPlugin still uses that label, so this plugin needs its own.
    app_label: str = 'chrome'
    # NOTE(review): verbose_name still mentions Playwright although this plugin only
    # registers Chrome hooks -- confirm the intended display name.
    verbose_name: str = 'Chrome & Playwright'

    hooks: List[InstanceOf[BaseHook]] = [
        CHROME_CONFIG,
        CHROME_BINARY,
    ]


# Instantiate the plugin, register it against the Django settings, and expose
# its AppConfig under the DJANGO_APP name.
PLUGIN = ChromePlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig
|
|
@ -4,12 +4,12 @@ from pathlib import Path
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from pydantic import InstanceOf, Field
|
from pydantic import InstanceOf
|
||||||
|
|
||||||
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
|
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
|
||||||
|
|
||||||
from plugantic.base_plugin import BasePlugin
|
from plugantic.base_plugin import BasePlugin
|
||||||
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
|
from plugantic.base_configset import BaseConfigSet
|
||||||
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
|
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
|
||||||
from plugantic.base_hook import BaseHook
|
from plugantic.base_hook import BaseHook
|
||||||
|
|
||||||
|
@ -20,13 +20,14 @@ from ...config import CONFIG
|
||||||
|
|
||||||
|
|
||||||
class NpmDependencyConfigs(BaseConfigSet):
|
class NpmDependencyConfigs(BaseConfigSet):
|
||||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||||
|
|
||||||
USE_NPM: bool = True
|
# USE_NPM: bool = True
|
||||||
NPM_BINARY: str = Field(default='npm')
|
# NPM_BINARY: str = Field(default='npm')
|
||||||
NPM_ARGS: Optional[List[str]] = Field(default=None)
|
# NPM_ARGS: Optional[List[str]] = Field(default=None)
|
||||||
NPM_EXTRA_ARGS: List[str] = []
|
# NPM_EXTRA_ARGS: List[str] = []
|
||||||
NPM_DEFAULT_ARGS: List[str] = []
|
# NPM_DEFAULT_ARGS: List[str] = []
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_GLOBAL_CONFIG = {
|
DEFAULT_GLOBAL_CONFIG = {
|
||||||
|
@ -35,7 +36,7 @@ NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
|
||||||
|
|
||||||
|
|
||||||
class SystemNpmProvider(NpmProvider, BaseBinProvider):
|
class SystemNpmProvider(NpmProvider, BaseBinProvider):
|
||||||
name: BinProviderName = "npm"
|
name: BinProviderName = "sys_npm"
|
||||||
PATH: PATHStr = str(CONFIG.NODE_BIN_PATH)
|
PATH: PATHStr = str(CONFIG.NODE_BIN_PATH)
|
||||||
|
|
||||||
npm_prefix: Optional[Path] = None
|
npm_prefix: Optional[Path] = None
|
||||||
|
|
|
@ -30,6 +30,7 @@ class PipDependencyConfigs(BaseConfigSet):
|
||||||
PIP_ARGS: Optional[List[str]] = Field(default=None)
|
PIP_ARGS: Optional[List[str]] = Field(default=None)
|
||||||
PIP_EXTRA_ARGS: List[str] = []
|
PIP_EXTRA_ARGS: List[str] = []
|
||||||
PIP_DEFAULT_ARGS: List[str] = []
|
PIP_DEFAULT_ARGS: List[str] = []
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_GLOBAL_CONFIG = {
|
DEFAULT_GLOBAL_CONFIG = {
|
||||||
|
@ -37,15 +38,27 @@ DEFAULT_GLOBAL_CONFIG = {
|
||||||
PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
|
PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
|
||||||
|
|
||||||
class SystemPipBinProvider(PipProvider, BaseBinProvider):
|
class SystemPipBinProvider(PipProvider, BaseBinProvider):
|
||||||
name: BinProviderName = "pip"
|
name: BinProviderName = "sys_pip"
|
||||||
INSTALLER_BIN: BinName = "pip"
|
INSTALLER_BIN: BinName = "pip"
|
||||||
|
|
||||||
pip_venv: Optional[Path] = None # global pip scope
|
pip_venv: Optional[Path] = None # global pip scope
|
||||||
|
|
||||||
|
def on_install(self, bin_name: str, **kwargs):
|
||||||
|
# never modify system pip packages
|
||||||
|
return 'refusing to install packages globally with system pip, use a venv instead'
|
||||||
|
|
||||||
class SystemPipxBinProvider(PipProvider, BaseBinProvider):
|
class SystemPipxBinProvider(PipProvider, BaseBinProvider):
|
||||||
name: BinProviderName = "pipx"
|
name: BinProviderName = "pipx"
|
||||||
INSTALLER_BIN: BinName = "pipx"
|
INSTALLER_BIN: BinName = "pipx"
|
||||||
|
|
||||||
|
pip_venv: Optional[Path] = None # global pipx scope
|
||||||
|
|
||||||
|
|
||||||
|
class VenvPipBinProvider(PipProvider, BaseBinProvider):
|
||||||
|
name: BinProviderName = "venv_pip"
|
||||||
|
INSTALLER_BIN: BinName = "pip"
|
||||||
|
|
||||||
|
pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV", None) or '/tmp/NotInsideAVenv')
|
||||||
|
|
||||||
|
|
||||||
class LibPipBinProvider(PipProvider, BaseBinProvider):
|
class LibPipBinProvider(PipProvider, BaseBinProvider):
|
||||||
|
@ -55,7 +68,8 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
|
||||||
pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / 'lib' / 'pip' / 'venv'
|
pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / 'lib' / 'pip' / 'venv'
|
||||||
|
|
||||||
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
|
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
|
||||||
SYS_PIPX_BINPROVIDER = SystemPipxBinProvider()
|
PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
|
||||||
|
VENV_PIP_BINPROVIDER = VenvPipBinProvider()
|
||||||
LIB_PIP_BINPROVIDER = LibPipBinProvider()
|
LIB_PIP_BINPROVIDER = LibPipBinProvider()
|
||||||
pip = LIB_PIP_BINPROVIDER
|
pip = LIB_PIP_BINPROVIDER
|
||||||
|
|
||||||
|
@ -64,7 +78,7 @@ pip = LIB_PIP_BINPROVIDER
|
||||||
class PythonBinary(BaseBinary):
|
class PythonBinary(BaseBinary):
|
||||||
name: BinName = 'python'
|
name: BinName = 'python'
|
||||||
|
|
||||||
binproviders_supported: List[InstanceOf[BinProvider]] = [SYS_PIP_BINPROVIDER, apt, brew, env]
|
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
||||||
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
|
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
|
||||||
SYS_PIP_BINPROVIDER.name: {
|
SYS_PIP_BINPROVIDER.name: {
|
||||||
'abspath': lambda:
|
'abspath': lambda:
|
||||||
|
@ -78,13 +92,15 @@ PYTHON_BINARY = PythonBinary()
|
||||||
|
|
||||||
class SqliteBinary(BaseBinary):
|
class SqliteBinary(BaseBinary):
|
||||||
name: BinName = 'sqlite'
|
name: BinName = 'sqlite'
|
||||||
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER])
|
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
|
||||||
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
|
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
|
||||||
|
VENV_PIP_BINPROVIDER.name: {
|
||||||
|
"abspath": lambda: Path(inspect.getfile(django_sqlite3)),
|
||||||
|
"version": lambda: SemVer(django_sqlite3.version),
|
||||||
|
},
|
||||||
SYS_PIP_BINPROVIDER.name: {
|
SYS_PIP_BINPROVIDER.name: {
|
||||||
'abspath': lambda:
|
"abspath": lambda: Path(inspect.getfile(django_sqlite3)),
|
||||||
Path(inspect.getfile(django_sqlite3)),
|
"version": lambda: SemVer(django_sqlite3.version),
|
||||||
'version': lambda:
|
|
||||||
SemVer(django_sqlite3.version),
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,13 +110,15 @@ SQLITE_BINARY = SqliteBinary()
|
||||||
class DjangoBinary(BaseBinary):
|
class DjangoBinary(BaseBinary):
|
||||||
name: BinName = 'django'
|
name: BinName = 'django'
|
||||||
|
|
||||||
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER])
|
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
|
||||||
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
|
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
|
||||||
|
VENV_PIP_BINPROVIDER.name: {
|
||||||
|
"abspath": lambda: inspect.getfile(django),
|
||||||
|
"version": lambda: django.VERSION[:3],
|
||||||
|
},
|
||||||
SYS_PIP_BINPROVIDER.name: {
|
SYS_PIP_BINPROVIDER.name: {
|
||||||
'abspath': lambda:
|
"abspath": lambda: inspect.getfile(django),
|
||||||
inspect.getfile(django),
|
"version": lambda: django.VERSION[:3],
|
||||||
'version': lambda:
|
|
||||||
django.VERSION[:3],
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,7 +126,7 @@ DJANGO_BINARY = DjangoBinary()
|
||||||
|
|
||||||
class PipBinary(BaseBinary):
|
class PipBinary(BaseBinary):
|
||||||
name: BinName = "pip"
|
name: BinName = "pip"
|
||||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
||||||
|
|
||||||
|
|
||||||
PIP_BINARY = PipBinary()
|
PIP_BINARY = PipBinary()
|
||||||
|
@ -164,7 +182,8 @@ class PipPlugin(BasePlugin):
|
||||||
hooks: List[InstanceOf[BaseHook]] = [
|
hooks: List[InstanceOf[BaseHook]] = [
|
||||||
PIP_CONFIG,
|
PIP_CONFIG,
|
||||||
SYS_PIP_BINPROVIDER,
|
SYS_PIP_BINPROVIDER,
|
||||||
SYS_PIPX_BINPROVIDER,
|
PIPX_PIP_BINPROVIDER,
|
||||||
|
VENV_PIP_BINPROVIDER,
|
||||||
LIB_PIP_BINPROVIDER,
|
LIB_PIP_BINPROVIDER,
|
||||||
PIP_BINARY,
|
PIP_BINARY,
|
||||||
PYTHON_BINARY,
|
PYTHON_BINARY,
|
||||||
|
|
0
archivebox/builtin_plugins/playwright/__init__.py
Normal file
0
archivebox/builtin_plugins/playwright/__init__.py
Normal file
182
archivebox/builtin_plugins/playwright/apps.py
Normal file
182
archivebox/builtin_plugins/playwright/apps.py
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
import platform
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Dict, ClassVar
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
# Depends on other PyPI/vendor packages:
|
||||||
|
from pydantic import InstanceOf, computed_field, Field
|
||||||
|
from pydantic_pkgr import (
|
||||||
|
BinName,
|
||||||
|
BinProvider,
|
||||||
|
BinProviderName,
|
||||||
|
ProviderLookupDict,
|
||||||
|
InstallArgs,
|
||||||
|
PATHStr,
|
||||||
|
HostBinPath,
|
||||||
|
bin_abspath,
|
||||||
|
OPERATING_SYSTEM,
|
||||||
|
DEFAULT_ENV_PATH,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Depends on other Django apps:
|
||||||
|
from plugantic.base_plugin import BasePlugin
|
||||||
|
from plugantic.base_configset import BaseConfigSet
|
||||||
|
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
|
||||||
|
# from plugantic.base_extractor import BaseExtractor
|
||||||
|
# from plugantic.base_queue import BaseQueue
|
||||||
|
from plugantic.base_hook import BaseHook
|
||||||
|
|
||||||
|
# Depends on Other Plugins:
|
||||||
|
from builtin_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
|
||||||
|
|
||||||
|
|
||||||
|
###################### Config ##########################
|
||||||
|
|
||||||
|
|
||||||
|
# Config set for the Playwright plugin. No options are active yet; the
# commented-out fields below are kept as placeholders.
class PlaywrightConfigs(BaseConfigSet):
    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'

    # PLAYWRIGHT_BINARY: str = Field(default='wget')
    # PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None)
    # PLAYWRIGHT_EXTRA_ARGS: List[str] = []
    # PLAYWRIGHT_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
    pass


# No plugin-level overrides yet: the config set is built purely from defaults.
DEFAULT_GLOBAL_CONFIG = {}

PLAYWRIGHT_CONFIG = PlaywrightConfigs(**DEFAULT_GLOBAL_CONFIG)

# <OUTPUT_DIR>/lib/browsers
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Binary hook for the `playwright` command itself (used below as the installer
# binary of PlaywrightBinProvider).
class PlaywrightBinary(BaseBinary):
    name: BinName = "playwright"

    # pip-based providers are listed first, the plain environment last
    binproviders_supported: List[InstanceOf[BinProvider]] = [
        LIB_PIP_BINPROVIDER,
        VENV_PIP_BINPROVIDER,
        SYS_PIP_BINPROVIDER,
        env,
    ]


PLAYWRIGHT_BINARY = PlaywrightBinary()
|
||||||
|
|
||||||
|
|
||||||
|
# Bin provider that installs and locates browsers through the `playwright` CLI.
# Only the "chrome" binary name is supported; it is mapped to playwright's
# "chromium" package.
#
# NOTE(review): the `puppeteer_*` field names were carried over from the puppeteer
# provider. They are left unchanged here because they are part of the model's
# public fields.
class PlaywrightBinProvider(BaseBinProvider):
    name: BinProviderName = "playwright"
    INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name

    PATH: PATHStr = f"{settings.CONFIG.BIN_DIR}:{DEFAULT_ENV_PATH}"

    # directory searched for downloaded browsers (differs between macOS and other OSes)
    puppeteer_browsers_dir: Optional[Path] = (
        Path("~/Library/Caches/ms-playwright").expanduser()
        if OPERATING_SYSTEM == "darwin" else
        Path("~/.cache/ms-playwright").expanduser()
    )
    puppeteer_install_args: List[str] = ["install"]  # --with-deps

    packages_handler: ProviderLookupDict = Field(default={
        "chrome": lambda: ["chromium"],
    }, exclude=True)

    # class-level cache of resolved browser paths, keyed by bin_name (shared by all instances)
    _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}

    @computed_field
    @property
    def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
        """Absolute path of the `playwright` CLI, as resolved by PLAYWRIGHT_BINARY.load()."""
        return PLAYWRIGHT_BINARY.load().abspath

    def setup(self) -> None:
        """Check that the pip provider is usable and create the browsers directory."""
        assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"

        if self.puppeteer_browsers_dir:
            self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)

    def installed_browser_bins(self, browser_name: str = "*") -> List[Path]:
        """Return the sorted paths matched under the browsers dir for `browser_name`.

        "chrome" is translated to "chromium". On macOS the glob targets the
        entries inside `<browser>.app/Contents/MacOS/`. On other systems it
        matches every entry directly inside the `*-linux` directory, not only
        the browser executable.
        """
        if browser_name == 'chrome':
            browser_name = 'chromium'

        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
        if platform.system().lower() == "darwin":
            # ~/Library/caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium
            return sorted(
                self.puppeteer_browsers_dir.glob(
                    f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*"
                )
            )

        # ~/.cache/ms-playwright/chromium-1097/chrome-linux/chrome
        return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}-*/*-linux/*"))

    def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
        """Locate the installed browser for `bin_name` (only "chrome" is supported).

        Lookup order: the class-level cache, then the playwright browsers
        directory, then `google-chrome-stable` on env.PATH. A successful lookup
        is cached. Returns None when nothing is found.
        """
        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."

        # already loaded, return abspath from cache
        if bin_name in self._browser_abspaths:
            return self._browser_abspaths[bin_name]

        def is_browser_executable(candidate: Path) -> bool:
            # the macOS glob only matches entries inside <App>.app/Contents/MacOS/
            if candidate.parent.name == "MacOS":
                return True
            # The Linux glob matches every file next to the binary, and all of their
            # paths contain "chrome" via the directory name. Match the file name exactly.
            return candidate.name in ("chrome", "chromium")

        # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
        matching_bins = [
            abspath
            for abspath in self.installed_browser_bins(bin_name)
            if is_browser_executable(abspath)
        ]
        if matching_bins:
            # already sorted alphabetically, last should theoretically be highest version number
            # NOTE(review): the sort is lexicographic, so e.g. "chromium-999" sorts after "chromium-1097"
            newest_bin = matching_bins[-1]
            self._browser_abspaths[bin_name] = newest_bin
            return self._browser_abspaths[bin_name]

        # playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well
        abspath = bin_abspath('google-chrome-stable', PATH=env.PATH)
        if abspath:
            self._browser_abspaths[bin_name] = abspath
            return self._browser_abspaths[bin_name]

        return None

    def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
        """playwright install chrome

        Runs the playwright CLI with `puppeteer_install_args` plus the resolved
        packages, then refreshes the cached browser path.

        Returns:
            The installer's stderr and stdout, stripped and joined by a newline.

        Raises:
            Exception: if the playwright CLI cannot be found, or the install
                command exits with a non-zero return code.
        """
        self.setup()
        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."

        installer_abspath = self.INSTALLER_BIN_ABSPATH  # resolve once; each access re-loads the binary
        if not installer_abspath:
            raise Exception(
                f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
            )
        packages = packages or self.on_get_packages(bin_name)

        # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {installer_abspath} install {packages}')

        install_args = [*self.puppeteer_install_args]

        proc = self.exec(bin_name=installer_abspath, cmd=[*install_args, *packages])

        if proc.returncode != 0:
            print(proc.stdout.strip())
            print(proc.stderr.strip())
            raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {bin_name}: {packages}")

        # The previous code parsed the last stdout line as "<name>@<version> <abspath>",
        # which is the @puppeteer/browsers output format, and cached the result. That
        # stored a bogus path (Path('') when stdout was empty). Instead, drop any stale
        # cache entry and re-detect the installed binary on disk.
        self._browser_abspaths.pop(bin_name, None)
        self.on_get_abspath(bin_name)

        return proc.stderr.strip() + "\n" + proc.stdout.strip()


PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Plugin bundling the Playwright config set, bin provider and binary hook.
class PlaywrightPlugin(BasePlugin):
    app_label: str = 'playwright'
    verbose_name: str = 'Playwright'

    hooks: List[InstanceOf[BaseHook]] = [
        PLAYWRIGHT_CONFIG,
        PLAYWRIGHT_BINPROVIDER,
        PLAYWRIGHT_BINARY,
    ]


# Instantiate the plugin, register it against the Django settings, and expose
# its AppConfig under the DJANGO_APP name.
PLUGIN = PlaywrightPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig
|
|
@ -6,33 +6,38 @@ from django.conf import settings
|
||||||
|
|
||||||
# Depends on other PyPI/vendor packages:
|
# Depends on other PyPI/vendor packages:
|
||||||
from pydantic import InstanceOf, Field
|
from pydantic import InstanceOf, Field
|
||||||
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, InstallArgs, HostBinPath, bin_abspath
|
from pydantic_pkgr import (
|
||||||
|
BinProvider,
|
||||||
|
BinName,
|
||||||
|
BinProviderName,
|
||||||
|
ProviderLookupDict,
|
||||||
|
InstallArgs,
|
||||||
|
PATHStr,
|
||||||
|
HostBinPath,
|
||||||
|
)
|
||||||
|
|
||||||
# Depends on other Django apps:
|
# Depends on other Django apps:
|
||||||
from plugantic.base_plugin import BasePlugin
|
from plugantic.base_plugin import BasePlugin
|
||||||
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
|
from plugantic.base_configset import BaseConfigSet
|
||||||
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
|
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
|
||||||
# from plugantic.base_extractor import BaseExtractor
|
# from plugantic.base_extractor import BaseExtractor
|
||||||
# from plugantic.base_queue import BaseQueue
|
# from plugantic.base_queue import BaseQueue
|
||||||
from plugantic.base_hook import BaseHook
|
from plugantic.base_hook import BaseHook
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER
|
from builtin_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
|
||||||
|
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
|
||||||
|
|
||||||
class PuppeteerDependencyConfigs(BaseConfigSet):
|
class PuppeteerConfigs(BaseConfigSet):
|
||||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||||
|
|
||||||
PUPPETEER_BINARY: str = Field(default='wget')
|
# PUPPETEER_BINARY: str = Field(default='wget')
|
||||||
PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
|
# PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
|
||||||
PUPPETEER_EXTRA_ARGS: List[str] = []
|
# PUPPETEER_EXTRA_ARGS: List[str] = []
|
||||||
PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
# PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||||
|
|
||||||
class PuppeteerConfigs(PuppeteerDependencyConfigs):
|
|
||||||
# section: ConfigSectionName = 'ALL_CONFIGS'
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
DEFAULT_GLOBAL_CONFIG = {
|
DEFAULT_GLOBAL_CONFIG = {
|
||||||
|
@ -42,17 +47,29 @@ PUPPETEER_CONFIG = PuppeteerConfigs(**DEFAULT_GLOBAL_CONFIG)
|
||||||
|
|
||||||
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers"
|
LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers"
|
||||||
|
|
||||||
|
|
||||||
|
class PuppeteerBinary(BaseBinary):
|
||||||
|
name: BinName = "puppeteer"
|
||||||
|
|
||||||
|
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
|
||||||
|
|
||||||
|
|
||||||
|
PUPPETEER_BINARY = PuppeteerBinary()
|
||||||
|
|
||||||
|
|
||||||
class PuppeteerBinProvider(BaseBinProvider):
|
class PuppeteerBinProvider(BaseBinProvider):
|
||||||
name: BinProviderName = "puppeteer"
|
name: BinProviderName = "puppeteer"
|
||||||
INSTALLER_BIN: BinName = "npx"
|
INSTALLER_BIN: BinName = "npx"
|
||||||
|
|
||||||
|
PATH: PATHStr = str(settings.CONFIG.BIN_DIR)
|
||||||
|
|
||||||
puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
|
puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
|
||||||
puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
|
puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
|
||||||
|
|
||||||
# packages_handler: ProviderLookupDict = {
|
packages_handler: ProviderLookupDict = Field(default={
|
||||||
# "chrome": lambda:
|
"chrome": lambda:
|
||||||
# ['chrome@stable'],
|
['chrome@stable'],
|
||||||
# }
|
}, exclude=True)
|
||||||
|
|
||||||
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
|
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
|
||||||
|
|
||||||
|
@ -61,6 +78,15 @@ class PuppeteerBinProvider(BaseBinProvider):
|
||||||
|
|
||||||
if self.puppeteer_browsers_dir:
|
if self.puppeteer_browsers_dir:
|
||||||
self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)
|
self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def installed_browser_bins(self, browser_name: str='*') -> List[Path]:
|
||||||
|
# if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
|
||||||
|
if platform.system().lower() == 'darwin':
|
||||||
|
# /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
|
||||||
|
return sorted(self.puppeteer_browsers_dir.glob(f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
|
||||||
|
|
||||||
|
# /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
|
||||||
|
return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome"))
|
||||||
|
|
||||||
def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
|
def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
|
||||||
assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
|
assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
|
||||||
|
@ -70,21 +96,13 @@ class PuppeteerBinProvider(BaseBinProvider):
|
||||||
return self._browser_abspaths[bin_name]
|
return self._browser_abspaths[bin_name]
|
||||||
|
|
||||||
# first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
|
# first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
|
||||||
browsers_present = [d.name for d in self.puppeteer_browsers_dir.glob("*")]
|
matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
|
||||||
if bin_name in browsers_present:
|
if matching_bins:
|
||||||
candidates = []
|
newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number
|
||||||
# if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
|
self._browser_abspaths[bin_name] = newest_bin
|
||||||
if platform.system().lower() == 'darwin':
|
return self._browser_abspaths[bin_name]
|
||||||
# /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
|
|
||||||
candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
|
|
||||||
else:
|
|
||||||
# /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
|
|
||||||
candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/linux*/chrome*/chrome'))
|
|
||||||
if candidates:
|
|
||||||
self._browser_abspaths[bin_name] = candidates[-1]
|
|
||||||
return self._browser_abspaths[bin_name]
|
|
||||||
|
|
||||||
return super().on_get_abspath(bin_name, **context)
|
return None
|
||||||
|
|
||||||
def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
|
def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
|
||||||
"""npx @puppeteer/browsers install chrome@stable"""
|
"""npx @puppeteer/browsers install chrome@stable"""
|
||||||
|
@ -119,64 +137,6 @@ class PuppeteerBinProvider(BaseBinProvider):
|
||||||
|
|
||||||
PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
|
PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
|
||||||
|
|
||||||
CHROMIUM_BINARY_NAMES = [
|
|
||||||
'chromium',
|
|
||||||
'chromium-browser',
|
|
||||||
'chromium-browser-beta',
|
|
||||||
'chromium-browser-unstable',
|
|
||||||
'chromium-browser-canary',
|
|
||||||
'chromium-browser-dev'
|
|
||||||
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
|
||||||
]
|
|
||||||
CHROME_BINARY_NAMES = [
|
|
||||||
'google-chrome',
|
|
||||||
'google-chrome-stable',
|
|
||||||
'google-chrome-beta',
|
|
||||||
'google-chrome-canary',
|
|
||||||
'google-chrome-unstable',
|
|
||||||
'google-chrome-dev',
|
|
||||||
# 'chrome',
|
|
||||||
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
|
||||||
'/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
|
|
||||||
]
|
|
||||||
|
|
||||||
def autodetect_system_chrome_install(PATH=None):
|
|
||||||
for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
|
|
||||||
abspath = bin_abspath(bin_name, PATH=env.PATH)
|
|
||||||
if abspath:
|
|
||||||
return abspath
|
|
||||||
return None
|
|
||||||
|
|
||||||
class ChromeBinary(BaseBinary):
    """Binary definition for the Chrome/Chromium browser.

    The binary can be provided either by PUPPETEER_BINPROVIDER (which is asked
    for the 'chrome@stable' package) or by the `env` provider (which locates an
    already-installed browser via autodetect_system_chrome_install).
    """

    name: BinName = 'chrome'
    binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env]

    # Per-provider lookup overrides, keyed by provider name.
    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        env.name: {
            # search env.PATH for any known Chrome/Chromium executable name
            'abspath': lambda:
                autodetect_system_chrome_install(PATH=env.PATH),
        },
        PUPPETEER_BINPROVIDER.name: {
            'packages': lambda:
                ['chrome@stable'],
        },
    }

    @staticmethod
    def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
        """Expose `binary` inside `bin_dir` under the name `binary.name`.

        Does nothing when `binary.abspath` is unset or does not exist.
        On macOS a small bash launcher script is written instead of a symlink;
        on every other platform a symlink to `binary.abspath` is created.

        Args:
            binary: object providing `.abspath` (path-like with `.exists()`)
                and `.name`.
            bin_dir: directory in which the launcher/symlink is created;
                it is created (with parents) if missing.
        """
        if not (binary.abspath and binary.abspath.exists()):
            return

        bin_dir.mkdir(parents=True, exist_ok=True)
        symlink = bin_dir / binary.name

        # Remove any entry left over from a previous run so this call is
        # idempotent: symlink_to() raises FileExistsError when the link already
        # exists, and write_text() on an existing symlink would follow it and
        # overwrite the link's *target* instead of replacing the link.
        symlink.unlink(missing_ok=True)

        if platform.system().lower() == 'darwin':
            # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
            symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""")
            symlink.chmod(0o777)  # make sure its executable by everyone
        else:
            # otherwise on linux we can symlink directly to binary executable
            symlink.symlink_to(binary.abspath)
|
|
||||||
|
|
||||||
|
|
||||||
# ALTERNATIVE INSTALL METHOD using Ansible:
|
# ALTERNATIVE INSTALL METHOD using Ansible:
|
||||||
# install_playbook = self.plugin_dir / 'install_puppeteer.yml'
|
# install_playbook = self.plugin_dir / 'install_puppeteer.yml'
|
||||||
|
@ -192,18 +152,14 @@ class ChromeBinary(BaseBinary):
|
||||||
# )
|
# )
|
||||||
|
|
||||||
|
|
||||||
CHROME_BINARY = ChromeBinary()
|
|
||||||
|
|
||||||
PLUGIN_BINARIES = [CHROME_BINARY]
|
|
||||||
|
|
||||||
class PuppeteerPlugin(BasePlugin):
|
class PuppeteerPlugin(BasePlugin):
|
||||||
app_label: str ='puppeteer'
|
app_label: str ='puppeteer'
|
||||||
verbose_name: str = 'SingleFile'
|
verbose_name: str = 'Puppeteer & Playwright'
|
||||||
|
|
||||||
hooks: List[InstanceOf[BaseHook]] = [
|
hooks: List[InstanceOf[BaseHook]] = [
|
||||||
PUPPETEER_CONFIG,
|
PUPPETEER_CONFIG,
|
||||||
PUPPETEER_BINPROVIDER,
|
PUPPETEER_BINPROVIDER,
|
||||||
CHROME_BINARY,
|
PUPPETEER_BINARY,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ import inspect
|
||||||
from huey.api import TaskWrapper
|
from huey.api import TaskWrapper
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Literal
|
from typing import List, Literal, ClassVar
|
||||||
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -70,7 +70,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||||
"Provided By": [],
|
"Provided By": [],
|
||||||
"Found Abspath": [],
|
"Found Abspath": [],
|
||||||
"Related Configuration": [],
|
"Related Configuration": [],
|
||||||
"Overrides": [],
|
# "Overrides": [],
|
||||||
# "Description": [],
|
# "Description": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,7 +109,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||||
)))
|
)))
|
||||||
# if not binary.provider_overrides:
|
# if not binary.provider_overrides:
|
||||||
# import ipdb; ipdb.set_trace()
|
# import ipdb; ipdb.set_trace()
|
||||||
rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
|
# rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
|
||||||
# rows['Description'].append(binary.description)
|
# rows['Description'].append(binary.description)
|
||||||
|
|
||||||
return TableContext(
|
return TableContext(
|
||||||
|
|
|
@ -77,7 +77,7 @@ dependencies = [
|
||||||
############# VENDORED LIBS ######################
|
############# VENDORED LIBS ######################
|
||||||
# these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian)
|
# these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian)
|
||||||
# archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py
|
# archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py
|
||||||
"pydantic-pkgr>=0.3.0",
|
"pydantic-pkgr>=0.3.2",
|
||||||
"atomicwrites==1.4.1",
|
"atomicwrites==1.4.1",
|
||||||
"pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
|
"pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
|
||||||
"django-taggit==1.3.0",
|
"django-taggit==1.3.0",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue