mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
new vastly simplified plugin spec without pydantic
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
This commit is contained in:
parent
abf75f49f4
commit
01ba6d49d3
115 changed files with 2466 additions and 2301 deletions
|
@ -1,203 +0,0 @@
|
|||
__package__ = 'archivebox.plugins_pkg.playwright'
|
||||
|
||||
import os
|
||||
import platform
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, ClassVar
|
||||
|
||||
# Depends on other PyPI/vendor packages:
|
||||
from pydantic import InstanceOf, computed_field, Field
|
||||
from pydantic_pkgr import (
|
||||
BinName,
|
||||
BinProvider,
|
||||
BinProviderName,
|
||||
BinProviderOverrides,
|
||||
InstallArgs,
|
||||
PATHStr,
|
||||
HostBinPath,
|
||||
bin_abspath,
|
||||
OPERATING_SYSTEM,
|
||||
DEFAULT_ENV_PATH,
|
||||
)
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
|
||||
# Depends on other Django apps:
|
||||
from abx.archivebox.base_plugin import BasePlugin
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env
|
||||
# from abx.archivebox.base_extractor import BaseExtractor
|
||||
# from abx.archivebox.base_queue import BaseQueue
|
||||
from abx.archivebox.base_hook import BaseHook
|
||||
|
||||
from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
|
||||
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
|
||||
class PlaywrightConfigs(BaseConfigSet):
|
||||
# PLAYWRIGHT_BINARY: str = Field(default='wget')
|
||||
# PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None)
|
||||
# PLAYWRIGHT_EXTRA_ARGS: List[str] = []
|
||||
# PLAYWRIGHT_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||
pass
|
||||
|
||||
|
||||
PLAYWRIGHT_CONFIG = PlaywrightConfigs()
|
||||
|
||||
LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
|
||||
|
||||
|
||||
|
||||
class PlaywrightBinary(BaseBinary):
|
||||
name: BinName = "playwright"
|
||||
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env]
|
||||
|
||||
|
||||
|
||||
PLAYWRIGHT_BINARY = PlaywrightBinary()
|
||||
|
||||
|
||||
class PlaywrightBinProvider(BaseBinProvider):
|
||||
name: BinProviderName = "playwright"
|
||||
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
|
||||
|
||||
PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
|
||||
|
||||
playwright_browsers_dir: Path = (
|
||||
Path("~/Library/Caches/ms-playwright").expanduser() # macos playwright cache dir
|
||||
if OPERATING_SYSTEM == "darwin" else
|
||||
Path("~/.cache/ms-playwright").expanduser() # linux playwright cache dir
|
||||
)
|
||||
playwright_install_args: List[str] = ["install"] # --with-deps
|
||||
|
||||
packages_handler: BinProviderOverrides = Field(default={
|
||||
"chrome": ["chromium"],
|
||||
}, exclude=True)
|
||||
|
||||
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
|
||||
return PLAYWRIGHT_BINARY.load().abspath
|
||||
|
||||
def setup(self) -> None:
|
||||
assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
|
||||
|
||||
if self.playwright_browsers_dir:
|
||||
self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def installed_browser_bins(self, browser_name: str = "*") -> List[Path]:
|
||||
if browser_name == 'chrome':
|
||||
browser_name = 'chromium'
|
||||
|
||||
# if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
|
||||
if platform.system().lower() == "darwin":
|
||||
# ~/Library/caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium
|
||||
return sorted(
|
||||
self.playwright_browsers_dir.glob(
|
||||
f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*"
|
||||
)
|
||||
)
|
||||
|
||||
# ~/Library/caches/ms-playwright/chromium-1097/chrome-linux/chromium
|
||||
paths = []
|
||||
for path in sorted(self.playwright_browsers_dir.glob(f"{browser_name}-*/*-linux/*")):
|
||||
if 'xdg-settings' in str(path):
|
||||
continue
|
||||
if 'ffmpeg' in str(path):
|
||||
continue
|
||||
if '/chrom' in str(path) and 'chrom' in path.name.lower():
|
||||
paths.append(path)
|
||||
return paths
|
||||
|
||||
def default_abspath_handler(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
|
||||
assert bin_name == "chrome", "Only chrome is supported using the @puppeteer/browsers install method currently."
|
||||
|
||||
# already loaded, return abspath from cache
|
||||
if bin_name in self._browser_abspaths:
|
||||
return self._browser_abspaths[bin_name]
|
||||
|
||||
# first time loading, find browser in self.playwright_browsers_dir by searching filesystem for installed binaries
|
||||
matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
|
||||
if matching_bins:
|
||||
newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number
|
||||
self._browser_abspaths[bin_name] = newest_bin
|
||||
return self._browser_abspaths[bin_name]
|
||||
|
||||
# playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well
|
||||
abspath = bin_abspath('google-chrome-stable', PATH=env.PATH)
|
||||
if abspath:
|
||||
self._browser_abspaths[bin_name] = abspath
|
||||
return self._browser_abspaths[bin_name]
|
||||
|
||||
return None
|
||||
|
||||
def default_install_handler(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
|
||||
"""playwright install chrome"""
|
||||
self.setup()
|
||||
assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."
|
||||
|
||||
if not self.INSTALLER_BIN_ABSPATH:
|
||||
raise Exception(
|
||||
f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
|
||||
)
|
||||
packages = packages or self.get_packages(bin_name)
|
||||
|
||||
# print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')
|
||||
|
||||
|
||||
# playwright install-deps (to install system dependencies like fonts, graphics libraries, etc.)
|
||||
if platform.system().lower() != 'darwin':
|
||||
# libglib2.0-0, libnss3, libnspr4, libdbus-1-3, libatk1.0-0, libatk-bridge2.0-0, libcups2, libdrm2, libxcb1, libxkbcommon0, libatspi2.0-0, libx11-6, libxcomposite1, libxdamage1, libxext6, libxfixes3, libxrandr2, libgbm1, libcairo2, libasound2
|
||||
proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=['install-deps'])
|
||||
if proc.returncode != 0:
|
||||
print(proc.stdout.strip())
|
||||
print(proc.stderr.strip())
|
||||
|
||||
proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=['install', *packages])
|
||||
|
||||
if proc.returncode != 0:
|
||||
print(proc.stdout.strip())
|
||||
print(proc.stderr.strip())
|
||||
raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {packages}: {packages} PACKAGES={packages}")
|
||||
|
||||
# chrome@129.0.6668.58 /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
|
||||
# playwright build v1010 downloaded to /home/squash/.cache/ms-playwright/ffmpeg-1010
|
||||
output_lines = [
|
||||
line for line in proc.stdout.strip().split('\n')
|
||||
if '/chrom' in line
|
||||
and 'chrom' in line.rsplit('/', 1)[-1].lower() # if final path segment (filename) contains chrome or chromium
|
||||
and 'xdg-settings' not in line
|
||||
and 'ffmpeg' not in line
|
||||
]
|
||||
if output_lines:
|
||||
relpath = output_lines[0].split(str(self.playwright_browsers_dir))[-1]
|
||||
abspath = self.playwright_browsers_dir / relpath
|
||||
if os.path.isfile(abspath) and os.access(abspath, os.X_OK):
|
||||
self._browser_abspaths[bin_name] = abspath
|
||||
|
||||
return (proc.stderr.strip() + "\n" + proc.stdout.strip()).strip()
|
||||
|
||||
PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider()
|
||||
|
||||
|
||||
|
||||
class PlaywrightPlugin(BasePlugin):
|
||||
app_label: str = 'playwright'
|
||||
verbose_name: str = 'Playwright (PIP)'
|
||||
|
||||
hooks: List[InstanceOf[BaseHook]] = [
|
||||
PLAYWRIGHT_CONFIG,
|
||||
PLAYWRIGHT_BINPROVIDER,
|
||||
PLAYWRIGHT_BINARY,
|
||||
]
|
||||
|
||||
|
||||
|
||||
PLUGIN = PlaywrightPlugin()
|
||||
# PLUGIN.register(settings)
|
||||
DJANGO_APP = PLUGIN.AppConfig
|
Loading…
Add table
Add a link
Reference in a new issue