mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-21 10:25:11 -04:00
merge plugantic and abx, all praise be to praise our glorious pluggy gods
This commit is contained in:
parent
4f42eb0313
commit
8d3f45b720
59 changed files with 870 additions and 1343 deletions
74
archivebox/plugins_extractor/wget/apps.py
Normal file
74
archivebox/plugins_extractor/wget/apps.py
Normal file
|
@ -0,0 +1,74 @@
|
|||
from typing import List
|
||||
from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook
|
||||
|
||||
|
||||
# class WgetToggleConfig(ConfigSet):
|
||||
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
|
||||
|
||||
# SAVE_WGET: bool = True
|
||||
# SAVE_WARC: bool = True
|
||||
|
||||
# class WgetDependencyConfig(ConfigSet):
|
||||
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||
|
||||
# WGET_BINARY: str = Field(default='wget')
|
||||
# WGET_ARGS: Optional[List[str]] = Field(default=None)
|
||||
# WGET_EXTRA_ARGS: List[str] = []
|
||||
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||
|
||||
# class WgetOptionsConfig(ConfigSet):
|
||||
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
|
||||
|
||||
# # loaded from shared config
|
||||
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
|
||||
# SAVE_WGET_REQUISITES: bool = Field(default=True)
|
||||
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
|
||||
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
|
||||
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
|
||||
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
|
||||
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
|
||||
|
||||
|
||||
# CONFIG = {
|
||||
# 'CHECK_SSL_VALIDITY': False,
|
||||
# 'SAVE_WARC': False,
|
||||
# 'TIMEOUT': 999,
|
||||
# }
|
||||
|
||||
|
||||
# WGET_CONFIG = [
|
||||
# WgetToggleConfig(**CONFIG),
|
||||
# WgetDependencyConfig(**CONFIG),
|
||||
# WgetOptionsConfig(**CONFIG),
|
||||
# ]
|
||||
|
||||
|
||||
|
||||
# class WgetExtractor(Extractor):
|
||||
# name: ExtractorName = 'wget'
|
||||
# binary: Binary = WgetBinary()
|
||||
|
||||
# def get_output_path(self, snapshot) -> Path:
|
||||
# return get_wget_output_path(snapshot)
|
||||
|
||||
|
||||
# class WarcExtractor(Extractor):
|
||||
# name: ExtractorName = 'warc'
|
||||
# binary: Binary = WgetBinary()
|
||||
|
||||
# def get_output_path(self, snapshot) -> Path:
|
||||
# return get_wget_output_path(snapshot)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class WgetPlugin(BasePlugin):
|
||||
app_label: str = 'wget'
|
||||
verbose_name: str = 'WGET'
|
||||
|
||||
hooks: List[InstanceOf[BaseHook]] = []
|
||||
|
||||
|
||||
PLUGIN = WgetPlugin()
|
||||
DJANGO_APP = PLUGIN.AppConfig
|
Loading…
Add table
Add a link
Reference in a new issue