mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-21 02:15:10 -04:00
move final legacy config to plugins and fix archivebox config cmd and add search opt
This commit is contained in:
parent
115f89fd8b
commit
b3107ab830
20 changed files with 379 additions and 275 deletions
|
@ -1,5 +1,6 @@
|
|||
__package__ = 'plugins_extractor.chrome'
|
||||
__label__ = 'chrome'
|
||||
__id__ = 'chrome'
|
||||
__label__ = 'Chrome'
|
||||
__version__ = '2024.10.14'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome'
|
||||
|
@ -11,13 +12,14 @@ import abx
|
|||
@abx.hookimpl
|
||||
def get_PLUGIN():
|
||||
return {
|
||||
'chrome': {
|
||||
'PACKAGE': __package__,
|
||||
'LABEL': __label__,
|
||||
'VERSION': __version__,
|
||||
'AUTHOR': __author__,
|
||||
'HOMEPAGE': __homepage__,
|
||||
'DEPENDENCIES': __dependencies__,
|
||||
__id__: {
|
||||
'id': __id__,
|
||||
'package': __package__,
|
||||
'label': __label__,
|
||||
'version': __version__,
|
||||
'author': __author__,
|
||||
'homepage': __homepage__,
|
||||
'dependencies': __dependencies__,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -26,7 +28,7 @@ def get_CONFIG():
|
|||
from .config import CHROME_CONFIG
|
||||
|
||||
return {
|
||||
'chrome': CHROME_CONFIG
|
||||
__id__: CHROME_CONFIG
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
|
@ -50,22 +52,3 @@ def ready():
|
|||
# 'screenshot': SCREENSHOT_EXTRACTOR,
|
||||
# 'dom': DOM_EXTRACTOR,
|
||||
# }
|
||||
|
||||
# Hooks Available:
|
||||
|
||||
# Events:
|
||||
# on_crawl_schedule_tick
|
||||
# on_seed_post_save
|
||||
# on_crawl_post_save
|
||||
# on_snapshot_post_save
|
||||
# on_archiveresult_post_save
|
||||
|
||||
|
||||
# create_root_snapshot_from_seed
|
||||
# create_archiveresults_pending_from_snapshot
|
||||
# create_crawl_from_crawlschedule_if_due
|
||||
# create_crawl_copy_from_template
|
||||
#
|
||||
|
||||
|
||||
# create_crawl_from_crawlschedule_if_due
|
||||
|
|
41
archivebox/plugins_extractor/htmltotext/__init__.py
Normal file
41
archivebox/plugins_extractor/htmltotext/__init__.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
__package__ = 'plugins_extractor.htmltotext'
|
||||
__id__ = 'htmltotext'
|
||||
__label__ = 'HTML-to-Text'
|
||||
__version__ = '2024.10.14'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://github.com/ArchiveBox/archivebox'
|
||||
__dependencies__ = []
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
def get_PLUGIN():
    """Return this plugin's metadata as a one-entry dict keyed by ``__id__``.

    The entry is assembled from the module-level dunder constants
    (``__id__``, ``__package__``, ``__label__``, ``__version__``,
    ``__author__``, ``__homepage__``, ``__dependencies__``).
    """
    plugin_info = {
        'id': __id__,
        'package': __package__,
        'label': __label__,
        'version': __version__,
        'author': __author__,
        'homepage': __homepage__,
        'dependencies': __dependencies__,
    }
    return {__id__: plugin_info}
|
||||
|
||||
@abx.hookimpl
def get_CONFIG():
    """Return the HTML-to-Text config set in a one-entry dict keyed by ``__id__``."""
    # The import stays inside the hook body (not at module top level),
    # exactly as in the original.
    from .config import HTMLTOTEXT_CONFIG as configset

    return {__id__: configset}
|
||||
|
||||
|
||||
# @abx.hookimpl
|
||||
# def get_EXTRACTORS():
|
||||
# from .extractors import HTMLTOTEXT_EXTRACTOR
|
||||
|
||||
# return {
|
||||
# 'htmltotext': HTMLTOTEXT_EXTRACTOR,
|
||||
# }
|
11
archivebox/plugins_extractor/htmltotext/config.py
Normal file
11
archivebox/plugins_extractor/htmltotext/config.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
__package__ = 'plugins_extractor.htmltotext'
|
||||
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
|
||||
|
||||
class HtmltotextConfig(BaseConfigSet):
    # The only option declared for this plugin: a boolean that defaults to True.
    SAVE_HTMLTOTEXT: bool = True
|
||||
|
||||
|
||||
HTMLTOTEXT_CONFIG = HtmltotextConfig()
|
37
archivebox/plugins_extractor/pocket/__init__.py
Normal file
37
archivebox/plugins_extractor/pocket/__init__.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
__package__ = 'plugins_extractor.pocket'
|
||||
__id__ = 'pocket'
|
||||
__label__ = 'pocket'
|
||||
__version__ = '2024.10.21'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/pocket'
|
||||
__dependencies__ = []
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
def get_PLUGIN():
    """Return this plugin's metadata as a one-entry dict keyed by ``__id__``.

    The entry is assembled from the module-level dunder constants
    (``__id__``, ``__package__``, ``__label__``, ``__version__``,
    ``__author__``, ``__homepage__``, ``__dependencies__``).
    """
    plugin_info = {
        'id': __id__,
        'package': __package__,
        'label': __label__,
        'version': __version__,
        'author': __author__,
        'homepage': __homepage__,
        'dependencies': __dependencies__,
    }
    return {__id__: plugin_info}
|
||||
|
||||
@abx.hookimpl
def get_CONFIG():
    """Return the Pocket config set in a one-entry dict keyed by ``__id__``."""
    # The import stays inside the hook body (not at module top level),
    # exactly as in the original.
    from .config import POCKET_CONFIG as configset

    return {__id__: configset}
|
||||
|
||||
@abx.hookimpl
def ready():
    """Call ``validate()`` on the Pocket config set."""
    from .config import POCKET_CONFIG as pocket_config

    pocket_config.validate()
|
15
archivebox/plugins_extractor/pocket/config.py
Normal file
15
archivebox/plugins_extractor/pocket/config.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
__package__ = 'plugins_extractor.pocket'
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
|
||||
|
||||
class PocketConfig(BaseConfigSet):
    # Optional consumer key; unset (None) by default.
    POCKET_CONSUMER_KEY: str | None = Field(default=None)

    # Mapping of {<username>: <access_token>, ...}.
    # NOTE(review): the default is a zero-argument callable, not a dict.
    # Presumably BaseConfigSet resolves callable defaults when loading
    # (plain pydantic would need default_factory for that) - confirm.
    POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=lambda: {})
|
||||
|
||||
|
||||
POCKET_CONFIG = PocketConfig()
|
37
archivebox/plugins_extractor/readwise/__init__.py
Normal file
37
archivebox/plugins_extractor/readwise/__init__.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
__package__ = 'plugins_extractor.readwise'
|
||||
__id__ = 'readwise'
|
||||
__label__ = 'readwise'
|
||||
__version__ = '2024.10.21'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
|
||||
__dependencies__ = []
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
def get_PLUGIN():
    """Return this plugin's metadata as a one-entry dict keyed by ``__id__``.

    The entry is assembled from the module-level dunder constants
    (``__id__``, ``__package__``, ``__label__``, ``__version__``,
    ``__author__``, ``__homepage__``, ``__dependencies__``).
    """
    plugin_info = {
        'id': __id__,
        'package': __package__,
        'label': __label__,
        'version': __version__,
        'author': __author__,
        'homepage': __homepage__,
        'dependencies': __dependencies__,
    }
    return {__id__: plugin_info}
|
||||
|
||||
@abx.hookimpl
def get_CONFIG():
    """Return the Readwise config set in a one-entry dict keyed by ``__id__``."""
    # The import stays inside the hook body (not at module top level),
    # exactly as in the original.
    from .config import READWISE_CONFIG as configset

    return {__id__: configset}
|
||||
|
||||
@abx.hookimpl
def ready():
    """Call ``validate()`` on the Readwise config set."""
    from .config import READWISE_CONFIG as readwise_config

    readwise_config.validate()
|
17
archivebox/plugins_extractor/readwise/config.py
Normal file
17
archivebox/plugins_extractor/readwise/config.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
__package__ = 'plugins_extractor.readwise'
|
||||
|
||||
from typing import Dict
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
|
||||
|
||||
class ReadwiseConfig(BaseConfigSet):
    # Defaults to "readwise_reader_api.db" under CONSTANTS.SOURCES_DIR.
    READWISE_DB_PATH: Path = Field(default=CONSTANTS.SOURCES_DIR / "readwise_reader_api.db")

    # Mapping of {<username>: <access_token>, ...}.
    # NOTE(review): the default is a zero-argument callable, not a dict.
    # Presumably BaseConfigSet resolves callable defaults when loading
    # (plain pydantic would need default_factory for that) - confirm.
    READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {})
|
||||
|
||||
READWISE_CONFIG = ReadwiseConfig()
|
|
@ -14,7 +14,30 @@ class YtdlpConfig(BaseConfigSet):
|
|||
USE_YTDLP: bool = Field(default=True, validation_alias=AliasChoices('USE_YOUTUBEDL', 'SAVE_MEDIA'))
|
||||
|
||||
YTDLP_BINARY: str = Field(default='yt-dlp', alias='YOUTUBEDL_BINARY')
|
||||
YTDLP_EXTRA_ARGS: List[str] = Field(default=[], alias='YOUTUBEDL_EXTRA_ARGS')
|
||||
YTDLP_EXTRA_ARGS: List[str] = Field(default=lambda: [
|
||||
'--restrict-filenames',
|
||||
'--trim-filenames', '128',
|
||||
'--write-description',
|
||||
'--write-info-json',
|
||||
'--write-annotations',
|
||||
'--write-thumbnail',
|
||||
'--no-call-home',
|
||||
'--write-sub',
|
||||
'--write-auto-subs',
|
||||
'--convert-subs=srt',
|
||||
'--yes-playlist',
|
||||
'--continue',
|
||||
# This flag doesn't exist in youtube-dl
|
||||
# only in yt-dlp
|
||||
'--no-abort-on-error',
|
||||
# --ignore-errors must come AFTER
|
||||
# --no-abort-on-error
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/4914
|
||||
'--ignore-errors',
|
||||
'--geo-bypass',
|
||||
'--add-metadata',
|
||||
'--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(ARCHIVING_CONFIG.MEDIA_MAX_SIZE, ARCHIVING_CONFIG.MEDIA_MAX_SIZE),
|
||||
], alias='YOUTUBEDL_EXTRA_ARGS')
|
||||
|
||||
YTDLP_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
|
||||
YTDLP_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.MEDIA_TIMEOUT)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue