diff --git a/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py index ff7297cd..c030fde5 100644 --- a/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py +++ b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py @@ -1,18 +1,19 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_archivedotorg' from pathlib import Path from typing import Optional, List, Dict, Tuple from collections import defaultdict -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.misc.system import run, chmod_file from archivebox.misc.util import enforce_types, is_static_file, dedupe -from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG -from archivebox.plugins_extractor.curl.config import CURL_CONFIG -from archivebox.plugins_extractor.curl.binaries import CURL_BINARY -from ..logging_util import TimedProgress +from abx_plugin_curl.config import CURL_CONFIG +from abx_plugin_curl.binaries import CURL_BINARY + +from .config import ARCHIVEDOTORG_CONFIG def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py index 07057a44..721d7f17 100644 --- a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py @@ -1,18 +1,18 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_chrome' from pathlib import Path from typing import Optional -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.misc.system import run, chmod_file, atomic_write from archivebox.misc.util import ( enforce_types, is_static_file, ) -from ..logging_util import TimedProgress +from archivebox.logging_util import TimedProgress -from plugins_extractor.chrome.config import CHROME_CONFIG -from plugins_extractor.chrome.binaries import CHROME_BINARY +from .config import CHROME_CONFIG +from .binaries import CHROME_BINARY def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py index d3310ba1..cb69544b 100644 --- a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_chrome' from pathlib import Path from typing import Optional @@ -8,11 +8,11 @@ from archivebox.misc.util import ( enforce_types, is_static_file, ) -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.logging_util import TimedProgress -from plugins_extractor.chrome.config import CHROME_CONFIG -from plugins_extractor.chrome.binaries import CHROME_BINARY +from .config import CHROME_CONFIG +from .binaries import CHROME_BINARY def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py index adc309aa..227d2ad5 100644 --- a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py @@ -1,15 +1,15 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_chrome' from pathlib import Path from typing import Optional from archivebox.misc.system import run, chmod_file from archivebox.misc.util import enforce_types, is_static_file -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.logging_util import TimedProgress -from plugins_extractor.chrome.config import CHROME_CONFIG -from plugins_extractor.chrome.binaries import CHROME_BINARY +from .config import CHROME_CONFIG +from .binaries import CHROME_BINARY def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py index e49907cb..335d5678 100644 --- a/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py +++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py @@ -1,19 +1,21 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_curl' from pathlib import Path from typing import Optional +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput from archivebox.misc.system import atomic_write from archivebox.misc.util import ( enforce_types, get_headers, dedupe, ) -from archivebox.plugins_extractor.curl.config import CURL_CONFIG -from archivebox.plugins_extractor.curl.binaries import CURL_BINARY -from ..index.schema import Link, ArchiveResult, ArchiveOutput -from ..logging_util import TimedProgress + +from .binaries import CURL_BINARY +from .config import CURL_CONFIG + def get_output_path(): return 'headers.json' diff --git a/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py index 09cfae44..e77a61ce 100644 --- a/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py +++ b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py @@ -1,14 +1,16 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_favicon' from pathlib import Path from archivebox.misc.system import chmod_file, run from archivebox.misc.util import enforce_types, domain, dedupe -from archivebox.plugins_extractor.favicon.config import FAVICON_CONFIG -from archivebox.plugins_extractor.curl.config import CURL_CONFIG -from archivebox.plugins_extractor.curl.binaries import CURL_BINARY -from ..index.schema import Link, ArchiveResult, ArchiveOutput -from ..logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput +from archivebox.logging_util import TimedProgress + +from abx_plugin_curl.config import CURL_CONFIG +from abx_plugin_curl.binaries import CURL_BINARY + +from .config import FAVICON_CONFIG @enforce_types diff --git a/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py index 128ba0e7..713b124e 100644 --- a/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py +++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_git' from pathlib import Path @@ -13,8 +13,8 @@ from archivebox.misc.util import ( without_query, without_fragment, ) -from ..logging_util import TimedProgress -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from abx_plugin_git.config import GIT_CONFIG from abx_plugin_git.binaries import GIT_BINARY diff --git a/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py index 2eb7d424..6faac6b3 100644 --- a/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py +++ b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_htmltotext' from html.parser import HTMLParser import io @@ -9,13 +9,12 @@ from archivebox.config import VERSION from archivebox.config.common import ARCHIVING_CONFIG from archivebox.misc.system import atomic_write from archivebox.misc.util import enforce_types, is_static_file +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveError -from archivebox.plugins_extractor.htmltotext.config import HTMLTOTEXT_CONFIG - -from ..logging_util import TimedProgress -from ..index.schema import Link, ArchiveResult, ArchiveError -from .title import get_html +from abx_plugin_title.extractor import get_html +from .config import HTMLTOTEXT_CONFIG def get_output_path(): return "htmltotext.txt" diff --git a/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py index 08be60ad..5bb6fead 100644 --- a/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py +++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_mercury' from pathlib import Path @@ -6,16 +6,16 @@ from subprocess import CompletedProcess from typing import Optional, List import json -from ..index.schema import Link, ArchiveResult, ArchiveError +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveError from archivebox.misc.system import run, atomic_write from archivebox.misc.util import ( enforce_types, is_static_file, ) -from archivebox.plugins_extractor.mercury.config import MERCURY_CONFIG -from archivebox.plugins_extractor.mercury.binaries import MERCURY_BINARY +from .config import MERCURY_CONFIG +from .binaries import MERCURY_BINARY -from ..logging_util import TimedProgress def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py index ccfde023..e17349df 100644 --- a/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py +++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_readability' from pathlib import Path from tempfile import NamedTemporaryFile @@ -8,12 +8,12 @@ import json from archivebox.misc.system import run, atomic_write from archivebox.misc.util import enforce_types, is_static_file -from ..index.schema import Link, ArchiveResult, ArchiveError -from ..logging_util import TimedProgress -from .title import get_html +from archivebox.index.schema import Link, ArchiveResult, ArchiveError +from archivebox.logging_util import TimedProgress +from abx_plugin_title.extractor import get_html -from plugins_extractor.readability.config import READABILITY_CONFIG -from plugins_extractor.readability.binaries import READABILITY_BINARY +from .config import READABILITY_CONFIG +from .binaries import READABILITY_BINARY def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py index 6988fd25..361f996c 100644 --- a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py @@ -1,19 +1,19 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_singlefile' from pathlib import Path from typing import Optional import json -from ..index.schema import Link, ArchiveResult, ArchiveError +from archivebox.index.schema import Link, ArchiveResult, ArchiveError from archivebox.misc.system import run, chmod_file from archivebox.misc.util import enforce_types, is_static_file, dedupe -from ..logging_util import TimedProgress +from archivebox.logging_util import TimedProgress -from plugins_extractor.chrome.config import CHROME_CONFIG -from plugins_extractor.chrome.binaries import CHROME_BINARY -from plugins_extractor.singlefile.config import SINGLEFILE_CONFIG -from plugins_extractor.singlefile.binaries import SINGLEFILE_BINARY +from abx_plugin_chrome.config import CHROME_CONFIG +from abx_plugin_chrome.binaries import CHROME_BINARY +from .config import SINGLEFILE_CONFIG +from .binaries import SINGLEFILE_BINARY def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py index a8ef52cf..ec4507df 100644 --- a/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py +++ b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py @@ -1,21 +1,20 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_title' import re from html.parser import HTMLParser from pathlib import Path from typing import Optional +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.logging_util import TimedProgress from archivebox.misc.util import ( enforce_types, download_url, htmldecode, dedupe, ) -from abx_plugin_curl_extractor.config import CURL_CONFIG -from abx_plugin_curl_extractor.binaries import CURL_BINARY - -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..logging_util import TimedProgress +from abx_plugin_curl.config import CURL_CONFIG +from abx_plugin_curl.binaries import CURL_BINARY diff --git a/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py index caaaeaf6..db589dd8 100644 --- a/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py +++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py @@ -1,4 +1,4 @@ -__package__ = 'abx_plugin_wget_extractor' +__package__ = 'abx_plugin_wget' import re import os @@ -7,6 +7,8 @@ from pathlib import Path from typing import Optional from datetime import datetime, timezone +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.misc.system import run, chmod_file from archivebox.misc.util import ( enforce_types, @@ -20,8 +22,6 @@ from archivebox.misc.util import ( from .config import WGET_CONFIG from .binaries import WGET_BINARY -from archivebox.logging_util import TimedProgress -from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py index c1f3bbc9..03df93d1 100644 --- a/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py +++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py @@ -1,15 +1,16 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_ytdlp' from pathlib import Path from typing import Optional from archivebox.misc.system import run, chmod_file from archivebox.misc.util import enforce_types, is_static_file, dedupe -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from archivebox.logging_util import TimedProgress + +from .config import YTDLP_CONFIG +from .binaries import YTDLP_BINARY -from plugins_extractor.ytdlp.config import YTDLP_CONFIG -from plugins_extractor.ytdlp.binaries import YTDLP_BINARY def get_output_path(): return 'media/' diff --git a/docs b/docs index 1c69b154..02003ab1 160000 --- a/docs +++ b/docs @@ -1 +1 @@ -Subproject commit 1c69b1544a275938088e7bfd52a9ebd8c21f76fa +Subproject commit 02003ab1d212712075cb2fec2c645a9c4a0843d2