fix docs build for vendored pkgs
Some checks are pending
CodeQL / Analyze (python) (push) Waiting to run
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Deploy static content to Pages / deploy (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Build GitHub Pages website / build (push) Waiting to run
Build GitHub Pages website / deploy (push) Blocked by required conditions
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run

This commit is contained in:
Nick Sweeting 2024-11-12 23:53:34 -08:00
parent f0a7198861
commit ec100bfe29
No known key found for this signature in database
15 changed files with 78 additions and 74 deletions

View file

@@ -1,18 +1,19 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_archivedotorg'
from pathlib import Path from pathlib import Path
from typing import Optional, List, Dict, Tuple from typing import Optional, List, Dict, Tuple
from collections import defaultdict from collections import defaultdict
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.logging_util import TimedProgress
from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file, dedupe from archivebox.misc.util import enforce_types, is_static_file, dedupe
from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..logging_util import TimedProgress from abx_plugin_curl.config import CURL_CONFIG
from abx_plugin_curl.binaries import CURL_BINARY
from .config import ARCHIVEDOTORG_CONFIG
def get_output_path(): def get_output_path():

View file

@@ -1,18 +1,18 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_chrome'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file, atomic_write from archivebox.misc.system import run, chmod_file, atomic_write
from archivebox.misc.util import ( from archivebox.misc.util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
) )
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG from .config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY from .binaries import CHROME_BINARY
def get_output_path(): def get_output_path():

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_chrome'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@@ -8,11 +8,11 @@ from archivebox.misc.util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
) )
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG from .config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY from .binaries import CHROME_BINARY
def get_output_path(): def get_output_path():

View file

@@ -1,15 +1,15 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_chrome'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from archivebox.misc.system import run, chmod_file from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file from archivebox.misc.util import enforce_types, is_static_file
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG from .config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY from .binaries import CHROME_BINARY
def get_output_path(): def get_output_path():

View file

@@ -1,19 +1,21 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_curl'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from archivebox.logging_util import TimedProgress
from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput
from archivebox.misc.system import atomic_write from archivebox.misc.system import atomic_write
from archivebox.misc.util import ( from archivebox.misc.util import (
enforce_types, enforce_types,
get_headers, get_headers,
dedupe, dedupe,
) )
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY from .binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput from .config import CURL_CONFIG
from ..logging_util import TimedProgress
def get_output_path(): def get_output_path():
return 'headers.json' return 'headers.json'

View file

@@ -1,14 +1,16 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_favicon'
from pathlib import Path from pathlib import Path
from archivebox.misc.system import chmod_file, run from archivebox.misc.system import chmod_file, run
from archivebox.misc.util import enforce_types, domain, dedupe from archivebox.misc.util import enforce_types, domain, dedupe
from archivebox.plugins_extractor.favicon.config import FAVICON_CONFIG from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput
from archivebox.plugins_extractor.curl.config import CURL_CONFIG from archivebox.logging_util import TimedProgress
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput from abx_plugin_curl.config import CURL_CONFIG
from ..logging_util import TimedProgress from abx_plugin_curl.binaries import CURL_BINARY
from .config import FAVICON_CONFIG
@enforce_types @enforce_types

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_git'
from pathlib import Path from pathlib import Path
@@ -13,8 +13,8 @@ from archivebox.misc.util import (
without_query, without_query,
without_fragment, without_fragment,
) )
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from abx_plugin_git.config import GIT_CONFIG from abx_plugin_git.config import GIT_CONFIG
from abx_plugin_git.binaries import GIT_BINARY from abx_plugin_git.binaries import GIT_BINARY

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_htmltotext'
from html.parser import HTMLParser from html.parser import HTMLParser
import io import io
@@ -9,13 +9,12 @@ from archivebox.config import VERSION
from archivebox.config.common import ARCHIVING_CONFIG from archivebox.config.common import ARCHIVING_CONFIG
from archivebox.misc.system import atomic_write from archivebox.misc.system import atomic_write
from archivebox.misc.util import enforce_types, is_static_file from archivebox.misc.util import enforce_types, is_static_file
from archivebox.logging_util import TimedProgress
from archivebox.index.schema import Link, ArchiveResult, ArchiveError
from archivebox.plugins_extractor.htmltotext.config import HTMLTOTEXT_CONFIG from abx_plugin_title.extractor import get_html
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveError
from .title import get_html
from .config import HTMLTOTEXT_CONFIG
def get_output_path(): def get_output_path():
return "htmltotext.txt" return "htmltotext.txt"

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_mercury'
from pathlib import Path from pathlib import Path
@@ -6,16 +6,16 @@ from subprocess import CompletedProcess
from typing import Optional, List from typing import Optional, List
import json import json
from ..index.schema import Link, ArchiveResult, ArchiveError from archivebox.logging_util import TimedProgress
from archivebox.index.schema import Link, ArchiveResult, ArchiveError
from archivebox.misc.system import run, atomic_write from archivebox.misc.system import run, atomic_write
from archivebox.misc.util import ( from archivebox.misc.util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
) )
from archivebox.plugins_extractor.mercury.config import MERCURY_CONFIG from .config import MERCURY_CONFIG
from archivebox.plugins_extractor.mercury.binaries import MERCURY_BINARY from .binaries import MERCURY_BINARY
from ..logging_util import TimedProgress
def get_output_path(): def get_output_path():

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_readability'
from pathlib import Path from pathlib import Path
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
@@ -8,12 +8,12 @@ import json
from archivebox.misc.system import run, atomic_write from archivebox.misc.system import run, atomic_write
from archivebox.misc.util import enforce_types, is_static_file from archivebox.misc.util import enforce_types, is_static_file
from ..index.schema import Link, ArchiveResult, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveError
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from .title import get_html from abx_plugin_title.extractor import get_html
from plugins_extractor.readability.config import READABILITY_CONFIG from .config import READABILITY_CONFIG
from plugins_extractor.readability.binaries import READABILITY_BINARY from .binaries import READABILITY_BINARY
def get_output_path(): def get_output_path():

View file

@@ -1,19 +1,19 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_singlefile'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import json import json
from ..index.schema import Link, ArchiveResult, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveError
from archivebox.misc.system import run, chmod_file from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file, dedupe from archivebox.misc.util import enforce_types, is_static_file, dedupe
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG from abx_plugin_chrome.config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY from abx_plugin_chrome.binaries import CHROME_BINARY
from plugins_extractor.singlefile.config import SINGLEFILE_CONFIG from .config import SINGLEFILE_CONFIG
from plugins_extractor.singlefile.binaries import SINGLEFILE_BINARY from .binaries import SINGLEFILE_BINARY
def get_output_path(): def get_output_path():

View file

@@ -1,21 +1,20 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_title'
import re import re
from html.parser import HTMLParser from html.parser import HTMLParser
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.logging_util import TimedProgress
from archivebox.misc.util import ( from archivebox.misc.util import (
enforce_types, enforce_types,
download_url, download_url,
htmldecode, htmldecode,
dedupe, dedupe,
) )
from abx_plugin_curl_extractor.config import CURL_CONFIG from abx_plugin_curl.config import CURL_CONFIG
from abx_plugin_curl_extractor.binaries import CURL_BINARY from abx_plugin_curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress

View file

@@ -1,4 +1,4 @@
__package__ = 'abx_plugin_wget_extractor' __package__ = 'abx_plugin_wget'
import re import re
import os import os
@@ -7,6 +7,8 @@ from pathlib import Path
from typing import Optional from typing import Optional
from datetime import datetime, timezone from datetime import datetime, timezone
from archivebox.logging_util import TimedProgress
from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import ( from archivebox.misc.util import (
enforce_types, enforce_types,
@@ -20,8 +22,6 @@ from archivebox.misc.util import (
from .config import WGET_CONFIG from .config import WGET_CONFIG
from .binaries import WGET_BINARY from .binaries import WGET_BINARY
from archivebox.logging_util import TimedProgress
from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
def get_output_path(): def get_output_path():

View file

@@ -1,15 +1,16 @@
__package__ = 'archivebox.extractors' __package__ = 'abx_plugin_ytdlp'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from archivebox.misc.system import run, chmod_file from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file, dedupe from archivebox.misc.util import enforce_types, is_static_file, dedupe
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress from archivebox.logging_util import TimedProgress
from .config import YTDLP_CONFIG
from .binaries import YTDLP_BINARY
from plugins_extractor.ytdlp.config import YTDLP_CONFIG
from plugins_extractor.ytdlp.binaries import YTDLP_BINARY
def get_output_path(): def get_output_path():
return 'media/' return 'media/'

2
docs

@@ -1 +1 @@
Subproject commit 1c69b1544a275938088e7bfd52a9ebd8c21f76fa Subproject commit 02003ab1d212712075cb2fec2c645a9c4a0843d2