Mirror of https://github.com/ArchiveBox/ArchiveBox.git
Merge pull request #582 from cdvv7788/remove-setup-django

Commit: 83d9a0878e
8 changed files with 7 additions and 21 deletions
@@ -63,7 +63,7 @@ def run_subcommand(subcommand: str,
 
     if subcommand not in meta_cmds:
         from ..config import setup_django
-        setup_django(in_memory_db=subcommand in fake_db)
+        setup_django(in_memory_db=subcommand in fake_db, check_db=subcommand in archive_cmds)
 
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
     module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
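With this change the CLI wrapper becomes the single place that decides how much Django setup a subcommand needs: meta commands skip setup entirely, fake_db commands get an in-memory database, and only archive commands request the check_db verification. A minimal sketch of that dispatch pattern, using hypothetical command sets and a stand-in configure() helper rather than ArchiveBox's real meta_cmds/fake_db/archive_cmds and setup_django:

    # hypothetical command groups, standing in for meta_cmds / fake_db / archive_cmds
    META_CMDS = {'help', 'version'}             # never touch Django at all
    FAKE_DB_CMDS = {'oneshot'}                  # can run against a throwaway in-memory DB
    ARCHIVE_CMDS = {'add', 'update', 'remove'}  # need a real, migrated database

    def configure(in_memory_db: bool, check_db: bool) -> None:
        """Stand-in for config.setup_django(): initialize the ORM once per process."""
        print(f'setup: in_memory_db={in_memory_db}, check_db={check_db}')

    def run_subcommand(name: str) -> None:
        if name not in META_CMDS:
            # one central call decides how the database is set up and whether it is checked
            configure(in_memory_db=name in FAKE_DB_CMDS,
                      check_db=name in ARCHIVE_CMDS)
        # the real CLI then imports cli.archivebox_<name> and calls its main()

    run_subcommand('add')       # setup: in_memory_db=False, check_db=True
    run_subcommand('version')   # meta command: no setup at all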
@@ -20,7 +20,6 @@ from ..config import (
     CURL_ARGS,
     CURL_VERSION,
     CURL_USER_AGENT,
-    setup_django,
 )
 from ..logging_util import TimedProgress
 
@@ -81,7 +80,6 @@ def extract_title_with_regex(html):
 def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
     """try to guess the page's title from its content"""
 
-    setup_django(out_dir=out_dir)
     from core.models import Snapshot
 
     output: ArchiveOutput = None
@@ -18,7 +18,6 @@ from ..util import (
     ExtendedEncoder,
 )
 from ..config import (
-    setup_django,
     ARCHIVE_DIR_NAME,
     SQL_INDEX_FILENAME,
     JSON_INDEX_FILENAME,
@@ -243,16 +242,9 @@ def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
 
     log_indexing_process_finished()
 
-@enforce_types
-def get_empty_snapshot_queryset(out_dir: Path=OUTPUT_DIR):
-    setup_django(out_dir, check_db=True)
-    from core.models import Snapshot
-    return Snapshot.objects.none()
-
 @enforce_types
 def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
-    setup_django(out_dir, check_db=True)
     from core.models import Snapshot
     try:
         return Snapshot.objects.all()
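The deleted get_empty_snapshot_queryset() helper existed mostly so callers could obtain an empty queryset after it ran setup_django; with setup moved into the CLI entry point, Snapshot.objects.none() can be used directly. QuerySet.none() returns an empty queryset of the right model and acts as the identity element when per-pattern results are OR-ed together, which is how search_filter accumulates matches in the next hunk. A rough sketch of that pattern, assuming an already-configured Django project with the core.models.Snapshot model (the url__icontains filter is illustrative, not the repository's query):

    # assumes setup_django() (or django.setup()) has already run in this process
    from core.models import Snapshot

    def filter_by_patterns(patterns):
        matches = Snapshot.objects.none()        # empty queryset of the Snapshot model
        for pattern in patterns:
            # |= ORs querysets together into a single combined query,
            # rather than fetching rows separately for each pattern
            matches |= Snapshot.objects.filter(url__icontains=pattern)
        return matches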
@@ -390,8 +382,9 @@ def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type:
             color='red',
         )
         raise SystemExit(2)
+    from core.models import Snapshot
 
-    qsearch = get_empty_snapshot_queryset()
+    qsearch = Snapshot.objects.none()
     for pattern in filter_patterns:
         try:
             qsearch |= query_search_index(pattern)
@@ -23,7 +23,6 @@ from ..config import (
     GIT_SHA,
     FOOTER_INFO,
     HTML_INDEX_FILENAME,
-    setup_django,
 )
 
 MAIN_INDEX_TEMPLATE = 'main_index.html'
@@ -111,7 +110,6 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
     """render a given html template string with the given template content"""
     from django.template.loader import render_to_string
 
-    setup_django(check_db=False)
     return render_to_string(template, context)
 
 
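render_to_string() only needs Django's settings and template engine, not a reachable database, which is why the old call here passed check_db=False and why the function can now simply rely on the CLI having already configured Django. A small usage sketch, where main_index.html is the template named in the previous hunk and the context keys are illustrative:

    from django.template.loader import render_to_string

    # renders the named template from the configured template directories
    # and returns the resulting HTML as a string
    html = render_to_string('main_index.html', {'version': '0.5.0', 'rows': []})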
@@ -29,7 +29,6 @@ from .util import enforce_types # type: ignore
 from .system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
 from .index import (
     load_main_index,
-    get_empty_snapshot_queryset,
     parse_links_from_source,
     dedupe_links,
     write_main_index,
@@ -265,6 +264,7 @@ def run(subcommand: str,
 @enforce_types
 def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
     """Initialize a new ArchiveBox collection in the current directory"""
+    from core.models import Snapshot
     Path(out_dir).mkdir(exist_ok=True)
     is_empty = not len(set(os.listdir(out_dir)) - ALLOWED_IN_OUTPUT_DIR)
 
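The Snapshot import is added inside init() rather than at the top of the module because Django models can only be imported once the app registry is populated, i.e. after setup_django()/django.setup() has run; importing them earlier raises AppRegistryNotReady. A standalone illustration of that ordering requirement (core.settings as the settings module is an assumption here, inferred from the core.models imports in this diff):

    import os
    import django

    # importing core.models before setup() would raise
    # django.core.exceptions.AppRegistryNotReady: Apps aren't loaded yet.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    django.setup()

    # only now is it safe to pull in ORM models
    from core.models import Snapshot
    print(Snapshot.objects.count())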
@@ -335,7 +335,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
     print()
     print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
 
-    all_links = get_empty_snapshot_queryset()
+    all_links = Snapshot.objects.none()
     pending_links: Dict[str, Link] = {}
 
     if existing_index:
@@ -6,7 +6,7 @@ from django.db.models import QuerySet
 
 from archivebox.index.schema import Link
 from archivebox.util import enforce_types
-from archivebox.config import setup_django,stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
+from archivebox.config import stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
 
 from .utils import get_indexable_content, log_index_started
 
@@ -49,7 +49,6 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
 
 @enforce_types
 def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
-    setup_django(out_dir, check_db=True)
     from core.models import Snapshot
 
     if search_backend_enabled():
@@ -2,7 +2,7 @@ import re
 from subprocess import run, PIPE, DEVNULL
 from typing import List, Generator
 
-from archivebox.config import setup_django, ARCHIVE_DIR
+from archivebox.config import ARCHIVE_DIR
 from archivebox.util import enforce_types
 
 RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
@@ -30,7 +30,6 @@ def search(text: str) -> List[str]:
     if is_rg_installed.returncode:
         raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
 
-    setup_django(check_db=True)
     from core.models import Snapshot
 
     rg_cmd = ['rg', RG_ADD_TYPE, RG_IGNORE_ARGUMENTS, RG_DEFAULT_ARGUMENTS, RG_REGEX_ARGUMENT, text, str(ARCHIVE_DIR)]
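With setup_django() gone from this backend, the ripgrep search needs Django only when matching file paths are mapped back onto Snapshot rows. A rough, hypothetical sketch of the general approach (shell out to rg, then collapse matching paths to the snapshot directory names under ARCHIVE_DIR); this is not the repository's actual parsing code and the constant below is a stand-in:

    from pathlib import Path
    from subprocess import run, PIPE

    ARCHIVE_DIR = Path('output/archive')   # stand-in for archivebox.config.ARCHIVE_DIR

    def ripgrep_timestamps(text: str) -> set:
        # rg -l prints only the names of files containing a match
        proc = run(['rg', '-l', text, str(ARCHIVE_DIR)], stdout=PIPE, stderr=PIPE)
        timestamps = set()
        for line in proc.stdout.decode(errors='replace').splitlines():
            try:
                # archive/<timestamp>/... -> keep just the snapshot directory name
                timestamps.add(Path(line).relative_to(ARCHIVE_DIR).parts[0])
            except (ValueError, IndexError):
                continue
        return timestamps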
@@ -20,7 +20,6 @@ def test_oneshot_command_saves_page_in_right_folder(tmp_path, disable_extractors
         capture_output=True,
         env=disable_extractors_dict,
     )
-    print(process.stdout)
     items = ' '.join([str(x) for x in tmp_path.iterdir()])
     current_path = ' '.join([str(x) for x in Path.cwd().iterdir()])
     assert "index.json" in items