From 3234a366d50c02ae22649e7349a5b6af9dea7494 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Fri, 19 Jan 2024 07:09:24 -0800
Subject: [PATCH] wip refactoring

---
Review notes (ignored by git-am, not part of the commit message):

* settings.py: the staticfile/template settings block is moved up so it sits
  directly after INSTALLED_APPS, and the gallerydl plugin is registered in
  INSTALLED_APPS, STATICFILES_DIRS and TEMPLATE_DIRS.
* gallerydl/models.py: GalleryDLDependency.run() now owns the subprocess call
  and the TimedProgress timer that the extractor used to manage inline.
  Fixed relative to the first draft of this patch:
    - the garbled, duplicated `run(...)=True ...)run(...)` statement
      (a syntax error) is replaced by one call that mirrors the call removed
      from the extractor (`cwd=...`, `text=True`);
    - `run()` takes `self` (it reads `self.bin_path`);
    - the working-directory parameter is named `cwd`, matching the caller's
      `cwd=extractor_dir` keyword;
    - `proc` is initialised so the final `return` cannot hit an unbound local
      when the subprocess call raises.
* Not addressed here (context lines, left untouched): the extractor still
  ignores the returned `errors` value and still does `stderr += err` with an
  exception object.

 archivebox/core/settings.py            | 113 +++++++++++++------------
 archivebox/plugins/gallerydl/models.py |  24 +++--
 2 files changed, 80 insertions(+), 57 deletions(-)

diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index 780fba00..4ecdffef 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -64,6 +64,9 @@ INSTALLED_APPS = [
 
     # Plugins
     'plugins.replaywebpage',
+    'plugins.gallerydl',
+    # 'plugins.browsertrix',
+    # 'plugins.playwright',
     # ...
     # someday we may have enough plugins to justify dynamic loading:
     # *(path.parent.name for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/apps.py')),,
@@ -71,6 +74,64 @@ INSTALLED_APPS = [
     'django_extensions',
 ]
 
+################################################################################
+### Staticfile and Template Settings
+################################################################################
+
+STATIC_URL = '/static/'
+
+STATIC_ROOT = Path(PACKAGE_DIR) / 'collected_static'
+
+STATICFILES_DIRS = [
+    *([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
+    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
+
+    # Plugins
+    str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/static'),
+    str(Path(PACKAGE_DIR) / 'plugins/gallerydl/static'),
+    # str(Path(PACKAGE_DIR) / 'plugins/browsertrix/static'),
+    # str(Path(PACKAGE_DIR) / 'plugins/playwright/static'),
+    # ...
+    # someday if there are many more plugins / user-addable plugins:
+    # *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/static')),
+]
+
+MEDIA_URL = '/archive/'
+MEDIA_ROOT = OUTPUT_DIR / 'archive'
+
+
+TEMPLATE_DIRS = [
+    *([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
+    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
+    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
+    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
+
+    # Plugins
+    str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/templates'),
+    str(Path(PACKAGE_DIR) / 'plugins/gallerydl/templates'),
+    # str(Path(PACKAGE_DIR) / 'plugins/browsertrix/templates'),
+    # str(Path(PACKAGE_DIR) / 'plugins/playwright/templates'),
+    # ...
+    #
+    # someday if there are many more plugins / user-addable plugins:
+    # *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/templates')),
+]
+
+TEMPLATES = [
+    {
+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
+        'DIRS': TEMPLATE_DIRS,
+        'APP_DIRS': True,
+        'OPTIONS': {
+            'context_processors': [
+                'django.template.context_processors.debug',
+                'django.template.context_processors.request',
+                'django.contrib.auth.context_processors.auth',
+                'django.contrib.messages.context_processors.messages',
+            ],
+        },
+    },
+]
 
 # For usage with https://www.jetadmin.io/integrations/django
 # INSTALLED_APPS += ['jet_django']
@@ -178,58 +239,6 @@ if DEBUG_TOOLBAR:
     ]
     MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
 
-################################################################################
-### Staticfile and Template Settings
-################################################################################
-
-STATIC_URL = '/static/'
-
-STATIC_ROOT = Path(PACKAGE_DIR) / 'collected_static'
-
-STATICFILES_DIRS = [
-    *([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
-
-    # Plugins
-    str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/static'),
-    # ...
-    # someday if there are many more plugins / user-addable plugins:
-    # *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/static')),
-]
-
-MEDIA_URL = '/archive/'
-MEDIA_ROOT = OUTPUT_DIR / 'archive'
-
-
-TEMPLATE_DIRS = [
-    *([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
-
-    # Plugins
-    str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/templates')
-    # ...
-    #
-    # someday if there are many more plugins / user-addable plugins:
-    # *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/templates')),
-]
-
-TEMPLATES = [
-    {
-        'BACKEND': 'django.template.backends.django.DjangoTemplates',
-        'DIRS': TEMPLATE_DIRS,
-        'APP_DIRS': True,
-        'OPTIONS': {
-            'context_processors': [
-                'django.template.context_processors.debug',
-                'django.template.context_processors.request',
-                'django.contrib.auth.context_processors.auth',
-                'django.contrib.messages.context_processors.messages',
-            ],
-        },
-    },
-]
 
 
 ################################################################################
diff --git a/archivebox/plugins/gallerydl/models.py b/archivebox/plugins/gallerydl/models.py
index 662e9347..5e6a153e 100644
--- a/archivebox/plugins/gallerydl/models.py
+++ b/archivebox/plugins/gallerydl/models.py
@@ -5,6 +5,9 @@ class GalleryDLDependency(SingletonModel):
     GALLERYDL_ENABLED = models.BooleanField(default=True)
     GALLERYDL_BINARY = models.CharField(max_length=255, default='gallery-dl')
 
+    # GALLERYDL_WORKERS = models.IntegerField(default='{NUM_CORES}')
+
+
     def __str__(self):
         return "GalleryDL Dependency Configuration"
 
@@ -28,6 +31,21 @@ class GalleryDLDependency(SingletonModel):
         return self.GALLERYDL_ENABLED and self.is_valid
 
 
+    def run(self, args, cwd, timeout):
+        """Run the dependency binary with `args` in `cwd`; return (proc, timer, errors)."""
+        proc = errors = None
+        timer = TimedProgress(timeout, prefix=' ')
+        try:
+            proc = run([self.bin_path, *args], cwd=cwd, timeout=timeout, text=True)
+
+        except Exception as err:
+            errors = err
+        finally:
+            timer.end()
+
+        return proc, timer, errors
+
+
     def pretty_version(self):
         if self.enabled:
             if self.is_valid:
@@ -142,9 +160,8 @@ class GalleryDLExtractor(SingletonModel):
         ]
 
         status, stdout, stderr, output_path = 'failed', '', '', None
-        timer = TimedProgress(timeout, prefix=' ')
         try:
-            proc = run(cmd, cwd=extractor_dir, timeout=self.GALLERYDL_TIMEOUT, text=True)
+            proc, timer, errors = self.GALLERYDL_DEPENDENCY.run(cmd, cwd=extractor_dir, timeout=self.GALLERYDL_TIMEOUT)
             stdout, stderr = proc.stdout, proc.stderr
 
             if 'ERROR: Unsupported URL' in stderr:
@@ -154,11 +171,8 @@ class GalleryDLExtractor(SingletonModel):
             if proc.returncode == 0 and 'finished' in stdout:
                 output_path = extractor_dir / 'index.html'
                 status = 'succeeded'
-
         except Exception as err:
             stderr += err
-        finally:
-            timer.end()
 
         num_bytes, num_dirs, num_files = get_dir_size(extractor_dir)
 