mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
121 lines
3.8 KiB
Python
121 lines
3.8 KiB
Python
from django.db import models
|
|
from django.utils.functional import cached_property
|
|
|
|
from solo.models import SingletonModel
|
|
|
|
from archivebox.plugins.defaults.models import (
|
|
ArchiveBoxDefaultDependency,
|
|
ArchiveBoxDefaultExtractor,
|
|
BashEnvironmentDependency,
|
|
PipEnvironmentDependency,
|
|
)
|
|
|
|
|
|
class GalleryDLDependency(ArchiveBoxDefaultDependency, SingletonModel):
|
|
NAME = 'GALLERYDL'
|
|
LABEL = "GalleryDL"
|
|
REQUIRED = False
|
|
|
|
PARENT_DEPENDENCIES = [
|
|
BashEnvironmentDependency,
|
|
PipEnvironmentDependency,
|
|
]
|
|
|
|
BIN_DEPENDENCIES = ['gallery-dl']
|
|
APT_DEPENDENCIES = []
|
|
BREW_DEPENDENCIES = []
|
|
PIP_PACKAGES = ['gallery-dl']
|
|
NPM_PACKAGES = []
|
|
|
|
DEFAULT_BINARY = 'gallery-dl'
|
|
DEFAULT_START_CMD = None
|
|
DEFAULT_ARGS = []
|
|
VERSION_CMD = '{BINARY} --version'
|
|
|
|
ENABLED = models.BooleanField(default=True)
|
|
BINARY = models.CharField(max_length=255, default='gallery-dl')
|
|
|
|
WORKERS = models.IntegerField(default='1')
|
|
|
|
|
|
class GalleryDLExtractor(ArchiveBoxDefaultExtractor, SingletonModel):
|
|
NAME = 'GALLERYDL'
|
|
LABEL = 'gallery-dl'
|
|
|
|
DEPENDENCY = GalleryDLDependency.get_solo()
|
|
|
|
# https://github.com/mikf/gallery-dl
|
|
DEFAULT_CMD = [
|
|
'{DEPENDENCY.BINARY}',
|
|
'{ARGS}'
|
|
'{url}',
|
|
]
|
|
DEFAULT_ARGS = [
|
|
'--timeout', self.TIMEOUT.format(**config),
|
|
'--cookies', self.COOKIES_TXT.format(**config),
|
|
'--user-agent', self.COOKIES_TXT.format(**config),
|
|
'--verify', self.CHECK_SSL_VALIDITY.format(**config),
|
|
]
|
|
|
|
ENABLED = models.BooleanField(default=True)
|
|
|
|
CMD = models.CharField(max_length=255, default=DEFAULT_CMD)
|
|
ARGS = models.CSVField(max_length=255, default=DEFAULT_ARGS)
|
|
|
|
TIMEOUT = models.CharField(max_length=255, default='{TIMEOUT}')
|
|
USER_AGENT = models.CharField(max_length=255, default='{USER_AGENT}')
|
|
COOKIES_TXT = models.CharField(max_length=255, default='{COOKIES_TXT}')
|
|
CHECK_SSL_VALIDITY = models.CharField(default='{CHECK_SSL_VALIDITY}')
|
|
|
|
# @task
|
|
# @requires_config('HOSTNAME', 'TIMEOUT', 'USER_AGENT', 'CHECK_SSL_VALIDITY')
|
|
def extract(self, url: str, out_dir: Path, config: ConfigDict):
|
|
if not self.ENABLED:
|
|
return
|
|
|
|
extractor_dir = self.create_extractor_directory(out_dir)
|
|
|
|
cmd = [
|
|
self.CMD,
|
|
url,
|
|
'--timeout', self.TIMEOUT.format(**config),
|
|
'--cookies', self.COOKIES_TXT.format(**config),
|
|
'--user-agent', self.COOKIES_TXT.format(**config),
|
|
'--verify', self.CHECK_SSL_VALIDITY.format(**config),
|
|
*split_args(self.ARGS.format(**config)),
|
|
]
|
|
|
|
status, stdout, stderr, output_path = 'failed', '', '', None
|
|
try:
|
|
proc, timer, errors = self.DEPENDENCY.run(cmd, cwd=extractor_dir, timeout=self.GALLERYDL_TIMEOUT)
|
|
stdout, stderr = proc.stdout, proc.stderr
|
|
|
|
if 'ERROR: Unsupported URL' in stderr:
|
|
hints = ('gallery-dl doesnt support this type of url yet',)
|
|
raise ArchiveError('Failed to save gallerydl', hints)
|
|
|
|
if proc.returncode == 0 and 'finished' in stdout:
|
|
output_path = extractor_dir / 'index.html'
|
|
status = 'succeeded'
|
|
except Exception as err:
|
|
stderr += err
|
|
|
|
num_bytes, num_dirs, num_files = get_dir_size(extractor_dir)
|
|
|
|
return ArchiveResult(
|
|
cmd=cmd,
|
|
pwd=str(out_dir),
|
|
cmd_version=self.DEPENDENCY.bin_version,
|
|
cmd_path=self.DEPENDENCY.bin_path,
|
|
cmd_hostname=config.HOSTNAME,
|
|
|
|
output_path=output_path,
|
|
stdout=stdout,
|
|
stderr=stderr,
|
|
status=status,
|
|
|
|
num_bytes=num_bytes,
|
|
num_files=num_files,
|
|
num_dirs=num_dirs,
|
|
**timer.stats,
|
|
)
|