From d9fd1e38111b91090b10f1bf73e5b67f7151fffe Mon Sep 17 00:00:00 2001
From: BlipRanger
Date: Thu, 10 Dec 2020 10:51:57 -0500
Subject: [PATCH] Add selector for archive modes

---
Note: hand-edited after generation. In the views.py hunk the invalid
`input_kwargs.append("extractors": extractors)` call (a syntax error, and
dicts have no append) is replaced with dict.update, and the comma-separated
extractor string is built with str.join so it has no trailing comma. The
views.py post-image blob id on its index line is therefore stale.

 archivebox/core/forms.py | 20 +++++++++++++++++++-
 archivebox/core/views.py |  3 +++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py
index 8f48929b..4905464d 100644
--- a/archivebox/core/forms.py
+++ b/archivebox/core/forms.py
@@ -10,10 +10,28 @@ CHOICES = (
     ('1', 'depth = 1 (archive these URLs and all URLs one hop away)'),
 )
 
+ARCHIVE_METHODS = [
+    ('title', 'title'),
+    ('favicon', 'favicon'),
+    ('wget', 'wget'),
+    ('warc', 'warc'),
+    ('pdf', 'pdf'),
+    ('screenshot', 'screenshot'),
+    ('dom', 'dom'),
+    ('singlefile', 'singlefile'),
+    ('git', 'git'),
+    ('media', 'media'),
+    ('archive_org', 'archive_org'),
+]
+
+
 class AddLinkForm(forms.Form):
     url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True)
     depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0')
-
+    archiveMethods = forms.MultipleChoiceField(
+        required=False,
+        widget=forms.SelectMultiple,
+        choices=ARCHIVE_METHODS,)
 
 class TagWidgetMixin:
     def format_value(self, value):
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index dfea7700..5faf3a29 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -138,12 +138,15 @@ class AddView(UserPassesTestMixin, FormView):
         url = form.cleaned_data["url"]
         print(f'[+] Adding URL: {url}')
         depth = 0 if form.cleaned_data["depth"] == "0" else 1
+        extractors = ",".join(form.cleaned_data["archiveMethods"])
         input_kwargs = {
             "urls": url,
             "depth": depth,
             "update_all": False,
             "out_dir": OUTPUT_DIR,
         }
+        if extractors:
+            input_kwargs.update({"extractors": extractors})
         add_stdout = StringIO()
         with redirect_stdout(add_stdout):
             add(**input_kwargs)