From d9fd1e38111b91090b10f1bf73e5b67f7151fffe Mon Sep 17 00:00:00 2001 From: BlipRanger Date: Thu, 10 Dec 2020 10:51:57 -0500 Subject: [PATCH 1/5] Add selector for archive modes --- archivebox/core/forms.py | 20 +++++++++++++++++++- archivebox/core/views.py | 5 +++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py index 8f48929b..4905464d 100644 --- a/archivebox/core/forms.py +++ b/archivebox/core/forms.py @@ -10,10 +10,28 @@ CHOICES = ( ('1', 'depth = 1 (archive these URLs and all URLs one hop away)'), ) +ARCHIVE_METHODS = [ + ('title', 'title'), + ('favicon', 'favicon'), + ('wget', 'wget'), + ('warc', 'warc'), + ('pdf', 'pdf'), + ('screenshot', 'screenshot'), + ('dom', 'dom'), + ('singlefile', 'singlefile'), + ('git', 'git'), + ('media', 'media'), + ('archive_org', 'archive_org'), +] + + class AddLinkForm(forms.Form): url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True) depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0') - + archiveMethods = forms.MultipleChoiceField( + required=False, + widget=forms.SelectMultiple, + choices=ARCHIVE_METHODS,) class TagWidgetMixin: def format_value(self, value): diff --git a/archivebox/core/views.py b/archivebox/core/views.py index dfea7700..5faf3a29 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -138,12 +138,17 @@ class AddView(UserPassesTestMixin, FormView): url = form.cleaned_data["url"] print(f'[+] Adding URL: {url}') depth = 0 if form.cleaned_data["depth"] == "0" else 1 + extractors = "" + for extractor in form.cleaned_data["archiveMethods"]: + extractors = extractors + extractor + ',' input_kwargs = { "urls": url, "depth": depth, "update_all": False, "out_dir": OUTPUT_DIR, } + if extractors: + input_kwargs.append("extractors": extractors) add_stdout = StringIO() with redirect_stdout(add_stdout): add(**input_kwargs) From 8b0ff2dfee1a6549a7275fe84fbffb2f60ed5fb8 Mon Sep 17 00:00:00 2001 From: BlipRanger Date: Thu, 10 Dec 2020 11:08:27 -0500 Subject: [PATCH 2/5] update instead of append --- archivebox/core/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 5faf3a29..a195ea24 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -148,7 +148,7 @@ class AddView(UserPassesTestMixin, FormView): "out_dir": OUTPUT_DIR, } if extractors: - input_kwargs.append("extractors": extractors) + input_kwargs.update({"extractors": extractors}) add_stdout = StringIO() with redirect_stdout(add_stdout): add(**input_kwargs) From 7ce1f631830bc114823191379486ee37bd6f45ee Mon Sep 17 00:00:00 2001 From: BlipRanger Date: Thu, 10 Dec 2020 12:44:38 -0500 Subject: [PATCH 3/5] Update archivebox/core/forms.py Format cleanup Co-authored-by: Nick Sweeting --- archivebox/core/forms.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py index 4905464d..14893d96 100644 --- a/archivebox/core/forms.py +++ b/archivebox/core/forms.py @@ -28,11 +28,11 @@ ARCHIVE_METHODS = [ class AddLinkForm(forms.Form): url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True) depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0') - archiveMethods = forms.MultipleChoiceField( - required=False, - widget=forms.SelectMultiple, - choices=ARCHIVE_METHODS,) - + archive_methods = forms.MultipleChoiceField( + required=False, + widget=forms.SelectMultiple, + choices=ARCHIVE_METHODS, + ) class TagWidgetMixin: def format_value(self, value): if value is not None and not isinstance(value, str): From 35809eab1c09f327c7aee9c66194f4825b795181 Mon Sep 17 00:00:00 2001 From: BlipRanger Date: Thu, 10 Dec 2020 12:45:30 -0500 Subject: [PATCH 4/5] Update archivebox/core/views.py Cleaner handling of the archive methods input Co-authored-by: Nick Sweeting --- archivebox/core/views.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index a195ea24..a9578869 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -138,9 +138,7 @@ class AddView(UserPassesTestMixin, FormView): url = form.cleaned_data["url"] print(f'[+] Adding URL: {url}') depth = 0 if form.cleaned_data["depth"] == "0" else 1 - extractors = "" - for extractor in form.cleaned_data["archiveMethods"]: - extractors = extractors + extractor + ',' + extractors = ','.join(form.cleaned_data["archive_methods"]) input_kwargs = { "urls": url, "depth": depth, From 6f462b45d7dd6bc5a0d49a3329c592d32c610b9f Mon Sep 17 00:00:00 2001 From: BlipRanger Date: Thu, 10 Dec 2020 12:46:16 -0500 Subject: [PATCH 5/5] Update archivebox/core/forms.py Cleaner handling of the ARCHIVE_METHODS values Co-authored-by: Nick Sweeting --- archivebox/core/forms.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py index 14893d96..25d393ad 100644 --- a/archivebox/core/forms.py +++ b/archivebox/core/forms.py @@ -10,18 +10,11 @@ CHOICES = ( ('1', 'depth = 1 (archive these URLs and all URLs one hop away)'), ) +from ..extractors import get_default_archive_methods + ARCHIVE_METHODS = [ - ('title', 'title'), - ('favicon', 'favicon'), - ('wget', 'wget'), - ('warc', 'warc'), - ('pdf', 'pdf'), - ('screenshot', 'screenshot'), - ('dom', 'dom'), - ('singlefile', 'singlefile'), - ('git', 'git'), - ('media', 'media'), - ('archive_org', 'archive_org'), + (name, name) + for name, _, _ in get_default_archive_methods() ]