mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
hardcode EXTRACTOR_CHOICES to prevent nondeterministic migrations
This commit is contained in:
parent
0a5b22700c
commit
09553d8340
5 changed files with 44 additions and 19 deletions
|
@ -180,12 +180,8 @@ class SnapshotActionForm(ActionForm):
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: allow selecting actions for specific extractors? is this useful?
|
# TODO: allow selecting actions for specific extractors? is this useful?
|
||||||
# EXTRACTOR_CHOICES = [
|
|
||||||
# (name, name.title())
|
|
||||||
# for name, _, _ in get_default_archive_methods()
|
|
||||||
# ]
|
|
||||||
# extractor = forms.ChoiceField(
|
# extractor = forms.ChoiceField(
|
||||||
# choices=EXTRACTOR_CHOICES,
|
# choices=ArchiveResult.EXTRACTOR_CHOICES,
|
||||||
# required=False,
|
# required=False,
|
||||||
# widget=forms.MultileChoiceField(attrs={'class': "form-control"})
|
# widget=forms.MultileChoiceField(attrs={'class': "form-control"})
|
||||||
# )
|
# )
|
||||||
|
|
|
@ -38,6 +38,21 @@ class Migration(migrations.Migration):
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='archiveresult',
|
model_name='archiveresult',
|
||||||
name='extractor',
|
name='extractor',
|
||||||
field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
|
field=models.CharField(choices=(
|
||||||
|
('htmltotext', 'htmltotext'),
|
||||||
|
('git', 'git'),
|
||||||
|
('singlefile', 'singlefile'),
|
||||||
|
('media', 'media'),
|
||||||
|
('archive_org', 'archive_org'),
|
||||||
|
('readability', 'readability'),
|
||||||
|
('mercury', 'mercury'),
|
||||||
|
('favicon', 'favicon'),
|
||||||
|
('pdf', 'pdf'),
|
||||||
|
('headers', 'headers'),
|
||||||
|
('screenshot', 'screenshot'),
|
||||||
|
('dom', 'dom'),
|
||||||
|
('title', 'title'),
|
||||||
|
('wget', 'wget'),
|
||||||
|
), max_length=32),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
|
@ -28,13 +28,6 @@ from ..index.html import snapshot_icons
|
||||||
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
||||||
|
|
||||||
|
|
||||||
EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
|
|
||||||
STATUS_CHOICES = [
|
|
||||||
("succeeded", "succeeded"),
|
|
||||||
("failed", "failed"),
|
|
||||||
("skipped", "skipped")
|
|
||||||
]
|
|
||||||
|
|
||||||
def rand_int_id():
|
def rand_int_id():
|
||||||
return random.getrandbits(32)
|
return random.getrandbits(32)
|
||||||
|
|
||||||
|
@ -376,7 +369,28 @@ class ArchiveResult(ABIDModel):
|
||||||
abid_uri_src = 'self.snapshot.url'
|
abid_uri_src = 'self.snapshot.url'
|
||||||
abid_subtype_src = 'self.extractor'
|
abid_subtype_src = 'self.extractor'
|
||||||
abid_rand_src = 'self.old_id'
|
abid_rand_src = 'self.old_id'
|
||||||
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
|
||||||
|
EXTRACTOR_CHOICES = (
|
||||||
|
('htmltotext', 'htmltotext'),
|
||||||
|
('git', 'git'),
|
||||||
|
('singlefile', 'singlefile'),
|
||||||
|
('media', 'media'),
|
||||||
|
('archive_org', 'archive_org'),
|
||||||
|
('readability', 'readability'),
|
||||||
|
('mercury', 'mercury'),
|
||||||
|
('favicon', 'favicon'),
|
||||||
|
('pdf', 'pdf'),
|
||||||
|
('headers', 'headers'),
|
||||||
|
('screenshot', 'screenshot'),
|
||||||
|
('dom', 'dom'),
|
||||||
|
('title', 'title'),
|
||||||
|
('wget', 'wget'),
|
||||||
|
)
|
||||||
|
STATUS_CHOICES = [
|
||||||
|
("succeeded", "succeeded"),
|
||||||
|
("failed", "failed"),
|
||||||
|
("skipped", "skipped")
|
||||||
|
]
|
||||||
|
|
||||||
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
|
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ def snapshot_icons(snapshot) -> str:
|
||||||
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||||
|
|
||||||
def calc_snapshot_icons():
|
def calc_snapshot_icons():
|
||||||
from core.models import EXTRACTOR_CHOICES
|
from core.models import ArchiveResult
|
||||||
# start = datetime.now(timezone.utc)
|
# start = datetime.now(timezone.utc)
|
||||||
|
|
||||||
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
||||||
|
@ -147,12 +147,12 @@ def snapshot_icons(snapshot) -> str:
|
||||||
# Missing specific entry for WARC
|
# Missing specific entry for WARC
|
||||||
|
|
||||||
extractor_outputs = defaultdict(lambda: None)
|
extractor_outputs = defaultdict(lambda: None)
|
||||||
for extractor, _ in EXTRACTOR_CHOICES:
|
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
|
||||||
for result in archive_results:
|
for result in archive_results:
|
||||||
if result.extractor == extractor and result:
|
if result.extractor == extractor and result:
|
||||||
extractor_outputs[extractor] = result
|
extractor_outputs[extractor] = result
|
||||||
|
|
||||||
for extractor, _ in EXTRACTOR_CHOICES:
|
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
|
||||||
if extractor not in exclude:
|
if extractor not in exclude:
|
||||||
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||||
# Check filesystem to see if anything is actually present (too slow, needs optimization/caching)
|
# Check filesystem to see if anything is actually present (too slow, needs optimization/caching)
|
||||||
|
|
|
@ -529,8 +529,8 @@ def log_shell_welcome_msg():
|
||||||
from .cli import list_subcommands
|
from .cli import list_subcommands
|
||||||
|
|
||||||
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
|
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
|
||||||
print('{green}from archivebox.core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
|
print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
|
||||||
print('{green}from archivebox.cli import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
|
print('{green}from cli import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
|
||||||
print()
|
print()
|
||||||
print('[i] Welcome to the ArchiveBox Shell!')
|
print('[i] Welcome to the ArchiveBox Shell!')
|
||||||
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
|
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue