From 55a347c32eba27915effb9529d40a76de1276370 Mon Sep 17 00:00:00 2001 From: Nick Sweeting <git@sweeting.me> Date: Thu, 2 Jan 2025 23:58:59 -0800 Subject: [PATCH] Update file_migrations.py --- archivebox/filestore/file_migrations.py | 154 ++++++++++++------------ 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/archivebox/filestore/file_migrations.py b/archivebox/filestore/file_migrations.py index 2753dfc1..784952d5 100644 --- a/archivebox/filestore/file_migrations.py +++ b/archivebox/filestore/file_migrations.py @@ -1,99 +1,99 @@ -__package__ = 'archivebox.filestore' +# __package__ = 'archivebox.filestore' -import re -from pathlib import Path -from functools import wraps -from enum import Enum +# import re +# from pathlib import Path +# from functools import wraps +# from enum import Enum -import archivebox -from archivebox import CONSTANTS +# import archivebox +# from archivebox import CONSTANTS -from core.models import Snapshot -from .models import File +# from core.models import Snapshot +# from .models import File -class FilestoreVersion(Enum): - v0_7_2 = 'v0.7.2' - v0_8_6 = 'v0.8.6' - v0_9_0 = 'v0.9.0' +# class FilestoreVersion(Enum): +# v0_7_2 = 'v0.7.2' +# v0_8_6 = 'v0.8.6' +# v0_9_0 = 'v0.9.0' -LATEST_VERSION = FilestoreVersion.v0_9_0 +# LATEST_VERSION = FilestoreVersion.v0_9_0 -def migration(src_ver: FilestoreVersion, dst_ver: FilestoreVersion, pattern: str, timeout_seconds: int = 600): - """Decorator for a migration function that will only run on files that match the given pattern and are at the given version.""" - def decorator(migration_func): - @wraps(migration_func) - def wrapper(file: File) -> None: - # skip if this migration doesn't apply to this file - if file.version != src_ver: - return None - if not re.match(pattern, file.file.name): - return None +# def migration(src_ver: FilestoreVersion, dst_ver: FilestoreVersion, pattern: str, timeout_seconds: int = 600): +# """Decorator for a migration function that will only run on files that match the given pattern and are at the given version.""" +# def decorator(migration_func): +# @wraps(migration_func) +# def wrapper(file: File) -> None: +# # skip if this migration doesn't apply to this file +# if file.version != src_ver: +# return None +# if not re.match(pattern, file.file.name): +# return None - # acquire lock, run migration + update version, then unlock - try: - file.acquire_lock(timeout_seconds) - migration_func(file) - file.version = dst_ver - except Exception as e: - # logger.error(f"Failed to migrate file {file.id}: {e}") - print(f"Failed to migrate file {file.id}: {e}") - file.version = src_ver # roll back version to original version - finally: - file.release_lock() - file.save() +# # acquire lock, run migration + update version, then unlock +# try: +# file.acquire_lock(timeout_seconds) +# migration_func(file) +# file.version = dst_ver +# except Exception as e: +# # logger.error(f"Failed to migrate file {file.id}: {e}") +# print(f"Failed to migrate file {file.id}: {e}") +# file.version = src_ver # roll back version to original version +# finally: +# file.release_lock() +# file.save() - wrapper.src_ver = src_ver # type: ignore - wrapper.dst_ver = dst_ver # type: ignore - wrapper.pattern = pattern # type: ignore - wrapper.timeout_seconds = timeout_seconds # type: ignore - return wrapper - return decorator +# wrapper.src_ver = src_ver # type: ignore +# wrapper.dst_ver = dst_ver # type: ignore +# wrapper.pattern = pattern # type: ignore +# wrapper.timeout_seconds = timeout_seconds # type: ignore +# return wrapper +# return decorator -def detect_archiveresult(path: Path) -> 'ArchiveResult' | None: - # archive/1723423525.0/singlefile.html - timestamp = path.parts[1] - snapshot = Snapshot.objects.filter(timestamp=timestamp).last() - if not snapshot: - return +# def detect_archiveresult(path: Path) -> 'ArchiveResult' | None: +# # archive/1723423525.0/singlefile.html +# timestamp = path.parts[1] +# snapshot = Snapshot.objects.filter(timestamp=timestamp).last() +# if not snapshot: +# return - result = snapshot.archiveresult_set.filter(output=path.name).last() - if not result: - return - return result +# result = snapshot.archiveresult_set.filter(output=path.name).last() +# if not result: +# return +# return result -# @hookimpl(hook_name='migrate_file') -@migration(FilestoreVersion.v0_7_2, FilestoreVersion.v0_8_6, r'archive/([0-9\.]+)/.+', timeout_seconds=600) -def migrate_v07_to_v08_singlefile(file: File) -> None: - result = detect_archiveresult(file.relpath) - new_path = result.OUTPUT_DIR / 'index.html' - file.move_to(new_path) +# # @hookimpl(hook_name='migrate_file') +# @migration(FilestoreVersion.v0_7_2, FilestoreVersion.v0_8_6, r'archive/([0-9\.]+)/.+', timeout_seconds=600) +# def migrate_v07_to_v08_singlefile(file: File) -> None: +# result = detect_archiveresult(file.relpath) +# new_path = result.OUTPUT_DIR / 'index.html' +# file.move_to(new_path) -# @hookimpl(hook_name='migrate_file') -@migration(FilestoreVersion.v0_8_6, FilestoreVersion.v0_9_0, r'archive/([0-9\.]+)/singlefile.html', timeout_seconds=600) -def migrate_v08_to_v09_singlefile(file: File) -> None: - result = detect_archiveresult(file.relpath) - new_path = result.OUTPUT_DIR / 'index.html' - file.move_to(new_path) +# # @hookimpl(hook_name='migrate_file') +# @migration(FilestoreVersion.v0_8_6, FilestoreVersion.v0_9_0, r'archive/([0-9\.]+)/singlefile.html', timeout_seconds=600) +# def migrate_v08_to_v09_singlefile(file: File) -> None: +# result = detect_archiveresult(file.relpath) +# new_path = result.OUTPUT_DIR / 'index.html' +# file.move_to(new_path) -def migrate_all_files(target=LATEST_VERSION, batch_size: int = 100): - File.release_expired_locks() +# def migrate_all_files(target=LATEST_VERSION, batch_size: int = 100): +# File.release_expired_locks() - pending_files = ( - File.objects - .filter(status='unlocked') - .exclude(version=target) - .iterator(chunk_size=batch_size) - ) +# pending_files = ( +# File.objects +# .filter(status='unlocked') +# .exclude(version=target) +# .iterator(chunk_size=batch_size) +# ) - for file in pending_files: - try: - archivebox.pm.hook.migrate_file(file=file) - except Exception as e: - print(f"Failed to migrate file {file.id}: {e}") +# for file in pending_files: +# try: +# archivebox.pm.hook.migrate_file(file=file) +# except Exception as e: +# print(f"Failed to migrate file {file.id}: {e}")