mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-09 12:21:57 -04:00
Update file_migrations.py
Some checks failed
Build Pip package / build (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Debian package / build (push) Has been cancelled
Deploy static content to Pages / deploy (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled
Some checks failed
Build Pip package / build (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Debian package / build (push) Has been cancelled
Deploy static content to Pages / deploy (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled
This commit is contained in:
parent
a851ad4c87
commit
55a347c32e
1 changed file with 77 additions and 77 deletions
|
@ -1,99 +1,99 @@
|
|||
__package__ = 'archivebox.filestore'
|
||||
# __package__ = 'archivebox.filestore'
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from functools import wraps
|
||||
from enum import Enum
|
||||
# import re
|
||||
# from pathlib import Path
|
||||
# from functools import wraps
|
||||
# from enum import Enum
|
||||
|
||||
|
||||
import archivebox
|
||||
from archivebox import CONSTANTS
|
||||
# import archivebox
|
||||
# from archivebox import CONSTANTS
|
||||
|
||||
from core.models import Snapshot
|
||||
from .models import File
|
||||
# from core.models import Snapshot
|
||||
# from .models import File
|
||||
|
||||
|
||||
class FilestoreVersion(str, Enum):
    """Known filestore layout versions that migrations convert between.

    Members also subclass ``str`` so that they compare equal to their raw
    version string (``FilestoreVersion.v0_9_0 == 'v0.9.0'``) while remaining
    ordinary enum members (``.name``, ``.value`` and identity checks are
    unchanged).
    """
    # NOTE(review): presumably File.version holds the raw version string; with a
    # plain Enum, comparisons against such a string are never equal -- confirm
    # against the File model.

    v0_7_2 = 'v0.7.2'
    v0_8_6 = 'v0.8.6'
    v0_9_0 = 'v0.9.0'
|
||||
# class FilestoreVersion(Enum):
|
||||
# v0_7_2 = 'v0.7.2'
|
||||
# v0_8_6 = 'v0.8.6'
|
||||
# v0_9_0 = 'v0.9.0'
|
||||
|
||||
# Default target version used by migrate_all_files().
LATEST_VERSION = FilestoreVersion.v0_9_0
|
||||
# LATEST_VERSION = FilestoreVersion.v0_9_0
|
||||
|
||||
|
||||
def migration(src_ver: 'FilestoreVersion', dst_ver: 'FilestoreVersion', pattern: str, timeout_seconds: int = 600):
    """Decorator for a migration function that will only run on files that match the given pattern and are at the given version.

    Args:
        src_ver: version a file must currently be at for the migration to run.
        dst_ver: version written to ``file.version`` after a successful run.
        pattern: regex applied with ``match()`` (anchored at the start) to
            ``file.file.name``.
        timeout_seconds: value passed through to ``file.acquire_lock()``.

    Returns:
        A decorator. The wrapped function takes a single ``file`` argument and
        always returns ``None``. Exceptions raised while acquiring the lock or
        running the migration are printed, not propagated. The wrapper exposes
        ``src_ver``, ``dst_ver``, ``pattern`` and ``timeout_seconds`` attributes.
    """
    # compile once per decorated migration instead of once per file
    compiled_pattern = re.compile(pattern)

    def decorator(migration_func):
        @wraps(migration_func)
        def wrapper(file: 'File') -> None:
            # skip if this migration doesn't apply to this file
            if file.version != src_ver:
                return None
            if not compiled_pattern.match(file.file.name):
                return None

            # Acquire the lock *outside* the try/finally below: if we never got
            # the lock we must not release it or save the file.
            try:
                file.acquire_lock(timeout_seconds)
            except Exception as e:
                print(f"Failed to migrate file {file.id}: {e}")
                return None

            # run migration + update version, then unlock
            try:
                migration_func(file)
                file.version = dst_ver
            except Exception as e:
                # logger.error(f"Failed to migrate file {file.id}: {e}")
                print(f"Failed to migrate file {file.id}: {e}")
                file.version = src_ver      # roll back version to original version
            finally:
                file.release_lock()
                file.save()
            return None

        # expose the migration's parameters for introspection
        wrapper.src_ver = src_ver                                   # type: ignore
        wrapper.dst_ver = dst_ver                                   # type: ignore
        wrapper.pattern = pattern                                   # type: ignore
        wrapper.timeout_seconds = timeout_seconds                   # type: ignore
        return wrapper
    return decorator
|
||||
# wrapper.src_ver = src_ver # type: ignore
|
||||
# wrapper.dst_ver = dst_ver # type: ignore
|
||||
# wrapper.pattern = pattern # type: ignore
|
||||
# wrapper.timeout_seconds = timeout_seconds # type: ignore
|
||||
# return wrapper
|
||||
# return decorator
|
||||
|
||||
def detect_archiveresult(path: Path) -> 'ArchiveResult | None':
    """Look up the ArchiveResult that corresponds to an archived file path.

    Args:
        path: relative path whose second component is the snapshot timestamp,
            e.g. ``archive/1723423525.0/singlefile.html``.

    Returns:
        The last ArchiveResult of the matching Snapshot whose ``output`` equals
        ``path.name``, or ``None`` if the path has fewer than two components,
        no Snapshot has that timestamp, or no such ArchiveResult exists.
    """
    # The whole return annotation is quoted: evaluating `'ArchiveResult' | None`
    # at definition time raises TypeError (str | NoneType is not supported).

    # archive/1723423525.0/singlefile.html
    parts = path.parts
    if len(parts) < 2:
        # too short to contain a timestamp component
        return None

    timestamp = parts[1]
    snapshot = Snapshot.objects.filter(timestamp=timestamp).last()
    if not snapshot:
        return None

    result = snapshot.archiveresult_set.filter(output=path.name).last()
    if not result:
        return None
    return result
|
||||
# result = snapshot.archiveresult_set.filter(output=path.name).last()
|
||||
# if not result:
|
||||
# return
|
||||
# return result
|
||||
|
||||
|
||||
# @hookimpl(hook_name='migrate_file')
@migration(FilestoreVersion.v0_7_2, FilestoreVersion.v0_8_6, r'archive/([0-9\.]+)/.+', timeout_seconds=600)
def migrate_v07_to_v08_singlefile(file: File) -> None:
    """Move a v0.7.2 file to ``index.html`` inside its ArchiveResult's ``OUTPUT_DIR``.

    Raises:
        ValueError: if no ArchiveResult can be found for ``file.relpath``.
            The ``@migration`` wrapper catches this, prints it and leaves the
            file at its original version.
    """
    # NOTE(review): the pattern above matches every file under
    # archive/<timestamp>/, not only singlefile.html -- confirm this is intended,
    # since every match is moved to the same index.html name.
    result = detect_archiveresult(file.relpath)
    if result is None:
        # fail with a clear message instead of an AttributeError on None
        raise ValueError(f'No ArchiveResult found for {file.relpath}')

    new_path = result.OUTPUT_DIR / 'index.html'
    file.move_to(new_path)
|
||||
# # @hookimpl(hook_name='migrate_file')
|
||||
# @migration(FilestoreVersion.v0_7_2, FilestoreVersion.v0_8_6, r'archive/([0-9\.]+)/.+', timeout_seconds=600)
|
||||
# def migrate_v07_to_v08_singlefile(file: File) -> None:
|
||||
# result = detect_archiveresult(file.relpath)
|
||||
# new_path = result.OUTPUT_DIR / 'index.html'
|
||||
# file.move_to(new_path)
|
||||
|
||||
# @hookimpl(hook_name='migrate_file')
@migration(FilestoreVersion.v0_8_6, FilestoreVersion.v0_9_0, r'archive/([0-9\.]+)/singlefile\.html$', timeout_seconds=600)
def migrate_v08_to_v09_singlefile(file: File) -> None:
    """Move a v0.8.6 ``singlefile.html`` to ``index.html`` inside its ArchiveResult's ``OUTPUT_DIR``.

    The pattern escapes the dot and anchors the end so that only files named
    exactly ``singlefile.html`` match (not e.g. ``singlefile.html.bak``).

    Raises:
        ValueError: if no ArchiveResult can be found for ``file.relpath``.
            The ``@migration`` wrapper catches this, prints it and leaves the
            file at its original version.
    """
    result = detect_archiveresult(file.relpath)
    if result is None:
        # fail with a clear message instead of an AttributeError on None
        raise ValueError(f'No ArchiveResult found for {file.relpath}')

    new_path = result.OUTPUT_DIR / 'index.html'
    file.move_to(new_path)
|
||||
# # @hookimpl(hook_name='migrate_file')
|
||||
# @migration(FilestoreVersion.v0_8_6, FilestoreVersion.v0_9_0, r'archive/([0-9\.]+)/singlefile.html', timeout_seconds=600)
|
||||
# def migrate_v08_to_v09_singlefile(file: File) -> None:
|
||||
# result = detect_archiveresult(file.relpath)
|
||||
# new_path = result.OUTPUT_DIR / 'index.html'
|
||||
# file.move_to(new_path)
|
||||
|
||||
|
||||
|
||||
|
||||
def migrate_all_files(target=LATEST_VERSION, batch_size: int = 100):
    """Call the ``migrate_file`` plugin hook on every unlocked File not yet at ``target``.

    Expired locks are released first. Files are then streamed from the database
    ``batch_size`` rows at a time. A failure on one file is printed and does not
    stop the remaining files from being processed.
    """
    File.release_expired_locks()

    unlocked = File.objects.filter(status='unlocked')
    outdated = unlocked.exclude(version=target)

    for pending in outdated.iterator(chunk_size=batch_size):
        try:
            archivebox.pm.hook.migrate_file(file=pending)
        except Exception as err:
            print(f"Failed to migrate file {pending.id}: {err}")
|
||||
# for file in pending_files:
|
||||
# try:
|
||||
# archivebox.pm.hook.migrate_file(file=file)
|
||||
# except Exception as e:
|
||||
# print(f"Failed to migrate file {file.id}: {e}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue