Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-09 12:21:57 -04:00
add filestore content addressable store draft
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Build Debian package / build (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Deploy static content to Pages / deploy (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled
parent dc0f1b0efc · commit d192eb5c48
6 changed files with 556 additions and 349 deletions
@@ -213,6 +213,10 @@ DATABASES = {
         "NAME": CONSTANTS.QUEUE_DATABASE_FILE,
         **SQLITE_CONNECTION_OPTIONS,
     },
+    # "filestore": {
+    #     "NAME": CONSTANTS.FILESTORE_DATABASE_FILE,
+    #     **SQLITE_CONNECTION_OPTIONS,
+    # },
     # 'cache': {
     #     'NAME': CACHE_DB_PATH,
     #     **SQLITE_CONNECTION_OPTIONS,
@@ -266,15 +270,16 @@ class HueyDBRouter:
     """

     route_app_labels = {"huey_monitor", "django_huey", "djhuey"}
+    db_name = "queue"

     def db_for_read(self, model, **hints):
         if model._meta.app_label in self.route_app_labels:
-            return "queue"
+            return self.db_name
         return 'default'

     def db_for_write(self, model, **hints):
         if model._meta.app_label in self.route_app_labels:
-            return "queue"
+            return self.db_name
         return 'default'

     def allow_relation(self, obj1, obj2, **hints):
@@ -284,9 +289,39 @@ class HueyDBRouter:

     def allow_migrate(self, db, app_label, model_name=None, **hints):
         if app_label in self.route_app_labels:
-            return db == "queue"
+            return db == self.db_name
         return db == "default"

+# class FilestoreDBRouter:
+#     """
+#     A router to store all the File models in the filestore.sqlite3 database.
+#     This data just mirrors what is in the file system, so we want to keep it in a separate database
+#     from the main index database to avoid contention.
+#     """
+
+#     route_app_labels = {"filestore"}
+#     db_name = "filestore"
+
+#     def db_for_read(self, model, **hints):
+#         if model._meta.app_label in self.route_app_labels:
+#             return self.db_name
+#         return 'default'
+
+#     def db_for_write(self, model, **hints):
+#         if model._meta.app_label in self.route_app_labels:
+#             return self.db_name
+#         return 'default'
+
+#     def allow_relation(self, obj1, obj2, **hints):
+#         if obj1._meta.app_label in self.route_app_labels or obj2._meta.app_label in self.route_app_labels:
+#             return obj1._meta.app_label == obj2._meta.app_label
+#         return None
+
+#     def allow_migrate(self, db, app_label, model_name=None, **hints):
+#         if app_label in self.route_app_labels:
+#             return db == self.db_name
+#         return db == "default"
+
 DATABASE_ROUTERS = ['core.settings.HueyDBRouter']

 CACHES = {
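
To make the intent of the commented-out draft above concrete, here is a minimal sketch of what core/settings.py could look like once the filestore pieces are uncommented: the extra SQLite database entry, both routers registered, and a targeted migrate to create its tables. The constants and app label come straight from the diff; the migrate invocation is an assumption about how the new database would be initialized, not something this commit adds.

    # core/settings.py (hypothetical, with the draft uncommented)
    DATABASES = {
        # ... "default" and "queue" entries unchanged ...
        "filestore": {
            "NAME": CONSTANTS.FILESTORE_DATABASE_FILE,
            **SQLITE_CONNECTION_OPTIONS,
        },
    }

    DATABASE_ROUTERS = [
        'core.settings.HueyDBRouter',
        'core.settings.FilestoreDBRouter',
    ]

    # then create the filestore tables in their own database (standard Django pattern):
    #   python manage.py migrate filestore --database=filestore
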
@@ -313,6 +348,13 @@ STORAGES = {
             "location": ARCHIVE_DIR,
         },
     },
+    # "snapshots": {
+    #     "BACKEND": "django.core.files.storage.FileSystemStorage",
+    #     "OPTIONS": {
+    #         "base_url": "/snapshots/",
+    #         "location": CONSTANTS.SNAPSHOTS_DIR,
+    #     },
+    # },
     # "personas": {
     #     "BACKEND": "django.core.files.storage.FileSystemStorage",
     #     "OPTIONS": {
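
If the commented "snapshots" storage were enabled, it would be reachable through Django's named-storage registry (available since Django 4.2, which introduced the STORAGES setting used here). A small sketch, assuming the entry is uncommented exactly as written and that some file exists under SNAPSHOTS_DIR (the relative path below is only an example):

    from django.core.files.storage import storages

    snapshots = storages["snapshots"]                 # FileSystemStorage rooted at CONSTANTS.SNAPSHOTS_DIR
    with snapshots.open("example/index.html") as f:   # hypothetical relative path inside SNAPSHOTS_DIR
        html = f.read()
    print(snapshots.url("example/index.html"))        # -> "/snapshots/example/index.html" via base_url
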
@@ -1,66 +1,67 @@
-import mimetypes
-import uuid
+# import mimetypes
+# import uuid

-from django.db import models
-from django.conf import settings
-from django.utils import timezone
+# from django.db import models
+# from django.conf import settings
+# from django.utils import timezone

-from archivebox import DATA_DIR
-from archivebox.misc.hashing import get_dir_info, hash_file
-from base_models.abid import DEFAULT_ABID_URI_SALT
-from base_models.models import ABIDModel, ABIDField, get_or_create_system_user_pk
+# from archivebox import DATA_DIR
+# from archivebox.misc.hashing import get_dir_info, hash_file
+# from base_models.abid import DEFAULT_ABID_URI_SALT
+# from base_models.models import ABIDModel, ABIDField, get_or_create_system_user_pk

-class File(ABIDModel):
-    abid_prefix = 'fil_'
-    abid_ts_src = 'self.created_at'
-    abid_uri_src = 'self.path'
-    abid_subtype_src = 'self.mime_type'
-    abid_rand_src = 'self.id'
-    abid_salt: str = DEFAULT_ABID_URI_SALT       # combined with self.uri to anonymize hashes on a per-install basis (default is shared globally with all users, means everyone will hash ABC to -> 123 the same around the world, makes it easy to share ABIDs across installs and see if they are for the same URI. Change this if you dont want your hashes to be guessable / in the same hash space as all other users)
-    abid_drift_allowed: bool = False
+# class File(ABIDModel):
+#     abid_prefix = 'fil_'
+#     abid_ts_src = 'self.created_at'
+#     abid_uri_src = 'self.path'
+#     abid_subtype_src = 'self.mime_type'
+#     abid_rand_src = 'self.id'
+#     abid_salt: str = DEFAULT_ABID_URI_SALT       # combined with self.uri to anonymize hashes on a per-install basis (default is shared globally with all users, means everyone will hash ABC to -> 123 the same around the world, makes it easy to share ABIDs across installs and see if they are for the same URI. Change this if you dont want your hashes to be guessable / in the same hash space as all other users)
+#     abid_drift_allowed: bool = False

-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False, null=False)
-    abid = ABIDField(prefix=abid_prefix)
+#     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False, null=False)
+#     abid = ABIDField(prefix=abid_prefix)

-    created_at = models.DateTimeField(default=timezone.now, null=False)
-    modified_at = models.DateTimeField(default=timezone.now, null=False)
-    created_by = models.ForeignKey(settings.USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
+#     created_at = models.DateTimeField(default=timezone.now, null=False)
+#     modified_at = models.DateTimeField(default=timezone.now, null=False)
+#     created_by = models.ForeignKey(settings.USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)

-    path = models.FilePathField(path=str(DATA_DIR), recursive=True, allow_files=True, allow_folders=True, db_index=True, unique=True)
+#     path = models.FilePathField(path=str(DATA_DIR), recursive=True, allow_files=True, allow_folders=True, db_index=True, unique=True)

-    basename = models.CharField(max_length=255, default=None, null=False)                   # e.g. 'index'
-    extension = models.CharField(max_length=63, default='', null=False)                     # e.g. 'html'
-    mime_type = models.CharField(max_length=63, default=None, null=False, db_index=True)    # e.g. 'inode/directory' or 'text/html'
-    num_subpaths = models.IntegerField(default=None, null=False)                            # e.g. 3
-    num_bytes = models.IntegerField(default=None, null=False)                               # e.g. 123456
+#     basename = models.CharField(max_length=255, default=None, null=False)                 # e.g. 'index'
+#     extension = models.CharField(max_length=63, default='', null=False)                   # e.g. 'html'
+#     mime_type = models.CharField(max_length=63, default=None, null=False, db_index=True)  # e.g. 'inode/directory' or 'text/html'
+#     num_subpaths = models.IntegerField(default=None, null=False)                          # e.g. 3
+#     num_bytes = models.IntegerField(default=None, null=False)                             # e.g. 123456

-    hash_sha256 = models.CharField(max_length=64, default=None, null=False, db_index=True)  # e.g. '5994471abb01112afcc1815994471abb01112afcc1815994471abb01112afcc181'
-    # hash_blake3 = models.CharField(max_length=64, default=None, null=False, db_index=True)  # e.g. '5994471abb01112afcc1815994471abb01112afcc1815994471abb01112afcc181'
+#     hash_sha256 = models.CharField(max_length=64, default=None, null=False, db_index=True)  # e.g. '5994471abb01112afcc1815994471abb01112afcc1815994471abb01112afcc181'
+#     # hash_blake3 = models.CharField(max_length=64, default=None, null=False, db_index=True)  # e.g. '5994471abb01112afcc1815994471abb01112afcc1815994471abb01112afcc181'

-    DIR = 'inode/directory'
+#     DIR = 'inode/directory'


-    @property
-    def parent(self) -> 'File':
-        return File.objects.get(path=self.path.parent) or File(path=self.path.parent)
+#     @property
+#     def parent(self) -> 'File':
+#         return File.objects.get(path=self.path.parent) or File(path=self.path.parent)

-    def save(self, *args, **kwargs):
-        assert self.path.exists()
+#     def save(self, *args, **kwargs):
+#         assert self.path.exists()

-        if self.path.is_dir():
-            self.basename = self.path.name
-            self.extension = ''
-            self.mime_type = self.DIR
-            dir_info = get_dir_info(self.path)
-            self.num_subpaths = dir_info['.']['num_subpaths']
-            self.num_bytes = dir_info['.']['num_bytes']
-            self.hash_sha256 = dir_info['.']['hash_sha256']
-            # TODO: hash_blake3 = dir_info['.']['hash_blake3']
-        else:
-            self.basename = self.path.name
-            self.extension = self.path.suffix
-            self.mime_type = mimetypes.guess_type(self.path)[0]
-            self.num_bytes = self.path.stat().st_size
-            self.hash_sha256, self.hash_blake3 = hash_file(self.path)
-        super().save(*args, **kwargs)
+#         if self.path.is_dir():
+#             self.basename = self.path.name
+#             self.extension = ''
+#             self.mime_type = self.DIR
+#             dir_info = get_dir_info(self.path)
+#             self.num_subpaths = dir_info['.']['num_subpaths']
+#             self.num_bytes = dir_info['.']['num_bytes']
+#             self.hash_sha256 = dir_info['.']['hash_sha256']
+#             # TODO: hash_blake3 = dir_info['.']['hash_blake3']
+#         else:
+#             self.basename = self.path.name
+#             self.extension = self.path.suffix
+#             self.mime_type = mimetypes.guess_type(self.path)[0]
+#             self.num_bytes = self.path.stat().st_size
+#             self.hash_sha256, self.hash_blake3 = hash_file(self.path)
+#         super().save(*args, **kwargs)

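
The File model is parked behind comments in this commit, but the shape of the draft content-addressable store is already visible: one row per path under DATA_DIR, with save() stat-ing and hashing the path so identical content can be found again by hash_sha256 (which is indexed). A rough usage sketch, assuming the model is eventually uncommented and importable from a "filestore" app (the import path is a guess, not part of this diff):

    from archivebox import DATA_DIR
    from filestore.models import File            # hypothetical import path for the uncommented model

    f = File(path=DATA_DIR / 'archive' / '1700000000.0' / 'index.html')   # example path, must exist on disk
    f.save()                                     # fills basename, extension, mime_type, num_bytes, hash_sha256

    # content-addressed lookup: any other indexed path holding identical bytes
    duplicates = File.objects.filter(hash_sha256=f.hash_sha256).exclude(pk=f.pk)
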
@@ -221,3 +221,53 @@ if __name__ == '__main__':
     dir_info = get_dir_info(Path('.'), max_depth=6)
     with open('.hashes.json', 'w') as f:
         json.dump(dir_info, f, indent=4)
+    print('√ Wrote .hashes.json')
+
+    # Example output:
+    # {
+    #     ".": {
+    #         "basename": "misc",
+    #         "mime_type": "inode/directory",
+    #         "extension": "",
+    #         "num_subpaths": 25,
+    #         "num_bytes": 214677,
+    #         "hash_sha256": "addfacf88b2ff6b564846415fb7b21dcb7e63ee4e911bc0aec255ee354958530",
+    #         "hash_blake3": "3403a1f876453c7749f17ee3502769eff05cff20b5d6c2f2cf458e6353a380db",
+    #         "created_at": "2024-12-04T00:08:38.537449",
+    #         "modified_at": "2024-12-04T00:08:38.537449"
+    #     },
+    #     "__init__.py": {
+    #         "basename": "__init__",
+    #         "mime_type": "text/x-python",
+    #         "extension": ".py",
+    #         "num_subpaths": null,
+    #         "num_bytes": 32,
+    #         "hash_sha256": "b0e5e7ff17db3b60535cf664282787767c336e3e203a43e21b6326c6fe457551",
+    #         "hash_blake3": "4a801eb2a4cdde8d3422be1e2074b78574a5890afb3027cbe6f3b3cf4d113fd1",
+    #         "created_at": "2024-10-08T00:51:41.001359",
+    #         "modified_at": "2024-10-08T00:51:41.001359"
+    #     },
+    #     "__pycache__/": {
+    #         "basename": "__pycache__",
+    #         "mime_type": "inode/directory",
+    #         "extension": "",
+    #         "num_subpaths": 8,
+    #         "num_bytes": 107593,
+    #         "hash_sha256": "9e917a438be774ffc7ea9125de71008c29a7d9003b6f5e09e2085aa1ef3157b3",
+    #         "hash_blake3": "e87184485bd67bd9b723a9ee4d472e8c1d24a4388d373046a27e5a1e10467a06",
+    #         "created_at": "2024-12-04T00:00:16.149390",
+    #         "modified_at": "2024-12-04T00:00:16.149390"
+    #     },
+    #     "__pycache__/__init__.cpython-313.pyc": {
+    #         "basename": "__init__.cpython-313",
+    #         "mime_type": "application/x-python-code",
+    #         "extension": ".pyc",
+    #         "num_subpaths": null,
+    #         "num_bytes": 223,
+    #         "hash_sha256": "d29e3ee5e6b9b564422d9ef2c7325d28cf759b9fb868f59551ba43cd991d51be",
+    #         "hash_blake3": "279a6dc4c8161d6ddb18fa72c882f375324ed152dc6c7c7eac9ef5fdd066f2fd",
+    #         "created_at": "2024-12-03T03:13:43.257430",
+    #         "modified_at": "2024-12-03T03:13:43.257308"
+    #     },
+    #     ...
+    # }
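
Because get_dir_info() returns one entry per subpath keyed by relative path, the .hashes.json manifest written above can double as a cheap change-detection baseline: hash the directory again later and compare sha256 values. A small sketch of that idea (not part of this commit, built only on the structure shown in the example output):

    import json
    from pathlib import Path

    from archivebox.misc.hashing import get_dir_info   # same helper used in the __main__ block above

    def changed_paths(dir_path: Path, manifest_file: Path = Path('.hashes.json')) -> list[str]:
        """Return subpaths whose sha256 differs from, or is missing in, the saved manifest."""
        old = json.loads(manifest_file.read_text())
        new = get_dir_info(dir_path, max_depth=6)
        return [
            subpath
            for subpath, info in new.items()
            if old.get(subpath, {}).get('hash_sha256') != info['hash_sha256']
        ]

    # usage (hypothetical):
    #   print(changed_paths(Path('.')))
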