# Extracted from archivebox/abid_utils/models.py (added after ModelWithHealthStats).
class ModelWithOutputDir(ABIDModel):
    """Abstract base for models whose data lives in a per-object directory on disk.

    The directory is derived from the model type and the object's ABID as
    ``DATA_DIR / '<type>s' / '<ABID>'``,
    e.g. ``/data/snapshots/snp_2342353k2jn3j32l4324``.

    Most of the index writers and migrations below are still stubs: they only
    print a trace line, and the comments inside them describe the intended work.
    """

    class Meta:
        abstract = True

    # Planned fields (not enabled yet):
    # output_dir = models.FilePathField(path=CONSTANTS.DATA_DIR, max_length=200, blank=True, null=True)
    # output_files = models.JSONField(default=dict)

    def save(self, *args, write_indexes=False, **kwargs) -> None:
        """Save the object, then optionally write its on-disk indexes.

        Args:
            *args: Forwarded unchanged to the parent ``save()``.
            write_indexes: When truthy, call ``self.write_indexes()`` after the
                parent ``save()`` has returned. Keyword-only.
            **kwargs: Forwarded unchanged to the parent ``save()``.
        """
        super().save(*args, **kwargs)
        if write_indexes:
            self.write_indexes()

    @property
    def output_dir_type(self) -> str:
        """Get the model type parent directory name that holds this object's data e.g. 'archiveresults'

        Uses ``self.output_dir_parent`` when the attribute exists, otherwise
        ``self._meta.model_name``, and appends a plain ``'s'`` to pluralize it.

        Raises:
            AssertionError: If the resolved parent directory name is empty.
        """
        parent_dir = getattr(self, 'output_dir_parent', self._meta.model_name)
        # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
        # which would let an empty name through and produce the directory 's'.
        if not parent_dir:
            raise AssertionError(f'{type(self).__name__} has no output dir parent name')
        return f'{parent_dir}s'                                   # e.g. archiveresults

    @property
    def output_dir_name(self) -> str:
        """Get the subdirectory name for the filesystem directory that holds this object's data e.g. 'snp_2342353k2jn3j32l4324'

        Raises:
            AssertionError: If ``self.ABID`` is falsy.
        """
        abid = self.ABID
        # Explicit raise instead of `assert`: under `python -O` a missing ABID would
        # otherwise be stringified (e.g. 'None') and used as a real directory name.
        if not abid:
            raise AssertionError(f'{type(self).__name__} has no ABID to use as its output dir name')
        return str(abid)                                          # e.g. snp_2342353k2jn3j32l4324

    @property
    def output_dir_str(self) -> str:
        """Get the relative filesystem path (as a str) of the directory that holds the data for this object e.g. 'snapshots/snp_2342353k2jn3j32l4324'"""
        return f'{self.output_dir_type}/{self.output_dir_name}'  # e.g. snapshots/snp_2342353k2jn3j32l4324

    @property
    def OUTPUT_DIR(self) -> Path:
        """Get the absolute filesystem directory Path that holds the data for this object e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')"""
        # Imported at call time rather than module level
        # (presumably to avoid a circular import with the archivebox package -- confirm).
        from archivebox import DATA_DIR
        return DATA_DIR / self.output_dir_str                     # e.g. /data/snapshots/snp_2342353k2jn3j32l4324

    def write_indexes(self):
        """Create the output dir, run pending output-dir migrations, then write every index.

        Order: mkdir -> migrate_output_dir -> merkle -> html -> json -> symlinks.
        """
        print(f'{self}.write_indexes()')
        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        self.migrate_output_dir()
        self.save_merkle_index()
        self.save_html_index()
        self.save_json_index()
        self.save_symlinks_index()

    def migrate_output_dir(self):
        """Move the output files to the new folder structure if needed"""
        print(f'{self}.migrate_output_dir()')
        self.migrate_from_0_7_2()
        self.migrate_from_0_8_6()
        # ... future migrations here

    def migrate_from_0_7_2(self) -> None:
        """Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version (stub)"""
        print(f'{self}.migrate_from_0_7_2()')
        # TODO: move /data/archive/ -> /data/archive/snapshots/
        # TODO: update self.output_path = /data/archive/snapshots/

    def migrate_from_0_8_6(self) -> None:
        """Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version (stub)"""
        # ... future migration code here ...
        print(f'{self}.migrate_from_0_8_6()')

    def save_merkle_index(self, **kwargs) -> None:
        """Write the ./.index.merkle file to the output dir (stub; kwargs currently unused)"""
        # TODO: write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
        print(f'{self}.save_merkle_index()')

    def save_html_index(self, **kwargs) -> None:
        """Write the ./index.html file to the output dir (stub; kwargs currently unused)"""
        # TODO: write self.as_html() to self.output_dir / 'index.html'
        print(f'{self}.save_html_index()')

    def save_json_index(self, **kwargs) -> None:
        """Write the ./index.json file to the output dir (stub; kwargs currently unused)"""
        print(f'{self}.save_json_index()')
        # TODO: write self.as_json() to self.output_dir / 'index.json'

    def save_symlinks_index(self, **kwargs) -> None:
        """Create the by-date / by-domain / legacy symlinks to the output dir (stub).

        Accepts ``**kwargs`` (currently unused) so its signature matches the
        other ``save_*_index`` methods.
        """
        print(f'{self}.save_symlinks_index()')
        # TODO: ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/
        # TODO: ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/
        # TODO: ln -s self.output_dir data/archive/1453452234234.21445

    def as_json(self) -> dict:
        """Get the object's properties as a dict

        Only ``abid`` and ``str`` are converted to strings here; every other
        value is returned exactly as stored on the object, so the caller is
        responsible for serializing any non-JSON-native values.
        ``status``, ``retry_at`` and ``notes`` are None when the model does not
        define them.
        """
        return {
            'TYPE': self.TYPE,
            'id': self.id,
            'abid': str(self.ABID),
            'str': str(self),
            'modified_at': self.modified_at,
            'created_at': self.created_at,
            'created_by_id': self.created_by_id,
            'status': getattr(self, 'status', None),
            'retry_at': getattr(self, 'retry_at', None),
            'notes': getattr(self, 'notes', None),
        }

    def as_html(self) -> str:
        """Get the object's properties as a html string (stub: currently always returns '')"""
        # TODO: render snapshot_detail.html template with self as context and return html string
        return ''


####################################################