rename archivebox-info to archivebox-status

Nick Sweeting 2020-06-25 23:32:01 -04:00
parent 5c2bbe7efe
commit b9e17fa0d1
5 changed files with 76 additions and 37 deletions

View file

@@ -1,30 +1,30 @@
 #!/usr/bin/env python3

 __package__ = 'archivebox.cli'
-__command__ = 'archivebox info'
+__command__ = 'archivebox status'

 import sys
 import argparse

 from typing import Optional, List, IO

-from ..main import info, docstring
+from ..main import status, docstring
 from ..config import OUTPUT_DIR
 from .logging import SmartFormatter, reject_stdin


-@docstring(info.__doc__)
+@docstring(status.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     parser = argparse.ArgumentParser(
         prog=__command__,
-        description=info.__doc__,
+        description=status.__doc__,
         add_help=True,
         formatter_class=SmartFormatter,
     )
     parser.parse_args(args or ())
     reject_stdin(__command__, stdin)

-    info(out_dir=pwd or OUTPUT_DIR)
+    status(out_dir=pwd or OUTPUT_DIR)


 if __name__ == '__main__':

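Aside: the @docstring(status.__doc__) decorator above keeps the CLI entrypoint's help text in sync with the status() implementation. Its definition is not part of this diff; a minimal sketch of the pattern it implies, with all names assumed:

from typing import Callable, Optional

def docstring(text: Optional[str]) -> Callable:
    # Return a decorator that copies `text` onto the wrapped function's __doc__,
    # so argparse (via description=...) and --help show the same summary.
    def decorator(func: Callable) -> Callable:
        if text:
            func.__doc__ = text
        return func
    return decorator

def status():
    """Print out some info and statistics about the archive collection"""

@docstring(status.__doc__)
def main():
    pass

assert main.__doc__ == status.__doc__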
View file

@@ -785,14 +785,14 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
     json_index_exists = os.path.exists(os.path.join(output_dir, JSON_INDEX_FILENAME))
     if not json_index_exists:
-        stderr('[X] No archive main index was found in current directory.', color='red')
-        stderr(f'    {output_dir}')
+        stderr('[X] No archivebox index found in the current directory.', color='red')
+        stderr(f'    {output_dir}', color='lightyellow')
         stderr()
-        stderr('    Are you running archivebox in the right folder?')
+        stderr('    {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
         stderr('        cd path/to/your/archive/folder')
         stderr('        archivebox [command]')
         stderr()
-        stderr('    To create a new archive collection or import existing data in this folder, run:')
+        stderr('    {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
         stderr('        archivebox init')
         raise SystemExit(2)

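Aside: the {lightred}Hint{reset} placeholders in the new stderr() calls are filled in by str.format(**config['ANSI']). A self-contained sketch of that mechanism, using illustrative escape codes rather than ArchiveBox's actual color table:

# hypothetical stand-in for config['ANSI']
ANSI = {
    'lightred': '\033[91m',
    'lightyellow': '\033[93m',
    'reset': '\033[0m',
}

# same .format(**...) trick as the diff: named placeholders become escape codes
msg = '    {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**ANSI)
print(msg)  # prints "Hint" in light red on ANSI-capable terminals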
View file

@@ -12,13 +12,13 @@ class Snapshot(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     url = models.URLField(unique=True)
-    timestamp = models.CharField(max_length=32, null=True, default=None)
-    title = models.CharField(max_length=128, null=True, default=None)
-    tags = models.CharField(max_length=256, null=True, default=None)
-    added = models.DateTimeField(auto_now_add=True)
-    updated = models.DateTimeField(null=True, default=None)
+    timestamp = models.CharField(max_length=32, null=True, default=None, db_index=True)
+    title = models.CharField(max_length=128, null=True, default=None, db_index=True)
+    tags = models.CharField(max_length=256, null=True, default=None, db_index=True)
+    added = models.DateTimeField(auto_now_add=True, db_index=True)
+    updated = models.DateTimeField(null=True, default=None, db_index=True)
     # bookmarked = models.DateTimeField()
     keys = ('url', 'timestamp', 'title', 'tags', 'updated')
@@ -68,3 +68,11 @@ class Snapshot(models.Model):
     @property
     def link_dir(self):
         return self.as_link().link_dir
+
+    @property
+    def archive_path(self):
+        return self.as_link().archive_path
+
+    @property
+    def archive_size(self):
+        return self.as_link().archive_size

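Aside: adding db_index=True to existing Django fields changes the table schema, so this model change needs a companion migration (normally generated with manage.py makemigrations). A sketch of what the auto-generated operation looks like for one of the five fields; the dependency name is hypothetical:

from django.db import migrations, models

class Migration(migrations.Migration):
    dependencies = [
        ('core', '0001_initial'),  # hypothetical: whatever migration precedes this one
    ]
    operations = [
        migrations.AlterField(
            model_name='snapshot',
            name='timestamp',
            field=models.CharField(max_length=32, null=True, default=None, db_index=True),
        ),
    ]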
View file

@@ -9,6 +9,8 @@ from typing import List, Dict, Any, Optional, Union
 from dataclasses import dataclass, asdict, field, fields

+from ..system import get_dir_size
+
 class ArchiveError(Exception):
     def __init__(self, message, hints=None):
         super().__init__(message)
@@ -227,6 +229,13 @@ class Link:
         from ..config import ARCHIVE_DIR_NAME
         return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)

+    @property
+    def archive_size(self) -> float:
+        try:
+            return get_dir_size(self.archive_path)[0]
+        except Exception:
+            return 0
+
     ### URL Helpers

     @property
     def url_hash(self):

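Aside: Link.archive_size indexes [0] into get_dir_size()'s result, implying it returns a (num_bytes, num_dirs, num_files) tuple. A standalone sketch consistent with that usage (the real archivebox.system.get_dir_size also accepts recursive= and pattern= options, seen later in this diff but omitted here):

import os
from typing import Tuple

def get_dir_size(path: str) -> Tuple[int, int, int]:
    # Recursively total up the bytes, directories, and files under path.
    num_bytes = num_dirs = num_files = 0
    for root, dirs, files in os.walk(path):
        num_dirs += len(dirs)
        num_files += len(files)
        for name in files:
            try:
                num_bytes += os.path.getsize(os.path.join(root, name))
            except OSError:
                pass  # broken symlink or file removed mid-walk; mirror the diff's except
    return num_bytes, num_dirs, num_files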
View file

@@ -89,6 +89,7 @@ from .config import (
     get_real_name,
 )
 from .cli.logging import (
+    TERM_WIDTH,
     TimedProgress,
     log_archiving_started,
     log_archiving_paused,
@@ -161,7 +162,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None:
     {lightred}Example Use:{reset}
         mkdir my-archive; cd my-archive/
         archivebox init
-        archivebox info
+        archivebox status
         archivebox add https://example.com/some/page
         archivebox add --depth=1 ~/Downloads/bookmarks_export.html
@@ -364,7 +365,7 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:
         print('    X ' + '\n    X '.join(f'{folder} {link}' for folder, link in invalid_folders.items()))
         print()
         print('    {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
-        print('        archivebox info')
+        print('        archivebox status')
         print('        archivebox list --status=invalid')
@@ -387,16 +388,20 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:
 @enforce_types
-def info(out_dir: str=OUTPUT_DIR) -> None:
+def status(out_dir: str=OUTPUT_DIR) -> None:
     """Print out some info and statistics about the archive collection"""

     check_data_folder(out_dir=out_dir)

-    print('{green}[*] Scanning archive collection main index...{reset}'.format(**ANSI))
-    print(f'    {out_dir}/*')
+    from core.models import Snapshot
+    from django.contrib.auth import get_user_model
+    User = get_user_model()
+
+    print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
+    print(ANSI['lightyellow'], f'    {out_dir}/*', ANSI['reset'])
     num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
     size = printable_filesize(num_bytes)
-    print(f'    Size: {size} across {num_files} files')
+    print(f'    Index size: {size} across {num_files} files')
     print()

     links = list(load_main_index(out_dir=out_dir))
@@ -404,33 +409,23 @@ def info(out_dir: str=OUTPUT_DIR) -> None:
     num_sql_links = sum(1 for link in parse_sql_main_index(out_dir=out_dir))
     num_html_links = sum(1 for url in parse_html_main_index(out_dir=out_dir))
     num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
-    users = get_admins().values_list('username', flat=True)
     print(f'    > JSON Main Index: {num_json_links} links'.ljust(36), f'(found in {JSON_INDEX_FILENAME})')
     print(f'    > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
     print(f'    > HTML Main Index: {num_html_links} links'.ljust(36), f'(found in {HTML_INDEX_FILENAME})')
     print(f'    > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
-    print(f'    > Admin: {len(users)} users {", ".join(users)}'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')

     if num_html_links != len(links) or num_sql_links != len(links):
         print()
         print('    {lightred}Hint:{reset} You can fix index count differences automatically by running:'.format(**ANSI))
         print('        archivebox init')

-    if not users:
-        print()
-        print('    {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI))
-        print('        archivebox manage createsuperuser')
-
     print()
-    print('{green}[*] Scanning archive collection link data directories...{reset}'.format(**ANSI))
-    print(f'    {ARCHIVE_DIR}/*')
+    print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
+    print(ANSI['lightyellow'], f'    {ARCHIVE_DIR}/*', ANSI['reset'])
     num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
     size = printable_filesize(num_bytes)
     print(f'    Size: {size} across {num_files} files in {num_dirs} directories')
-    print()
+    print(ANSI['black'])

     num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
     num_archived = len(get_archived_folders(links, out_dir=out_dir))
     num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
@@ -454,23 +449,50 @@ def info(out_dir: str=OUTPUT_DIR) -> None:
     print(f'    > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})')
     print(f'    > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
     print(f'    > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
+    print(ANSI['reset'])

     if num_indexed:
-        print()
         print('    {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
         print('        archivebox list --status=<status>  (e.g. indexed, corrupted, archived, etc.)')

     if orphaned:
-        print()
         print('    {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**ANSI))
         print('        archivebox init')

     if num_invalid:
-        print()
         print('    {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**ANSI))
         print('        archivebox init')

     print()
+    print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**ANSI))
+    print(ANSI['lightyellow'], f'    {LOGS_DIR}/*', ANSI['reset'])
+    users = get_admins().values_list('username', flat=True)
+    print(f'    UI users {len(users)}: {", ".join(users)}')
+    last_login = User.objects.order_by('last_login').last()
+    print(f'    Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}')
+    last_updated = Snapshot.objects.order_by('updated').last()
+    print(f'    Last changed: {str(last_updated.updated)[:16]}')
+
+    if not users:
+        print()
+        print('    {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI))
+        print('        archivebox manage createsuperuser')
+
+    print()
+    for snapshot in Snapshot.objects.order_by('-updated')[:10]:
+        if not snapshot.updated:
+            continue
+        print(
+            ANSI['black'],
+            (
+                f'   > {str(snapshot.updated)[:16]} '
+                f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
+                f'"{snapshot.title}": {snapshot.url}'
+            )[:TERM_WIDTH()],
+            ANSI['reset'],
+        )
+    print(ANSI['black'], '   ...', ANSI['reset'])


 @enforce_types
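Aside: the snapshot listing at the end of status() relies on two small idioms worth spelling out. Python's bool subclasses int, so ("X", "√")[snapshot.is_archived] selects "X" for False and "√" for True; and slicing each row to [:TERM_WIDTH()] keeps output to one terminal line. A self-contained sketch (TERM_WIDTH here is an assumed stand-in for the one imported from .cli.logging):

import shutil

def TERM_WIDTH() -> int:
    # assumed behavior: current terminal width, with a fallback when not a tty
    return shutil.get_terminal_size((100, 10)).columns

is_archived = True
marker = ("X", "√")[is_archived]  # bool indexes the tuple: False -> "X", True -> "√"

row = f'   > 2020-06-25 23:32 [12 {marker} 1.4 MB] "Example": https://example.com'
print(row[:TERM_WIDTH()])  # truncate so each entry stays on one line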