Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-13 22:54:27 -04:00)
rename archivebox-info to archivebox-status
commit b9e17fa0d1
parent 5c2bbe7efe
5 changed files with 76 additions and 37 deletions
@@ -1,30 +1,30 @@
 #!/usr/bin/env python3
 
 __package__ = 'archivebox.cli'
-__command__ = 'archivebox info'
+__command__ = 'archivebox status'
 
 import sys
 import argparse
 
 from typing import Optional, List, IO
 
-from ..main import info, docstring
+from ..main import status, docstring
 from ..config import OUTPUT_DIR
 from .logging import SmartFormatter, reject_stdin
 
 
-@docstring(info.__doc__)
+@docstring(status.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     parser = argparse.ArgumentParser(
         prog=__command__,
-        description=info.__doc__,
+        description=status.__doc__,
         add_help=True,
         formatter_class=SmartFormatter,
     )
     parser.parse_args(args or ())
     reject_stdin(__command__, stdin)
 
-    info(out_dir=pwd or OUTPUT_DIR)
+    status(out_dir=pwd or OUTPUT_DIR)
 
 
 if __name__ == '__main__':
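For context, the @docstring decorator used above is what keeps the subcommand's --help output in sync with the renamed main.status function. A minimal sketch of how such a decorator could work (an assumption for illustration; the real definition lives in archivebox.main and may differ):

    def docstring(text):
        # Copy another function's __doc__ onto the wrapped callable, so
        # `archivebox status --help` always matches status.__doc__.
        def decorator(func):
            func.__doc__ = text
            return func
        return decorator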
@@ -785,14 +785,14 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:
 
     json_index_exists = os.path.exists(os.path.join(output_dir, JSON_INDEX_FILENAME))
     if not json_index_exists:
-        stderr('[X] No archive main index was found in current directory.', color='red')
-        stderr(f'    {output_dir}')
+        stderr('[X] No archivebox index found in the current directory.', color='red')
+        stderr(f'    {output_dir}', color='lightyellow')
         stderr()
-        stderr('    Are you running archivebox in the right folder?')
+        stderr('    {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
         stderr('        cd path/to/your/archive/folder')
         stderr('        archivebox [command]')
         stderr()
-        stderr('    To create a new archive collection or import existing data in this folder, run:')
+        stderr('    {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
         stderr('        archivebox init')
         raise SystemExit(2)
 
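The {lightred}Hint{reset} placeholders introduced above rely on config['ANSI'], a mapping from color names to terminal escape codes that str.format substitutes in. A hedged illustration of the pattern (the exact escape codes here are an assumption):

    # Hypothetical subset of the ANSI color table the hints assume:
    ANSI = {'lightred': '\033[01;31m', 'lightyellow': '\033[01;33m', 'reset': '\033[00;00m'}

    # format(**ANSI) replaces each {name} placeholder with its escape code:
    print('    {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**ANSI))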
@@ -12,13 +12,13 @@ class Snapshot(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
 
     url = models.URLField(unique=True)
-    timestamp = models.CharField(max_length=32, null=True, default=None)
+    timestamp = models.CharField(max_length=32, null=True, default=None, db_index=True)
 
-    title = models.CharField(max_length=128, null=True, default=None)
-    tags = models.CharField(max_length=256, null=True, default=None)
+    title = models.CharField(max_length=128, null=True, default=None, db_index=True)
+    tags = models.CharField(max_length=256, null=True, default=None, db_index=True)
 
-    added = models.DateTimeField(auto_now_add=True)
-    updated = models.DateTimeField(null=True, default=None)
+    added = models.DateTimeField(auto_now_add=True, db_index=True)
+    updated = models.DateTimeField(null=True, default=None, db_index=True)
     # bookmarked = models.DateTimeField()
 
     keys = ('url', 'timestamp', 'title', 'tags', 'updated')
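Adding db_index=True makes Django create a database index on each of these columns, which the new status command benefits from when it sorts snapshots by updated. Each field change implies a schema migration roughly like the sketch below (the migration and dependency names are assumptions; `makemigrations` picks its own):

    # Hypothetical excerpt of the auto-generated migration:
    from django.db import migrations, models

    class Migration(migrations.Migration):
        dependencies = [('core', '0001_initial')]  # assumed prior migration
        operations = [
            migrations.AlterField(
                model_name='snapshot',
                name='updated',
                field=models.DateTimeField(null=True, default=None, db_index=True),
            ),
            # ...one AlterField per newly indexed column
        ]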
@@ -68,3 +68,11 @@ class Snapshot(models.Model):
     @property
     def link_dir(self):
         return self.as_link().link_dir
+
+    @property
+    def archive_path(self):
+        return self.as_link().archive_path
+
+    @property
+    def archive_size(self):
+        return self.as_link().archive_size
@@ -9,6 +9,8 @@ from typing import List, Dict, Any, Optional, Union
 from dataclasses import dataclass, asdict, field, fields
 
+
+from ..system import get_dir_size
 
 class ArchiveError(Exception):
     def __init__(self, message, hints=None):
         super().__init__(message)
@@ -227,6 +229,13 @@ class Link:
         from ..config import ARCHIVE_DIR_NAME
         return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
 
+    @property
+    def archive_size(self) -> float:
+        try:
+            return get_dir_size(self.archive_path)[0]
+        except Exception:
+            return 0
+
     ### URL Helpers
     @property
     def url_hash(self):
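The call sites in this commit pin down get_dir_size's contract: it takes a path plus optional recursive and pattern arguments and returns a (num_bytes, num_dirs, num_files) tuple, which is why archive_size indexes element [0]. A minimal sketch consistent with those call sites (the real archivebox.system implementation may differ, e.g. in how pattern is matched):

    import os

    def get_dir_size(path, recursive=True, pattern=None):
        """Return (num_bytes, num_dirs, num_files) for entries under path."""
        num_bytes = num_dirs = num_files = 0
        for root, dirs, files in os.walk(path):
            num_dirs += len(dirs)
            if not recursive:
                dirs.clear()  # stop os.walk from descending further
            for name in files:
                if pattern and pattern not in name:
                    continue
                num_bytes += os.path.getsize(os.path.join(root, name))
                num_files += 1
        return num_bytes, num_dirs, num_files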
@@ -89,6 +89,7 @@ from .config import (
     get_real_name,
 )
 from .cli.logging import (
+    TERM_WIDTH,
     TimedProgress,
     log_archiving_started,
     log_archiving_paused,
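TERM_WIDTH is imported here and invoked as TERM_WIDTH() in the line-truncation logic added further down, so it is a callable rather than a constant. A plausible sketch of such a helper (an assumption; the actual cli/logging.py definition may differ):

    import shutil

    def TERM_WIDTH():
        # Re-check on every call so resizing the terminal mid-run is respected.
        return shutil.get_terminal_size(fallback=(100, 10)).columns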
@@ -161,7 +162,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None:
     {lightred}Example Use:{reset}
         mkdir my-archive; cd my-archive/
         archivebox init
-        archivebox info
+        archivebox status
 
         archivebox add https://example.com/some/page
         archivebox add --depth=1 ~/Downloads/bookmarks_export.html
@@ -364,7 +365,7 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:
         print('    X ' + '\n    X '.join(f'{folder} {link}' for folder, link in invalid_folders.items()))
         print()
         print('    {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
-        print('        archivebox info')
+        print('        archivebox status')
         print('        archivebox list --status=invalid')
 
 
@@ -387,16 +388,20 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:
 
 
 @enforce_types
-def info(out_dir: str=OUTPUT_DIR) -> None:
+def status(out_dir: str=OUTPUT_DIR) -> None:
     """Print out some info and statistics about the archive collection"""
 
     check_data_folder(out_dir=out_dir)
 
-    print('{green}[*] Scanning archive collection main index...{reset}'.format(**ANSI))
-    print(f'    {out_dir}/*')
+    from core.models import Snapshot
+    from django.contrib.auth import get_user_model
+    User = get_user_model()
+
+    print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
+    print(ANSI['lightyellow'], f'    {out_dir}/*', ANSI['reset'])
     num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
     size = printable_filesize(num_bytes)
-    print(f'    Size: {size} across {num_files} files')
+    print(f'    Index size: {size} across {num_files} files')
     print()
 
     links = list(load_main_index(out_dir=out_dir))
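Note that Snapshot and get_user_model are imported inside status() rather than at module top: Django ORM imports only work once the app registry is ready, and importing them lazily avoids that ordering problem at CLI startup. A hedged sketch of the constraint (the settings module name here is an assumption, not ArchiveBox's actual one):

    import os
    import django

    # Importing core.models before setup() raises AppRegistryNotReady;
    # deferring the import into the function body sidesteps this.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    django.setup()
    from core.models import Snapshot  # safe only after django.setup()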
@@ -404,33 +409,23 @@ def info(out_dir: str=OUTPUT_DIR) -> None:
     num_sql_links = sum(1 for link in parse_sql_main_index(out_dir=out_dir))
     num_html_links = sum(1 for url in parse_html_main_index(out_dir=out_dir))
     num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
-    users = get_admins().values_list('username', flat=True)
-
     print(f'    > JSON Main Index: {num_json_links} links'.ljust(36), f'(found in {JSON_INDEX_FILENAME})')
     print(f'    > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
     print(f'    > HTML Main Index: {num_html_links} links'.ljust(36), f'(found in {HTML_INDEX_FILENAME})')
     print(f'    > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
 
-    print(f'    > Admin: {len(users)} users {", ".join(users)}'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
-
     if num_html_links != len(links) or num_sql_links != len(links):
         print()
         print('    {lightred}Hint:{reset} You can fix index count differences automatically by running:'.format(**ANSI))
         print('        archivebox init')
 
-    if not users:
-        print()
-        print('    {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI))
-        print('        archivebox manage createsuperuser')
-
     print()
-    print('{green}[*] Scanning archive collection link data directories...{reset}'.format(**ANSI))
-    print(f'    {ARCHIVE_DIR}/*')
+    print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
+    print(ANSI['lightyellow'], f'    {ARCHIVE_DIR}/*', ANSI['reset'])
 
     num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
     size = printable_filesize(num_bytes)
     print(f'    Size: {size} across {num_files} files in {num_dirs} directories')
-    print()
+    print(ANSI['black'])
 
     num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
     num_archived = len(get_archived_folders(links, out_dir=out_dir))
     num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
@@ -455,22 +450,49 @@ def info(out_dir: str=OUTPUT_DIR) -> None:
     print(f'    > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
     print(f'    > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
+
+    print(ANSI['reset'])
 
     if num_indexed:
-        print()
         print('    {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
         print('        archivebox list --status=<status>  (e.g. indexed, corrupted, archived, etc.)')
 
     if orphaned:
-        print()
         print('    {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**ANSI))
         print('        archivebox init')
 
     if num_invalid:
-        print()
         print('    {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**ANSI))
         print('        archivebox init')
 
     print()
+    print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**ANSI))
+    print(ANSI['lightyellow'], f'    {LOGS_DIR}/*', ANSI['reset'])
+    users = get_admins().values_list('username', flat=True)
+    print(f'    UI users {len(users)}: {", ".join(users)}')
+    last_login = User.objects.order_by('last_login').last()
+    print(f'    Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}')
+    last_updated = Snapshot.objects.order_by('updated').last()
+    print(f'    Last changed: {str(last_updated.updated)[:16]}')
+
+    if not users:
+        print()
+        print('    {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI))
+        print('        archivebox manage createsuperuser')
+
+    print()
+    for snapshot in Snapshot.objects.order_by('-updated')[:10]:
+        if not snapshot.updated:
+            continue
+        print(
+            ANSI['black'],
+            (
+                f'   > {str(snapshot.updated)[:16]} '
+                f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
+                f'"{snapshot.title}": {snapshot.url}'
+            )[:TERM_WIDTH()],
+            ANSI['reset'],
+        )
+    print(ANSI['black'], '   ...', ANSI['reset'])
 
 
 @enforce_types
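One idiom worth noting in the new per-snapshot summary line: ("X", "√")[snapshot.is_archived] indexes a two-element tuple with a bool to pick a status glyph without an if/else. This works because bools are ints in Python (False is 0, True is 1):

    # A 2-tuple doubles as a tiny lookup table keyed by a boolean:
    icon = ("X", "√")[True]    # -> "√"
    icon = ("X", "√")[False]   # -> "X"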