From 75153252dc2acf6c8d40a6a6f294a07863dbb8de Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 25 Apr 2024 03:56:22 -0700 Subject: [PATCH] big overhaul of REST API, split into auth, core, and cli methods --- archivebox/api/__init__.py | 1 + archivebox/api/apps.py | 2 + archivebox/api/archive.py | 184 ----------------- archivebox/api/auth.py | 127 ++++++++---- archivebox/api/migrations/0001_initial.py | 13 +- archivebox/api/models.py | 64 ++++-- archivebox/api/routes_auth.py | 53 +++++ archivebox/api/routes_cli.py | 236 ++++++++++++++++++++++ archivebox/api/routes_core.py | 210 +++++++++++++++++++ archivebox/api/tests.py | 19 +- archivebox/api/urls.py | 111 ++++++++++ archivebox/config.py | 1 + archivebox/core/admin.py | 2 + archivebox/core/apps.py | 2 + archivebox/core/auth.py | 3 + archivebox/core/urls.py | 15 +- archivebox/main.py | 2 +- archivebox/templates/core/navigation.html | 3 +- archivebox/util.py | 3 +- pyproject.toml | 4 - 20 files changed, 790 insertions(+), 265 deletions(-) delete mode 100644 archivebox/api/archive.py create mode 100644 archivebox/api/routes_auth.py create mode 100644 archivebox/api/routes_cli.py create mode 100644 archivebox/api/routes_core.py create mode 100644 archivebox/api/urls.py diff --git a/archivebox/api/__init__.py b/archivebox/api/__init__.py index e69de29b..fbd4342f 100644 --- a/archivebox/api/__init__.py +++ b/archivebox/api/__init__.py @@ -0,0 +1 @@ +__package__ = 'archivebox.api' diff --git a/archivebox/api/apps.py b/archivebox/api/apps.py index 93127e4a..e64d943a 100644 --- a/archivebox/api/apps.py +++ b/archivebox/api/apps.py @@ -1,3 +1,5 @@ +__package__ = 'archivebox.api' + from django.apps import AppConfig diff --git a/archivebox/api/archive.py b/archivebox/api/archive.py deleted file mode 100644 index 27f07793..00000000 --- a/archivebox/api/archive.py +++ /dev/null @@ -1,184 +0,0 @@ -# archivebox_api.py -from typing import List, Optional -from enum import Enum -from pydantic import BaseModel -from ninja import Router -from main import ( - add, - remove, - update, - list_all, - ONLY_NEW, -) # Assuming these functions are defined in main.py - - -# Schemas - -class StatusChoices(str, Enum): - indexed = 'indexed' - archived = 'archived' - unarchived = 'unarchived' - present = 'present' - valid = 'valid' - invalid = 'invalid' - duplicate = 'duplicate' - orphaned = 'orphaned' - corrupted = 'corrupted' - unrecognized = 'unrecognized' - - -class AddURLSchema(BaseModel): - urls: List[str] - tag: str = "" - depth: int = 0 - update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW - update_all: bool = False - index_only: bool = False - overwrite: bool = False - init: bool = False - extractors: str = "" - parser: str = "auto" - - -class RemoveURLSchema(BaseModel): - yes: bool = False - delete: bool = False - before: Optional[float] = None - after: Optional[float] = None - filter_type: str = "exact" - filter_patterns: Optional[List[str]] = None - - -class UpdateSchema(BaseModel): - resume: Optional[float] = None - only_new: Optional[bool] = None - index_only: Optional[bool] = False - overwrite: Optional[bool] = False - before: Optional[float] = None - after: Optional[float] = None - status: Optional[StatusChoices] = None - filter_type: Optional[str] = 'exact' - filter_patterns: Optional[List[str]] = None - extractors: Optional[str] = "" - - -class ListAllSchema(BaseModel): - filter_patterns: Optional[List[str]] = None - filter_type: str = 'exact' - status: Optional[StatusChoices] = None - after: Optional[float] = None - before: Optional[float] = None - sort: Optional[str] = None - csv: Optional[str] = None - json: bool = False - html: bool = False - with_headers: bool = False - - -# API Router -router = Router() - - -@router.post("/add", response={200: dict}) -def api_add(request, payload: AddURLSchema): - try: - result = add( - urls=payload.urls, - tag=payload.tag, - depth=payload.depth, - update=payload.update, - update_all=payload.update_all, - index_only=payload.index_only, - overwrite=payload.overwrite, - init=payload.init, - extractors=payload.extractors, - parser=payload.parser, - ) - # Currently the add function returns a list of ALL items in the DB, ideally only return new items - return { - "status": "success", - "message": "URLs added successfully.", - "result": str(result), - } - except Exception as e: - # Handle exceptions raised by the add function or during processing - return {"status": "error", "message": str(e)} - - -@router.post("/remove", response={200: dict}) -def api_remove(request, payload: RemoveURLSchema): - try: - result = remove( - yes=payload.yes, - delete=payload.delete, - before=payload.before, - after=payload.after, - filter_type=payload.filter_type, - filter_patterns=payload.filter_patterns, - ) - return { - "status": "success", - "message": "URLs removed successfully.", - "result": result, - } - except Exception as e: - # Handle exceptions raised by the remove function or during processing - return {"status": "error", "message": str(e)} - - -@router.post("/update", response={200: dict}) -def api_update(request, payload: UpdateSchema): - try: - result = update( - resume=payload.resume, - only_new=payload.only_new, - index_only=payload.index_only, - overwrite=payload.overwrite, - before=payload.before, - after=payload.after, - status=payload.status, - filter_type=payload.filter_type, - filter_patterns=payload.filter_patterns, - extractors=payload.extractors, - ) - return { - "status": "success", - "message": "Archive updated successfully.", - "result": result, - } - except Exception as e: - # Handle exceptions raised by the update function or during processing - return {"status": "error", "message": str(e)} - - -@router.post("/list_all", response={200: dict}) -def api_list_all(request, payload: ListAllSchema): - try: - result = list_all( - filter_patterns=payload.filter_patterns, - filter_type=payload.filter_type, - status=payload.status, - after=payload.after, - before=payload.before, - sort=payload.sort, - csv=payload.csv, - json=payload.json, - html=payload.html, - with_headers=payload.with_headers, - ) - # TODO: This is kind of bad, make the format a choice field - if payload.json: - return {"status": "success", "format": "json", "data": result} - elif payload.html: - return {"status": "success", "format": "html", "data": result} - elif payload.csv: - return {"status": "success", "format": "csv", "data": result} - else: - return { - "status": "success", - "message": "List generated successfully.", - "data": result, - } - except Exception as e: - # Handle exceptions raised by the list_all function or during processing - return {"status": "error", "message": str(e)} diff --git a/archivebox/api/auth.py b/archivebox/api/auth.py index 79e55d84..8215bc1c 100644 --- a/archivebox/api/auth.py +++ b/archivebox/api/auth.py @@ -1,48 +1,107 @@ +__package__ = 'archivebox.api' + +from typing import Optional + +from django.http import HttpRequest +from django.contrib.auth import login from django.contrib.auth import authenticate -from ninja import Form, Router, Schema -from ninja.security import HttpBearer +from django.contrib.auth.models import AbstractBaseUser -from api.models import Token - -router = Router() +from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser -class GlobalAuth(HttpBearer): - def authenticate(self, request, token): +def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]: + """Given an API token string, check if a corresponding non-expired APIToken exists, and return its user""" + from api.models import APIToken # lazy import model to avoid loading it at urls.py import time + + user = None + + submitted_empty_form = token in ('string', '', None) + if submitted_empty_form: + user = request.user # see if user is authed via django session and use that as the default + else: try: - return Token.objects.get(token=token).user - except Token.DoesNotExist: + token = APIToken.objects.get(token=token) + if token.is_valid(): + user = token.user + except APIToken.DoesNotExist: pass + if not user: + print('[❌] Failed to authenticate API user using API Key:', request) -class AuthSchema(Schema): - email: str - password: str + return None - -@router.post("/authenticate", auth=None) # overriding global auth -def get_token(request, auth_data: AuthSchema): - user = authenticate(username=auth_data.email, password=auth_data.password) - if user: - # Assuming a user can have multiple tokens and you want to create a new one every time - new_token = Token.objects.create(user=user) - return {"token": new_token.token, "expires": new_token.expiry_as_iso8601} +def auth_using_password(username, password, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]: + """Given a username and password, check if they are valid and return the corresponding user""" + user = None + + submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None)) + if submitted_empty_form: + user = request.user # see if user is authed via django session and use that as the default else: - return {"error": "Invalid credentials"} + user = authenticate( + username=username, + password=password, + ) + + if not user: + print('[❌] Failed to authenticate API user using API Key:', request) + + return user -class TokenValidationSchema(Schema): - token: str +### Base Auth Types + +class APITokenAuthCheck: + """The base class for authentication methods that use an api.models.APIToken""" + def authenticate(self, request: HttpRequest, key: Optional[str]=None) -> Optional[AbstractBaseUser]: + user = auth_using_token( + token=key, + request=request, + ) + if user is not None: + login(request, user, backend='django.contrib.auth.backends.ModelBackend') + return user + +class UserPassAuthCheck: + """The base class for authentication methods that use a username & password""" + def authenticate(self, request: HttpRequest, username: Optional[str]=None, password: Optional[str]=None) -> Optional[AbstractBaseUser]: + user = auth_using_password( + username=username, + password=password, + request=request, + ) + if user is not None: + login(request, user, backend='django.contrib.auth.backends.ModelBackend') + return user -@router.post("/validate_token", auth=None) # No authentication required for this endpoint -def validate_token(request, token_data: TokenValidationSchema): - try: - # Attempt to authenticate using the provided token - user = GlobalAuth().authenticate(request, token_data.token) - if user: - return {"status": "valid"} - else: - return {"status": "invalid"} - except Token.DoesNotExist: - return {"status": "invalid"} \ No newline at end of file +### Django-Ninja-Provided Auth Methods + +class UsernameAndPasswordAuth(UserPassAuthCheck, HttpBasicAuth): + """Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)""" + pass + +class QueryParamTokenAuth(APITokenAuthCheck, APIKeyQuery): + """Allow authenticating by passing api_key=xyz as a GET/POST query parameter""" + param_name = "api_key" + +class HeaderTokenAuth(APITokenAuthCheck, APIKeyHeader): + """Allow authenticating by passing X-API-Key=xyz as a request header""" + param_name = "X-API-Key" + +class BearerTokenAuth(APITokenAuthCheck, HttpBearer): + """Allow authenticating by passing Bearer=xyz as a request header""" + pass + + +### Enabled Auth Methods + +API_AUTH_METHODS = [ + QueryParamTokenAuth(), + HeaderTokenAuth(), + BearerTokenAuth(), + django_auth_superuser, + UsernameAndPasswordAuth(), +] diff --git a/archivebox/api/migrations/0001_initial.py b/archivebox/api/migrations/0001_initial.py index 5b8ab51d..07ec7f52 100644 --- a/archivebox/api/migrations/0001_initial.py +++ b/archivebox/api/migrations/0001_initial.py @@ -1,9 +1,10 @@ -# Generated by Django 3.1.14 on 2024-04-09 18:52 +# Generated by Django 4.2.11 on 2024-04-25 04:19 import api.models from django.conf import settings from django.db import migrations, models import django.db.models.deletion +import uuid class Migration(migrations.Migration): @@ -16,13 +17,13 @@ class Migration(migrations.Migration): operations = [ migrations.CreateModel( - name='Token', + name='APIToken', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('token', models.CharField(default=auth.models.hex_uuid, max_length=32, unique=True)), + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('token', models.CharField(default=api.models.generate_secret_token, max_length=32, unique=True)), ('created', models.DateTimeField(auto_now_add=True)), - ('expiry', models.DateTimeField(blank=True, null=True)), - ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tokens', to=settings.AUTH_USER_MODEL)), + ('expires', models.DateTimeField(blank=True, null=True)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), ], ), ] diff --git a/archivebox/api/models.py b/archivebox/api/models.py index b0686aec..84aff120 100644 --- a/archivebox/api/models.py +++ b/archivebox/api/models.py @@ -1,30 +1,62 @@ +__package__ = 'archivebox.api' + import uuid +import secrets from datetime import timedelta from django.conf import settings from django.db import models from django.utils import timezone -from django.utils.translation import gettext_lazy as _ - -def hex_uuid(): - return uuid.uuid4().hex -class Token(models.Model): - user = models.ForeignKey( - settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name="tokens" - ) - token = models.CharField(max_length=32, default=hex_uuid, unique=True) + +def generate_secret_token() -> str: + # returns cryptographically secure string with len() == 32 + return secrets.token_hex(16) + + +class APIToken(models.Model): + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) + token = models.CharField(max_length=32, default=generate_secret_token, unique=True) + created = models.DateTimeField(auto_now_add=True) - expiry = models.DateTimeField(null=True, blank=True) + expires = models.DateTimeField(null=True, blank=True) + + class Meta: + verbose_name = "API Key" + verbose_name_plural = "API Keys" + + def __str__(self) -> str: + return self.token + + def __repr__(self) -> str: + return f'' + + def __json__(self) -> dict: + return { + "TYPE": "APIToken", + "id": str(self.id), + "user_id": str(self.user.id), + "user_username": self.user.username, + "token": self.token, + "created": self.created.isoformat(), + "expires": self.expires_as_iso8601, + } @property - def expiry_as_iso8601(self): + def expires_as_iso8601(self): """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none.""" - expiry_date = ( - self.expiry if self.expiry else timezone.now() + timedelta(days=365 * 100) - ) + expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100)) + return expiry_date.isoformat() - def __str__(self): - return self.token \ No newline at end of file + def is_valid(self, for_date=None): + for_date = for_date or timezone.now() + + if self.expires and self.expires < for_date: + return False + + return True + diff --git a/archivebox/api/routes_auth.py b/archivebox/api/routes_auth.py new file mode 100644 index 00000000..4a631137 --- /dev/null +++ b/archivebox/api/routes_auth.py @@ -0,0 +1,53 @@ +__package__ = 'archivebox.api' + +from typing import Optional + +from django.contrib.auth import authenticate +from ninja import Router, Schema + +from api.models import APIToken +from api.auth import auth_using_token, auth_using_password + + +router = Router(tags=['Authentication']) + + +class PasswordAuthSchema(Schema): + """Schema for a /get_api_token request""" + username: Optional[str] = None + password: Optional[str] = None + + +@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)') # auth=None because they are not authed yet +def get_api_token(request, auth_data: PasswordAuthSchema): + user = auth_using_password( + username=auth_data.username, + password=auth_data.password, + request=request, + ) + + if user: + # TODO: support multiple tokens in the future, for now we just have one per user + api_token, created = APIToken.objects.get_or_create(user=user) + + return api_token.__json__() + + return {"success": False, "errors": ["Invalid credentials"]} + + + +class TokenAuthSchema(Schema): + """Schema for a /check_api_token request""" + token: str + + +@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired') # auth=None because they are not authed yet +def check_api_token(request, token_data: TokenAuthSchema): + user = auth_using_token( + token=token_data.token, + request=request, + ) + if user: + return {"success": True, "user_id": str(user.id)} + + return {"success": False, "user_id": None} diff --git a/archivebox/api/routes_cli.py b/archivebox/api/routes_cli.py new file mode 100644 index 00000000..4bef5088 --- /dev/null +++ b/archivebox/api/routes_cli.py @@ -0,0 +1,236 @@ +__package__ = 'archivebox.api' + +from typing import List, Dict, Any, Optional +from enum import Enum + +# from pydantic import BaseModel +from archivebox.api.routes_core import paginate +from ninja import Router, Schema + +from ..main import ( + add, + remove, + update, + list_all, + schedule, +) +from ..util import ansi_to_html +from ..config import ONLY_NEW + + +# router for API that exposes archivebox cli subcommands as REST endpoints +router = Router(tags=['ArchiveBox CLI Sub-Commands']) + + +# Schemas + +JSONType = List[Any] | Dict[str, Any] | bool | int | str | None + +class CLICommandResponseSchema(Schema): + success: bool + errors: List[str] + result: JSONType + stdout: str + stderr: str + +class FilterTypeChoices(str, Enum): + exact = 'exact' + substring = 'substring' + regex = 'regex' + domain = 'domain' + tag = 'tag' + timestamp = 'timestamp' + +class StatusChoices(str, Enum): + indexed = 'indexed' + archived = 'archived' + unarchived = 'unarchived' + present = 'present' + valid = 'valid' + invalid = 'invalid' + duplicate = 'duplicate' + orphaned = 'orphaned' + corrupted = 'corrupted' + unrecognized = 'unrecognized' + + +class AddCommandSchema(Schema): + urls: List[str] + tag: str = "" + depth: int = 0 + update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW + update_all: bool = False + index_only: bool = False + overwrite: bool = False + init: bool = False + extractors: str = "" + parser: str = "auto" + +class UpdateCommandSchema(Schema): + resume: Optional[float] = 0 + only_new: bool = ONLY_NEW + index_only: bool = False + overwrite: bool = False + after: Optional[float] = 0 + before: Optional[float] = 999999999999999 + status: Optional[StatusChoices] = StatusChoices.unarchived + filter_type: Optional[str] = FilterTypeChoices.substring + filter_patterns: Optional[List[str]] = ['https://example.com'] + extractors: Optional[str] = "" + +class ScheduleCommandSchema(Schema): + import_path: Optional[str] = None + add: bool = False + every: Optional[str] = None + tag: str = '' + depth: int = 0 + overwrite: bool = False + update: bool = not ONLY_NEW + clear: bool = False + +class ListCommandSchema(Schema): + filter_patterns: Optional[List[str]] = ['https://example.com'] + filter_type: str = FilterTypeChoices.substring + status: Optional[StatusChoices] = StatusChoices.indexed + after: Optional[float] = 0 + before: Optional[float] = 999999999999999 + sort: str = 'added' + as_json: bool = True + as_html: bool = False + as_csv: str | bool = 'timestamp,url' + with_headers: bool = False + +class RemoveCommandSchema(Schema): + delete: bool = True + after: Optional[float] = 0 + before: Optional[float] = 999999999999999 + filter_type: str = FilterTypeChoices.exact + filter_patterns: Optional[List[str]] = ['https://example.com'] + + + + + +@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]') +def cli_add(request, args: AddCommandSchema): + result = add( + urls=args.urls, + tag=args.tag, + depth=args.depth, + update=args.update, + update_all=args.update_all, + index_only=args.index_only, + overwrite=args.overwrite, + init=args.init, + extractors=args.extractors, + parser=args.parser, + ) + + return { + "success": True, + "errors": [], + "result": result, + "stdout": ansi_to_html(request.stdout.getvalue().strip()), + "stderr": ansi_to_html(request.stderr.getvalue().strip()), + } + + +@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]') +def cli_update(request, args: UpdateCommandSchema): + result = update( + resume=args.resume, + only_new=args.only_new, + index_only=args.index_only, + overwrite=args.overwrite, + before=args.before, + after=args.after, + status=args.status, + filter_type=args.filter_type, + filter_patterns=args.filter_patterns, + extractors=args.extractors, + ) + return { + "success": True, + "errors": [], + "result": result, + "stdout": ansi_to_html(request.stdout.getvalue().strip()), + "stderr": ansi_to_html(request.stderr.getvalue().strip()), + } + + +@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]') +def cli_add(request, args: ScheduleCommandSchema): + result = schedule( + import_path=args.import_path, + add=args.add, + show=args.show, + clear=args.clear, + every=args.every, + tag=args.tag, + depth=args.depth, + overwrite=args.overwrite, + update=args.update, + ) + + return { + "success": True, + "errors": [], + "result": result, + "stdout": ansi_to_html(request.stdout.getvalue().strip()), + "stderr": ansi_to_html(request.stderr.getvalue().strip()), + } + + + +@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]') +def cli_list(request, args: ListCommandSchema): + result = list_all( + filter_patterns=args.filter_patterns, + filter_type=args.filter_type, + status=args.status, + after=args.after, + before=args.before, + sort=args.sort, + csv=args.as_csv, + json=args.as_json, + html=args.as_html, + with_headers=args.with_headers, + ) + + result_format = 'txt' + if args.as_json: + result_format = "json" + elif args.as_html: + result_format = "html" + elif args.as_csv: + result_format = "csv" + + return { + "success": True, + "errors": [], + "result": result, + "result_format": result_format, + "stdout": ansi_to_html(request.stdout.getvalue().strip()), + "stderr": ansi_to_html(request.stderr.getvalue().strip()), + } + + + +@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]') +def cli_remove(request, args: RemoveCommandSchema): + result = remove( + yes=True, # no way to interactively ask for confirmation via API, so we force yes + delete=args.delete, + before=args.before, + after=args.after, + filter_type=args.filter_type, + filter_patterns=args.filter_patterns, + ) + return { + "success": True, + "errors": [], + "result": result, + "stdout": ansi_to_html(request.stdout.getvalue().strip()), + "stderr": ansi_to_html(request.stderr.getvalue().strip()), + } + diff --git a/archivebox/api/routes_core.py b/archivebox/api/routes_core.py new file mode 100644 index 00000000..452614f6 --- /dev/null +++ b/archivebox/api/routes_core.py @@ -0,0 +1,210 @@ +__package__ = 'archivebox.api' + +from uuid import UUID +from typing import List, Optional, Union +from datetime import datetime + +from django.shortcuts import get_object_or_404 + +from ninja import Router, Schema, FilterSchema, Field, Query +from ninja.pagination import paginate + +from core.models import Snapshot, ArchiveResult, Tag + + +router = Router(tags=['Core Models']) + + + + +### ArchiveResult ######################################################################### + +class ArchiveResultSchema(Schema): + id: UUID + + snapshot_id: UUID + snapshot_url: str + snapshot_tags: str + + extractor: str + cmd: List[str] + pwd: str + cmd_version: str + output: str + status: str + + created: datetime + + @staticmethod + def resolve_id(obj): + return obj.uuid + + @staticmethod + def resolve_created(obj): + return obj.start_ts + + @staticmethod + def resolve_snapshot_url(obj): + return obj.snapshot.url + + @staticmethod + def resolve_snapshot_tags(obj): + return obj.snapshot.tags_str() + + +class ArchiveResultFilterSchema(FilterSchema): + id: Optional[UUID] = Field(None, q='uuid') + + search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains']) + snapshot_id: Optional[UUID] = Field(None, q='snapshot_id') + snapshot_url: Optional[str] = Field(None, q='snapshot__url') + snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name') + + status: Optional[str] = Field(None, q='status') + output: Optional[str] = Field(None, q='output__icontains') + extractor: Optional[str] = Field(None, q='extractor__icontains') + cmd: Optional[str] = Field(None, q='cmd__0__icontains') + pwd: Optional[str] = Field(None, q='pwd__icontains') + cmd_version: Optional[str] = Field(None, q='cmd_version') + + created: Optional[datetime] = Field(None, q='updated') + created__gte: Optional[datetime] = Field(None, q='updated__gte') + created__lt: Optional[datetime] = Field(None, q='updated__lt') + + +@router.get("/archiveresults", response=List[ArchiveResultSchema]) +@paginate +def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)): + qs = ArchiveResult.objects.all() + results = filters.filter(qs) + return results + + +@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema) +def get_archiveresult(request, archiveresult_id: str): + archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id) + return archiveresult + + +# @router.post("/archiveresult", response=ArchiveResultSchema) +# def create_archiveresult(request, payload: ArchiveResultSchema): +# archiveresult = ArchiveResult.objects.create(**payload.dict()) +# return archiveresult +# +# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema) +# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema): +# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id) +# +# for attr, value in payload.dict().items(): +# setattr(archiveresult, attr, value) +# archiveresult.save() +# +# return archiveresult +# +# @router.delete("/archiveresult/{archiveresult_id}") +# def delete_archiveresult(request, archiveresult_id: str): +# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id) +# archiveresult.delete() +# return {"success": True} + + + + + +### Snapshot ######################################################################### + + +class SnapshotSchema(Schema): + id: UUID + + url: str + tags: str + title: Optional[str] + timestamp: str + bookmarked: datetime + added: datetime + updated: datetime + archive_path: str + + archiveresults: List[ArchiveResultSchema] + + # @staticmethod + # def resolve_id(obj): + # return str(obj.id) + + @staticmethod + def resolve_tags(obj): + return obj.tags_str() + + @staticmethod + def resolve_archiveresults(obj, context): + if context['request'].with_archiveresults: + return obj.archiveresult_set.all().distinct() + return ArchiveResult.objects.none() + + +class SnapshotFilterSchema(FilterSchema): + id: Optional[UUID] = Field(None, q='id') + + search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains']) + url: Optional[str] = Field(None, q='url') + tag: Optional[str] = Field(None, q='tags__name') + title: Optional[str] = Field(None, q='title__icontains') + + timestamp: Optional[str] = Field(None, q='timestamp__startswith') + + added: Optional[datetime] = Field(None, q='added') + added__gte: Optional[datetime] = Field(None, q='added__gte') + added__lt: Optional[datetime] = Field(None, q='added__lt') + + +@router.get("/snapshots", response=List[SnapshotSchema]) +@paginate +def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True): + request.with_archiveresults = with_archiveresults + + qs = Snapshot.objects.all() + results = filters.filter(qs) + return results + +@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema) +def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True): + request.with_archiveresults = with_archiveresults + snapshot = get_object_or_404(Snapshot, id=snapshot_id) + return snapshot + + +# @router.post("/snapshot", response=SnapshotSchema) +# def create_snapshot(request, payload: SnapshotSchema): +# snapshot = Snapshot.objects.create(**payload.dict()) +# return snapshot +# +# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema) +# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema): +# snapshot = get_object_or_404(Snapshot, id=snapshot_id) +# +# for attr, value in payload.dict().items(): +# setattr(snapshot, attr, value) +# snapshot.save() +# +# return snapshot +# +# @router.delete("/snapshot/{snapshot_id}") +# def delete_snapshot(request, snapshot_id: str): +# snapshot = get_object_or_404(Snapshot, id=snapshot_id) +# snapshot.delete() +# return {"success": True} + + + +### Tag ######################################################################### + + +class TagSchema(Schema): + name: str + slug: str + + +@router.get("/tags", response=List[TagSchema]) +def list_tags(request): + return Tag.objects.all() diff --git a/archivebox/api/tests.py b/archivebox/api/tests.py index 8b8b2b16..e6e8cce6 100644 --- a/archivebox/api/tests.py +++ b/archivebox/api/tests.py @@ -1,27 +1,30 @@ +__package__ = 'archivebox.api' + from django.test import TestCase from ninja.testing import TestClient -from archivebox.api.archive import router as archive_router -class ArchiveBoxAPITestCase(TestCase): +from .routes_cli import router + +class ArchiveBoxCLIAPITestCase(TestCase): def setUp(self): - self.client = TestClient(archive_router) + self.client = TestClient(router) def test_add_endpoint(self): - response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "test"}) + response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "testTag1,testTag2"}) self.assertEqual(response.status_code, 200) - self.assertEqual(response.json()["status"], "success") + self.assertTrue(response.json()["success"]) def test_remove_endpoint(self): response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]}) self.assertEqual(response.status_code, 200) - self.assertEqual(response.json()["status"], "success") + self.assertTrue(response.json()["success"]) def test_update_endpoint(self): response = self.client.post("/update", json={}) self.assertEqual(response.status_code, 200) - self.assertEqual(response.json()["status"], "success") + self.assertTrue(response.json()["success"]) def test_list_all_endpoint(self): response = self.client.post("/list_all", json={}) self.assertEqual(response.status_code, 200) - self.assertTrue("success" in response.json()["status"]) \ No newline at end of file + self.assertTrue(response.json()["success"]) diff --git a/archivebox/api/urls.py b/archivebox/api/urls.py new file mode 100644 index 00000000..5d3877e9 --- /dev/null +++ b/archivebox/api/urls.py @@ -0,0 +1,111 @@ +__package__ = 'archivebox.api' + +# import orjson + +from io import StringIO +from traceback import format_exception +from contextlib import redirect_stdout, redirect_stderr + +from django.urls import path +from django.http import HttpRequest, HttpResponse +from django.views.generic.base import RedirectView +from django.core.exceptions import ObjectDoesNotExist, EmptyResultSet, PermissionDenied + +from ninja import NinjaAPI, Swagger + +# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/ + +from api.auth import API_AUTH_METHODS +from ..config import VERSION, COMMIT_HASH + +# from ninja.renderers import BaseRenderer + +# class ORJSONRenderer(BaseRenderer): +# media_type = "application/json" + +# def render(self, request, data, *, response_status): +# return { +# "success": True, +# "errors": [], +# "result": data, +# "stdout": ansi_to_html(stdout.getvalue().strip()), +# "stderr": ansi_to_html(stderr.getvalue().strip()), +# } +# return orjson.dumps(data) + + +class NinjaAPIWithIOCapture(NinjaAPI): + def create_temporal_response(self, request: HttpRequest) -> HttpResponse: + stdout, stderr = StringIO(), StringIO() + + with redirect_stderr(stderr): + with redirect_stdout(stdout): + request.stdout = stdout + request.stderr = stderr + + response = super().create_temporal_response(request) + + print('RESPONDING NOW', response) + + return response + +html_description=f''' +

Welcome to your ArchiveBox server's REST API [v1 ALPHA] homepage!

+
+WARNING: This API is still in an early development stage and may change! +
+ +Served by ArchiveBox v{VERSION} ({COMMIT_HASH[:8]}), API powered by django-ninja. +''' + +api = NinjaAPIWithIOCapture( + title='ArchiveBox API', + description=html_description, + version='1.0.0', + csrf=False, + auth=API_AUTH_METHODS, + urls_namespace="api", + docs=Swagger(settings={"persistAuthorization": True}), + # docs_decorator=login_required, + # renderer=ORJSONRenderer(), +) +api.add_router('/auth/', 'api.routes_auth.router') +api.add_router('/core/', 'api.routes_core.router') +api.add_router('/cli/', 'api.routes_cli.router') + + +@api.exception_handler(Exception) +def generic_exception_handler(request, err): + status = 503 + if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)): + status = 404 + + print(''.join(format_exception(err))) + + return api.create_response( + request, + { + "succeeded": False, + "errors": [ + ''.join(format_exception(err)), + # or send simpler exception-only summary without full traceback: + # f'{err.__class__.__name__}: {err}', + # *([str(err.__context__)] if getattr(err, '__context__', None) else []), + ], + }, + status=status, + ) + + +urlpatterns = [ + path("v1/", api.urls), + + path("v1", RedirectView.as_view(url='/api/v1/docs')), + path("", RedirectView.as_view(url='/api/v1/docs')), +] diff --git a/archivebox/config.py b/archivebox/config.py index 1a75229c..9d245d5c 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -1366,6 +1366,7 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C stderr(' archivebox init') raise SystemExit(2) + def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG): output_dir = out_dir or config['OUTPUT_DIR'] from .index.sql import list_migrations diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 172a8caf..3f4dcd7c 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -20,6 +20,7 @@ from core.models import Snapshot, ArchiveResult, Tag from core.forms import AddLinkForm from core.mixins import SearchResultsAdminMixin +from api.models import APIToken from index.html import snapshot_icons from logging_util import printable_filesize @@ -100,6 +101,7 @@ class ArchiveBoxAdmin(admin.AdminSite): archivebox_admin = ArchiveBoxAdmin() archivebox_admin.register(get_user_model()) +archivebox_admin.register(APIToken) archivebox_admin.disable_action('delete_selected') class ArchiveResultInline(admin.TabularInline): diff --git a/archivebox/core/apps.py b/archivebox/core/apps.py index f3e35dbd..91a1b81b 100644 --- a/archivebox/core/apps.py +++ b/archivebox/core/apps.py @@ -1,3 +1,5 @@ +__package__ = 'archivebox.core' + from django.apps import AppConfig diff --git a/archivebox/core/auth.py b/archivebox/core/auth.py index fb15d5a8..e5bf896d 100644 --- a/archivebox/core/auth.py +++ b/archivebox/core/auth.py @@ -1,5 +1,8 @@ +__package__ = 'archivebox.core' + import os from django.conf import settings + from ..config import ( LDAP ) diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index a04dce28..0526633c 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -1,4 +1,4 @@ -from .admin import archivebox_admin +__package__ = 'archivebox.core' from django.urls import path, include from django.views import static @@ -6,14 +6,9 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns from django.conf import settings from django.views.generic.base import RedirectView -from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView +from .admin import archivebox_admin +from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView -from ninja import NinjaAPI -from api.auth import GlobalAuth - -api = NinjaAPI(auth=GlobalAuth()) -api.add_router("/auth/", "api.auth.router") -api.add_router("/archive/", "api.archive.router") # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306 # from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE @@ -43,10 +38,10 @@ urlpatterns = [ path('accounts/', include('django.contrib.auth.urls')), path('admin/', archivebox_admin.urls), - path("api/", api.urls), + path("api/", include('api.urls')), path('health/', HealthCheckView.as_view(), name='healthcheck'), - path('error/', lambda _: 1/0), + path('error/', lambda *_: 1/0), # path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django diff --git a/archivebox/main.py b/archivebox/main.py index 7389c032..b2cba3e1 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -695,7 +695,7 @@ def add(urls: Union[str, List[str]], if CAN_UPGRADE: hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n") - return all_links + return new_links @enforce_types def remove(filter_str: Optional[str]=None, diff --git a/archivebox/templates/core/navigation.html b/archivebox/templates/core/navigation.html index 8d9cb659..7dea3d96 100644 --- a/archivebox/templates/core/navigation.html +++ b/archivebox/templates/core/navigation.html @@ -6,6 +6,7 @@ Tags | Log     Docs | + API | Public | Admin     @@ -16,7 +17,7 @@ {% endblock %} {% block userlinks %} {% if user.has_usable_password %} - Account / + Account / {% endif %} {% trans 'Log out' %} {% endblock %} diff --git a/archivebox/util.py b/archivebox/util.py index b5cfebb4..d1b4daf8 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -358,7 +358,8 @@ def chrome_cleanup(): if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"): remove_file("/home/archivebox/.config/chromium/SingletonLock") -def ansi_to_html(text): +@enforce_types +def ansi_to_html(text: str) -> str: """ Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html """ diff --git a/pyproject.toml b/pyproject.toml index 0e3cd184..603d6d5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,22 +18,18 @@ dependencies = [ "django-ninja>=1.1.0", "django-extensions>=3.2.3", "mypy-extensions>=1.0.0", - # Python Helper Libraries "requests>=2.31.0", "dateparser>=1.0.0", "feedparser>=6.0.11", "w3lib>=1.22.0", - # Feature-Specific Dependencies "python-crontab>=2.5.1", # for: archivebox schedule "croniter>=0.3.34", # for: archivebox schedule "ipython>5.0.0", # for: archivebox shell - # Extractor Dependencies "yt-dlp>=2024.4.9", # for: media "playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages - # TODO: add more extractors # - gallery-dl # - scihubdl