mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
add HTTP byte range request support to media file serving
This commit is contained in:
parent
ba6c1fd69b
commit
c76c50e71f
4 changed files with 181 additions and 8 deletions
|
@ -68,7 +68,7 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
|
||||||
else:
|
else:
|
||||||
return tries
|
return tries
|
||||||
|
|
||||||
raise Exception('Background threads failed to exit after {tries}s: {threads_summary}')
|
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
|
||||||
|
|
||||||
|
|
||||||
def list_subcommands() -> Dict[str, str]:
|
def list_subcommands() -> Dict[str, str]:
|
||||||
|
|
169
archivebox/core/serve_static.py
Normal file
169
archivebox/core/serve_static.py
Normal file
|
@ -0,0 +1,169 @@
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
import posixpath
|
||||||
|
import mimetypes
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.contrib.staticfiles import finders
|
||||||
|
from django.views import static
|
||||||
|
from django.http import StreamingHttpResponse, Http404, HttpResponse, HttpResponseNotModified
|
||||||
|
from django.utils._os import safe_join
|
||||||
|
from django.utils.http import http_date
|
||||||
|
from django.utils.translation import gettext as _
|
||||||
|
|
||||||
|
|
||||||
|
def serve_static_with_byterange_support(request, path, document_root=None, show_indexes=False):
    """
    Overrides Django's built-in django.views.static.serve function to support byte range requests.
    This allows you to do things like seek into the middle of a huge mp4 or WACZ without downloading the whole file.
    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d

    `path` is resolved below `document_root` via safe_join. Directories are
    rendered with Django's directory index when `show_indexes` is true and
    raise Http404 otherwise; a missing path also raises Http404.

    Returns:
      - HttpResponseNotModified when the If-Modified-Since check says the
        file has not changed,
      - HttpResponse(status=416) when a single, syntactically valid Range
        ends beyond the end of the file,
      - otherwise a StreamingHttpResponse: status 206 with Content-Range for
        a single valid Range on a regular file, status 200 with the whole
        file in every other case (no Range, invalid Range, multiple ranges).
    """
    assert document_root
    path = posixpath.normpath(path).lstrip("/")
    fullpath = Path(safe_join(document_root, path))
    if fullpath.is_dir():
        if show_indexes:
            return static.directory_index(path, fullpath)
        raise Http404(_("Directory indexes are not allowed here."))
    if not fullpath.exists():
        raise Http404(_("“%(path)s” does not exist") % {"path": fullpath})

    # Respect the If-Modified-Since header.
    statobj = fullpath.stat()
    if not static.was_modified_since(request.META.get("HTTP_IF_MODIFIED_SINCE"), statobj.st_mtime):
        return HttpResponseNotModified()

    content_type, encoding = mimetypes.guess_type(str(fullpath))
    content_type = content_type or "application/octet-stream"

    # Validate the Range header *before* opening the file, so the 416 early
    # return below cannot abandon an open file handle.
    is_regular_file = stat.S_ISREG(statobj.st_mode)
    size = statobj.st_size
    byte_range = None
    if is_regular_file and "HTTP_RANGE" in request.META:
        try:
            ranges = parse_range_header(request.META["HTTP_RANGE"], size)
        except ValueError:
            ranges = None
        # only handle syntactically valid headers, that are simple (no
        # multipart byteranges)
        if ranges is not None and len(ranges) == 1:
            start, stop = ranges[0]
            if stop > size:
                # requested range not satisfiable; tell the client the real
                # size so it can retry with a valid range
                unsatisfiable = HttpResponse(status=416)
                unsatisfiable["Content-Range"] = "bytes */%d" % size
                return unsatisfiable
            byte_range = (start, stop)

    # set up the response object
    ranged_file = RangedFileReader(open(fullpath, "rb"))
    response = StreamingHttpResponse(ranged_file, content_type=content_type)
    response.headers["Last-Modified"] = http_date(statobj.st_mtime)

    # handle byte-range requests by serving a chunk of the file
    if is_regular_file:
        response["Content-Length"] = size
        response["Accept-Ranges"] = "bytes"
        response["X-Django-Ranges-Supported"] = "1"
        if byte_range is not None:
            start, stop = byte_range
            ranged_file.start = start
            ranged_file.stop = stop
            # `stop` is exclusive, while Content-Range's last byte is inclusive
            response["Content-Range"] = "bytes %d-%d/%d" % (start, stop - 1, size)
            response["Content-Length"] = stop - start
            response.status_code = 206

    if encoding:
        response.headers["Content-Encoding"] = encoding
    return response
|
||||||
|
|
||||||
|
|
||||||
|
def serve_static(request, path, **kwargs):
    """
    Look up `path` with the staticfiles finders and serve the file that was
    found through serve_static_with_byterange_support(), so static assets
    also honour HTTP byte-range requests.

    Example URL pattern::

        re_path(r"^static/(?P<path>.*)$", serve_static)

    Extra keyword arguments are forwarded unchanged to
    serve_static_with_byterange_support(). Raises Http404 when no finder
    can locate the requested path.
    """
    lookup_path = posixpath.normpath(path).lstrip("/")
    located = finders.find(lookup_path)

    if located:
        # Split the match into the directory to serve from and the file name.
        document_root, path = os.path.split(located)
        return serve_static_with_byterange_support(request, path, document_root=document_root, **kwargs)

    # Nothing found: report directory-style requests separately from
    # ordinary missing files.
    if path == "" or path.endswith("/"):
        raise Http404("Directory indexes are not allowed here.")
    raise Http404("'%s' could not be found" % path)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_range_header(header, resource_size):
    """
    Parses a range header into a list of two-tuples (start, stop) where `start`
    is the starting byte of the range (inclusive) and `stop` is the ending byte
    position of the range (exclusive).

    Returns None if the value of the header is not syntactically valid: no
    "=" separator, a unit other than "bytes", a spec without "-", positions
    that are not integers, or a range whose start is not before its stop.

    A last-byte-pos at or beyond the end of the resource is clamped to
    `resource_size` when the range starts inside the resource. A range that
    starts at or beyond `resource_size` is returned unclamped, so the caller
    can recognise it as unsatisfiable by checking `stop > resource_size`.

    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """
    if not header or "=" not in header:
        return None

    units, range_ = header.split("=", 1)
    if units.strip().lower() != "bytes":
        return None

    ranges = []
    for val in range_.split(","):
        val = val.strip()
        if "-" not in val:
            return None

        try:
            if val.startswith("-"):
                # suffix-byte-range-spec: this form specifies the last N bytes
                # of an entity-body; `val` already carries the minus sign
                start = max(resource_size + int(val), 0)
                stop = resource_size
            else:
                # byte-range-spec: first-byte-pos "-" [last-byte-pos]
                first, last = val.split("-", 1)
                start = int(first)
                # the +1 is here since we want the stopping point to be
                # exclusive, whereas in the HTTP spec the last-byte-pos is
                # inclusive
                stop = int(last) + 1 if last else resource_size
        except ValueError:
            # non-numeric byte positions make the whole header invalid
            return None

        # A last-byte-pos past the end of the data means "up to the end" for
        # a range that starts inside the resource.
        if start < resource_size:
            stop = min(stop, resource_size)

        if start >= stop:
            return None

        ranges.append((start, stop))

    return ranges
|
||||||
|
|
||||||
|
|
||||||
|
class RangedFileReader:
    """
    Wraps a file like object with an iterator that runs over part (or all) of
    the file defined by start and stop. Blocks of at most block_size bytes
    will be returned from the starting position, up to, but not including the
    stop point.

    `start` and `stop` are plain attributes and may be reassigned after
    construction, before iteration begins.

    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """

    # default number of bytes requested from the file per read
    block_size = 8192

    def __init__(self, file_like, start=0, stop=float("inf"), block_size=None):
        self.f = file_like
        self.block_size = block_size or RangedFileReader.block_size
        self.start = start
        self.stop = stop

    def __iter__(self):
        """Yield consecutive chunks of the file covering [start, stop)."""
        self.f.seek(self.start)
        position = self.start
        while position < self.stop:
            data = self.f.read(min(self.block_size, self.stop - position))
            if not data:
                # reached end of file before `stop`
                break

            yield data
            # Advance by what was actually read: a read may return fewer
            # bytes than requested, and assuming a full block would end the
            # range early.
            position += len(data)

    def close(self):
        """
        Close the wrapped file object, if it supports closing.

        Django's streaming responses call close() on their content iterator
        when the response is closed, which releases the file handle once the
        request has been served.
        """
        close = getattr(self.f, "close", None)
        if close is not None:
            close()
|
|
@ -1,14 +1,13 @@
|
||||||
__package__ = 'archivebox.core'
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
from django.urls import path, include
|
from django.urls import path, re_path, include
|
||||||
from django.views import static
|
from django.views import static
|
||||||
from django.contrib.staticfiles.urls import staticfiles_urlpatterns
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.views.generic.base import RedirectView
|
from django.views.generic.base import RedirectView
|
||||||
|
|
||||||
from .admin import archivebox_admin
|
from .admin import archivebox_admin
|
||||||
from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
|
from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
|
||||||
|
from .serve_static import serve_static
|
||||||
|
|
||||||
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
|
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
|
||||||
# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
|
# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
|
||||||
|
@ -18,13 +17,16 @@ from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthC
|
||||||
# print('DEBUG', settings.DEBUG)
|
# print('DEBUG', settings.DEBUG)
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path('public/', PublicIndexView.as_view(), name='public-index'),
|
re_path(r"^static/(?P<path>.*)$", serve_static),
|
||||||
|
# re_path(r"^media/(?P<path>.*)$", static.serve, {"document_root": settings.MEDIA_ROOT}),
|
||||||
|
|
||||||
path('robots.txt', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'robots.txt'}),
|
path('robots.txt', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'robots.txt'}),
|
||||||
path('favicon.ico', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'favicon.ico'}),
|
path('favicon.ico', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'favicon.ico'}),
|
||||||
|
|
||||||
path('docs/', RedirectView.as_view(url='https://github.com/ArchiveBox/ArchiveBox/wiki'), name='Docs'),
|
path('docs/', RedirectView.as_view(url='https://github.com/ArchiveBox/ArchiveBox/wiki'), name='Docs'),
|
||||||
|
|
||||||
|
path('public/', PublicIndexView.as_view(), name='public-index'),
|
||||||
|
|
||||||
path('archive/', RedirectView.as_view(url='/')),
|
path('archive/', RedirectView.as_view(url='/')),
|
||||||
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
||||||
|
|
||||||
|
@ -41,7 +43,7 @@ urlpatterns = [
|
||||||
path("api/", include('api.urls'), name='api'),
|
path("api/", include('api.urls'), name='api'),
|
||||||
|
|
||||||
path('health/', HealthCheckView.as_view(), name='healthcheck'),
|
path('health/', HealthCheckView.as_view(), name='healthcheck'),
|
||||||
path('error/', lambda *_: 1/0),
|
path('error/', lambda *_: 1/0), # type: ignore
|
||||||
|
|
||||||
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
|
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
|
||||||
|
|
||||||
|
@ -49,7 +51,6 @@ urlpatterns = [
|
||||||
path('index.json', static.serve, {'document_root': settings.CONFIG.OUTPUT_DIR, 'path': 'index.json'}),
|
path('index.json', static.serve, {'document_root': settings.CONFIG.OUTPUT_DIR, 'path': 'index.json'}),
|
||||||
path('', HomepageView.as_view(), name='Home'),
|
path('', HomepageView.as_view(), name='Home'),
|
||||||
]
|
]
|
||||||
urlpatterns += staticfiles_urlpatterns()
|
|
||||||
|
|
||||||
if settings.DEBUG_TOOLBAR:
|
if settings.DEBUG_TOOLBAR:
|
||||||
urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]
|
urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]
|
||||||
|
|
|
@ -46,6 +46,7 @@ from ..main import add
|
||||||
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
|
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
|
||||||
from ..search import query_search_index
|
from ..search import query_search_index
|
||||||
from ..extractors.wget import wget_output_path
|
from ..extractors.wget import wget_output_path
|
||||||
|
from .serve_static import serve_static_with_byterange_support
|
||||||
|
|
||||||
|
|
||||||
class HomepageView(View):
|
class HomepageView(View):
|
||||||
|
@ -197,7 +198,9 @@ class SnapshotView(View):
|
||||||
# if they requested snapshot index, serve live rendered template instead of static html
|
# if they requested snapshot index, serve live rendered template instead of static html
|
||||||
response = self.render_live_index(request, snapshot)
|
response = self.render_live_index(request, snapshot)
|
||||||
else:
|
else:
|
||||||
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
response = serve_static_with_byterange_support(
|
||||||
|
request, archivefile, document_root=snapshot.link_dir, show_indexes=True,
|
||||||
|
)
|
||||||
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
||||||
return response
|
return response
|
||||||
except Snapshot.DoesNotExist:
|
except Snapshot.DoesNotExist:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue