From c6faa9ab76ffade76a824a403d2fbffcf188d471 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 6 Feb 2024 22:22:03 -0800 Subject: [PATCH] add extra information to headers extractor output --- Dockerfile | 2 +- archivebox/util.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a80620a9..a74354fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,8 +15,8 @@ # Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development). -# Use Debian 12 w/ faster package updates: https://packages.debian.org/bookworm-backports/ FROM python:3.11-slim-bookworm +# Uses Debian 12 w/ faster-updating apt-lists added below: https://packages.debian.org/bookworm-backports/ LABEL name="archivebox" \ maintainer="Nick Sweeting " \ diff --git a/archivebox/util.py b/archivebox/util.py index d7df7f3c..faa720b5 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -210,7 +210,11 @@ def get_headers(url: str, timeout: int=None) -> str: return pyjson.dumps( { + 'URL': url, 'Status-Code': response.status_code, + 'Elapsed': response.elapsed, + 'Encoding': response.encoding, + 'Apparent-Encoding': response.apparent_encoding, **dict(response.headers), }, indent=4,