mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
Replaced get method
This commit is contained in:
parent
de18810a2d
commit
f0915a56aa
1 changed file with 16 additions and 6 deletions
|
@@ -15,6 +15,7 @@ from datetime import datetime
|
||||||
from dateparser import parse as dateparser
|
from dateparser import parse as dateparser
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from requests.exceptions import RequestException
|
||||||
from base32_crockford import encode as base32_encode # type: ignore
|
from base32_crockford import encode as base32_encode # type: ignore
|
||||||
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
|
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
|
||||||
|
|
||||||
|
@@ -178,12 +179,21 @@ def get_headers(url: str, timeout: int=None) -> str:
|
||||||
"""Download the contents of a remote url and return the headers"""
|
"""Download the contents of a remote url and return the headers"""
|
||||||
from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
|
from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
|
||||||
timeout = timeout or TIMEOUT
|
timeout = timeout or TIMEOUT
|
||||||
response = requests.get(
|
|
||||||
url,
|
try:
|
||||||
headers={'User-Agent': WGET_USER_AGENT},
|
response = requests.head(
|
||||||
verify=CHECK_SSL_VALIDITY,
|
url,
|
||||||
timeout=timeout,
|
headers={'User-Agent': WGET_USER_AGENT},
|
||||||
)
|
verify=CHECK_SSL_VALIDITY,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
except RequestException:
|
||||||
|
response = requests.get(
|
||||||
|
url,
|
||||||
|
headers={'User-Agent': WGET_USER_AGENT},
|
||||||
|
verify=CHECK_SSL_VALIDITY,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
return pyjson.dumps(dict(response.headers), indent=4)
|
return pyjson.dumps(dict(response.headers), indent=4)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue