From e9e4adfc341b3e3637ce5af33e3f3fc8a6481d6d Mon Sep 17 00:00:00 2001
From: Cristian
Date: Thu, 7 Jan 2021 09:07:29 -0500
Subject: [PATCH 1/2] fix: wget_output_path failing on some extractors. Add a new condition

---
 archivebox/extractors/wget.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py
index 331f636b..b7adbea0 100644
--- a/archivebox/extractors/wget.py
+++ b/archivebox/extractors/wget.py
@@ -180,5 +180,9 @@ def wget_output_path(link: Link) -> Optional[str]:
 
         if str(search_dir) == link.link_dir:
             break
+
+    search_dir = Path(link.link_dir) / domain(link.url).replace(":", "+") / urldecode(full_path)
+    if not search_dir.is_dir():
+        return str(search_dir.relative_to(link.link_dir))
 
     return None

From 6031ffa3b245530d0f0544d52454af5956718ec5 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Thu, 7 Jan 2021 09:22:46 -0500
Subject: [PATCH 2/2] fix: Mercury extractor error was incorrectly initialized

---
 archivebox/extractors/mercury.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py
index 741c3291..07c02420 100644
--- a/archivebox/extractors/mercury.py
+++ b/archivebox/extractors/mercury.py
@@ -28,7 +28,7 @@ def ShellError(cmd: List[str], result: CompletedProcess, lines: int=20) -> Archi
     # parse out last line of stderr
     return ArchiveError(
         f'Got {cmd[0]} response code: {result.returncode}).',
-        *(
+        " ".join(
             line.strip()
             for line in (result.stdout + result.stderr).decode().rsplit('\n', lines)[-lines:]
             if line.strip()