From 2e2b4f81500914fd10da67b51b0cfb7ede90df01 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 18 Aug 2020 08:23:57 -0400 Subject: [PATCH] fix url is too long to be a path error --- archivebox/parsers/generic_txt.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py index 22c805dd..dd0fe7f5 100644 --- a/archivebox/parsers/generic_txt.py +++ b/archivebox/parsers/generic_txt.py @@ -25,14 +25,18 @@ def parse_generic_txt_export(text_file: IO[str]) -> Iterable[Link]: continue # if the line is a local file path that resolves, then we can archive it - if Path(line).exists(): - yield Link( - url=line, - timestamp=str(datetime.now().timestamp()), - title=None, - tags=None, - sources=[text_file.name], - ) + try: + if Path(line).exists(): + yield Link( + url=line, + timestamp=str(datetime.now().timestamp()), + title=None, + tags=None, + sources=[text_file.name], + ) + except (OSError, PermissionError): + # nvm, not a valid path... + pass # otherwise look for anything that looks like a URL in the line for url in re.findall(URL_REGEX, line):