mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
add comment about why DOM is preferred over singlefile for readability parsing
This commit is contained in:
parent
fcdc41a1ab
commit
5b07a1126c
2 changed files with 3 additions and 0 deletions
|
@ -99,6 +99,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
||||||
except (Exception, OSError) as err:
|
except (Exception, OSError) as err:
|
||||||
status = 'failed'
|
status = 'failed'
|
||||||
output = err
|
output = err
|
||||||
|
|
||||||
|
# Prefer Chrome's DOM output (dom.html) over singlefile.html: singlefile output often contains huge url(data:image/...base64) strings that make the HTML too long for readability to parse.
|
||||||
cmd = [cmd[0], './{dom,singlefile}.html']
|
cmd = [cmd[0], './{dom,singlefile}.html']
|
||||||
finally:
|
finally:
|
||||||
timer.end()
|
timer.end()
|
||||||
|
|
|
@ -177,6 +177,7 @@
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
$ = django.jQuery;
|
$ = django.jQuery;
|
||||||
$.fn.reverse = [].reverse;
|
$.fn.reverse = [].reverse;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue