mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-14 15:14:31 -04:00)
better function naming

parent 328a59749b
commit eb003f6a26

2 changed files with 7 additions and 11 deletions
archive.py

@@ -28,7 +28,7 @@ from config import (
 from util import (
     check_dependencies,
     download_url,
-    save_source,
+    save_stdin_source,
     pretty_path,
     migrate_data,
     check_links_structure,
@@ -204,8 +204,7 @@ if __name__ == '__main__':
     if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         source = download_url(source)
     elif stdin_raw_text:
-        source = save_source(stdin_raw_text)
-
+        source = save_stdin_source(stdin_raw_text)
 
     # Step 1: Parse the links and dedupe them with existing archive
     all_links, new_links = load_links(archive_path=out_dir, import_path=source)
@@ -213,15 +212,12 @@ if __name__ == '__main__':
     # Step 2: Write new index
     write_links_index(out_dir=out_dir, links=all_links)
 
-    # Step 3: Verify folder structure is 1:1 with index
-    # cleanup_archive(out_dir, links)
-
-    # Step 4: Run the archive methods for each link
+    # Step 3: Run the archive methods for each link
     if ONLY_NEW:
         update_archive(out_dir, new_links, source=source, resume=resume, append=True)
     else:
         update_archive(out_dir, all_links, source=source, resume=resume, append=True)
 
-    # Step 5: Re-write links index with updated titles, icons, and resources
+    # Step 4: Re-write links index with updated titles, icons, and resources
     all_links, _ = load_links(archive_path=out_dir)
     write_links_index(out_dir=out_dir, links=all_links)
util.py

@@ -205,7 +205,7 @@ def pretty_path(path):
     return path.replace(REPO_DIR + '/', '')
 
 
-def save_source(raw_text):
+def save_stdin_source(raw_text):
     if not os.path.exists(SOURCES_DIR):
         os.makedirs(SOURCES_DIR)
 
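The hunk above only shows the first few lines of the renamed helper. As a minimal sketch of what save_stdin_source plausibly does, assuming it writes the piped-in text to a timestamped file and returns its path (the filename scheme and the datetime import are assumptions; SOURCES_DIR is the config value used in the visible lines):

    import os
    from datetime import datetime

    def save_stdin_source(raw_text):
        """Persist text piped in on stdin so the import has a stable source file (sketch)."""
        # visible in the diff: make sure the sources directory exists
        # (SOURCES_DIR comes from the project's config, as used in the visible lines)
        if not os.path.exists(SOURCES_DIR):
            os.makedirs(SOURCES_DIR)

        # assumption: stdin imports are written to a timestamped file under SOURCES_DIR
        ts = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        source_path = os.path.join(SOURCES_DIR, 'stdin-{}.txt'.format(ts))

        with open(source_path, 'w', encoding='utf-8') as f:
            f.write(raw_text)

        return source_path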
@@ -233,7 +233,7 @@ def fetch_page_content(url, timeout=TIMEOUT):
     return resp.read().decode(encoding)
 
 
-def download_url(url, timeout=TIMEOUT):
+def save_remote_source(url, timeout=TIMEOUT):
     """download a given url's content into downloads/domain.txt"""
 
     if not os.path.exists(SOURCES_DIR):
@@ -265,7 +265,7 @@ def download_url(url, timeout=TIMEOUT):
 
     with open(source_path, 'w', encoding='utf-8') as f:
         f.write(downloaded_xml)
 
     print(' > {}'.format(pretty_path(source_path)))
 
     return source_path
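Putting the visible pieces together (the docstring, the SOURCES_DIR check, and the write/print/return tail in the last hunk), save_remote_source likely has roughly the following shape. This is a hedged sketch, not the file's actual body: the fetch step via fetch_page_content and the domain-plus-timestamp filename are assumptions, while fetch_page_content, pretty_path, downloaded_xml, SOURCES_DIR, and TIMEOUT are names that appear in the diff itself:

    import os
    from datetime import datetime
    from urllib.parse import urlparse

    def save_remote_source(url, timeout=TIMEOUT):
        """download a given url's content into downloads/domain.txt"""
        # visible in the diff: make sure the sources directory exists
        if not os.path.exists(SOURCES_DIR):
            os.makedirs(SOURCES_DIR)

        # assumption: the remote list of links is fetched with the fetch_page_content
        # helper named in the hunk header above
        downloaded_xml = fetch_page_content(url, timeout=timeout)

        # assumption: one timestamped file per source domain, kept under SOURCES_DIR
        ts = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        source_path = os.path.join(SOURCES_DIR, '{}-{}.txt'.format(urlparse(url).netloc, ts))

        # visible in the diff: write the fetched text, report the path, and return it
        with open(source_path, 'w', encoding='utf-8') as f:
            f.write(downloaded_xml)

        print(' > {}'.format(pretty_path(source_path)))

        return source_path

Either way, both helpers return a path under SOURCES_DIR, which archive.py then passes to load_links(import_path=source) in Step 1 above.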