From 79b19ddf3598c5f782b53969974748972a85b24f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 30 Jun 2020 01:12:06 -0400 Subject: [PATCH] use atomic writes for config file writing as well --- archivebox/config/__init__.py | 67 +++++++++++++++++------------------ archivebox/extractors/dom.py | 6 ++-- archivebox/extractors/git.py | 1 - archivebox/extractors/pdf.py | 1 + 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index ed03d056..130bb5ec 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -320,65 +320,64 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]: return config_file_vars return None + def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" out_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.')) config_path = os.path.join(out_dir, CONFIG_FILENAME) + if not os.path.exists(config_path): - with open(config_path, 'w+') as f: - f.write(CONFIG_HEADER) + atomic_write(config_path, CONFIG_HEADER) config_file = ConfigParser() config_file.optionxform = str config_file.read(config_path) + with open(config_path, 'r') as old: + atomic_write(f'{config_path}.bak', old.read()) + find_section = lambda key: [name for name, opts in CONFIG_DEFAULTS.items() if key in opts][0] - with open(f'{config_path}.old', 'w+') as old: - with open(config_path, 'r') as new: - old.write(new.read()) + # Set up sections in empty config file + for key, val in config.items(): + section = find_section(key) + if section in config_file: + existing_config = dict(config_file[section]) + else: + existing_config = {} + config_file[section] = {**existing_config, key: val} - with open(config_path, 'w+') as f: - for key, val in config.items(): - section = find_section(key) - if section in config_file: - existing_config = dict(config_file[section]) - else: - existing_config = {} + # always make sure there's a SECRET_KEY defined for Django + existing_secret_key = None + if 'SERVER_CONFIG' in config_file and 'SECRET_KEY' in config_file['SERVER_CONFIG']: + existing_secret_key = config_file['SERVER_CONFIG']['SECRET_KEY'] - config_file[section] = {**existing_config, key: val} + if (not existing_secret_key) or ('not a valid secret' in existing_secret_key): + from django.utils.crypto import get_random_string + chars = 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.' + random_secret_key = get_random_string(50, chars) + if 'SERVER_CONFIG' in config_file: + config_file['SERVER_CONFIG']['SECRET_KEY'] = random_secret_key + else: + config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key} - # always make sure there's a SECRET_KEY defined for Django - existing_secret_key = None - if 'SERVER_CONFIG' in config_file and 'SECRET_KEY' in config_file['SERVER_CONFIG']: - existing_secret_key = config_file['SERVER_CONFIG']['SECRET_KEY'] - - if (not existing_secret_key) or ('not a valid secret' in existing_secret_key): - from django.utils.crypto import get_random_string - chars = 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.' - random_secret_key = get_random_string(50, chars) - if 'SERVER_CONFIG' in config_file: - config_file['SERVER_CONFIG']['SECRET_KEY'] = random_secret_key - else: - config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key} - - f.write(CONFIG_HEADER) - config_file.write(f) + atomic_write(config_path, '\n'.join((CONFIG_HEADER, config_file))) try: + # validate the config by attempting to re-parse it CONFIG = load_all_config() return { key.upper(): CONFIG.get(key.upper()) for key in config.keys() } except: - with open(f'{config_path}.old', 'r') as old: - with open(config_path, 'w+') as new: - new.write(old.read()) + # something went horribly wrong, rever to the previous version + with open(f'{config_path}.bak', 'r') as old: + atomic_write(config_path, old.read()) - if os.path.exists(f'{config_path}.old'): - os.remove(f'{config_path}.old') + if os.path.exists(f'{config_path}.bak'): + os.remove(f'{config_path}.bak') return {} diff --git a/archivebox/extractors/dom.py b/archivebox/extractors/dom.py index b46137b6..63e24692 100644 --- a/archivebox/extractors/dom.py +++ b/archivebox/extractors/dom.py @@ -5,7 +5,7 @@ import os from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from ..system import run, chmod_file, atomic_write from ..util import ( enforce_types, is_static_file, @@ -46,8 +46,8 @@ def save_dom(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> A status = 'succeeded' timer = TimedProgress(timeout, prefix=' ') try: - with open(output_path, 'w+') as f: - result = run(cmd, stdout=f, cwd=out_dir, timeout=timeout) + result = run(cmd, cwd=out_dir, timeout=timeout) + atomic_write(output_path, result.stdout) if result.returncode: hints = result.stderr.decode() diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py index 75674ab8..1534ce34 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/extractors/git.py @@ -65,7 +65,6 @@ def save_git(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> A timer = TimedProgress(timeout, prefix=' ') try: result = run(cmd, cwd=output_path, timeout=timeout + 1) - if result.returncode == 128: # ignore failed re-download when the folder already exists pass diff --git a/archivebox/extractors/pdf.py b/archivebox/extractors/pdf.py index 3786c4cc..bd8093bf 100644 --- a/archivebox/extractors/pdf.py +++ b/archivebox/extractors/pdf.py @@ -58,6 +58,7 @@ def save_pdf(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> A finally: timer.end() + return ArchiveResult( cmd=cmd, pwd=out_dir,