diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 130bb5ec..9cb6d677 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -324,6 +324,8 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]: def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" + from ..system import atomic_write + out_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.')) config_path = os.path.join(out_dir, CONFIG_FILENAME) @@ -362,8 +364,9 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: else: config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key} - - atomic_write(config_path, '\n'.join((CONFIG_HEADER, config_file))) + with open(config_path, 'w+') as new: + config_file.write(new) + try: # validate the config by attempting to re-parse it CONFIG = load_all_config() diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index a8f28ce1..c6a4f33c 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -45,7 +45,7 @@ def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None) ('media', should_save_media, save_media), ('archive_org', should_save_archive_dot_org, save_archive_dot_org), ) - + out_dir = out_dir or link.link_dir try: is_new = not os.path.exists(out_dir) @@ -61,7 +61,7 @@ def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None) try: if method_name not in link.history: link.history[method_name] = [] - + if should_run(link, out_dir) or overwrite: log_archive_method_started(method_name) @@ -83,7 +83,7 @@ def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None) write_link_details(link, out_dir=link.link_dir) patch_main_index(link) - + # # If any changes were made, update the main links index json and html # was_changed = stats['succeeded'] or stats['failed'] # if was_changed: diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 50d0111d..2e0957e0 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -78,6 +78,7 @@ def save_wget(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']), link.url, ] + status = 'succeeded' timer = TimedProgress(timeout, prefix=' ') try: @@ -111,7 +112,7 @@ def save_wget(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> raise ArchiveError('500 Internal Server Error', hints) raise ArchiveError('Got an error from the server', hints) - # chmod_file(output, cwd=out_dir) + chmod_file(output, cwd=out_dir) except Exception as err: status = 'failed' output = err diff --git a/archivebox/main.py b/archivebox/main.py index 7c824527..a1aba118 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -882,7 +882,7 @@ def config(config_options_str: Optional[str]=None, print(' {}'.format(printable_config(side_effect_changes, prefix=' '))) if failed_options: stderr() - stderr('[X] These options failed to set:', color='red') + stderr('[X] These options failed to set (check for typos):', color='red') stderr(' {}'.format('\n '.join(failed_options))) raise SystemExit(bool(failed_options)) elif reset: diff --git a/archivebox/system.py b/archivebox/system.py index 1a994cb1..d6206557 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -16,8 +16,7 @@ from .util import enforce_types, ExtendedEncoder from .config import OUTPUT_PERMISSIONS - -def run(*args, input=None, capture_output=True, text=False, timeout=None, check=False, **kwargs): +def run(*args, input=None, capture_output=True, text=False, **kwargs): """Patched of subprocess.run to fix blocking io making timeout=innefective""" if input is not None: @@ -29,12 +28,13 @@ def run(*args, input=None, capture_output=True, text=False, timeout=None, check= raise ValueError('stdout and stderr arguments may not be used ' 'with capture_output.') - return subprocess_run(*args, input=input, capture_output=capture_output, text=text, timeout=timeout, check=check, **kwargs) + return subprocess_run(*args, input=input, capture_output=capture_output, text=text, **kwargs) + @enforce_types def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], overwrite: bool=True) -> None: """Safe atomic write to filesystem by writing to temp file + atomic rename""" - + mode = 'wb+' if isinstance(contents, bytes) else 'w' # print('\n> Atomic Write:', mode, path, len(contents), f'overwrite={overwrite}') @@ -44,8 +44,9 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over elif isinstance(contents, (bytes, str)): f.write(contents) + @enforce_types -def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS, timeout: int=30) -> None: +def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) -> None: """chmod -R /""" root = Path(cwd) / path @@ -93,6 +94,7 @@ def get_dir_size(path: str, recursive: bool=True, pattern: Optional[str]=None) - CRON_COMMENT = 'archivebox_schedule' + @enforce_types def dedupe_cron_jobs(cron: CronTab) -> CronTab: deduped: Set[Tuple[str, str]] = set()