"""INI -> TOML conversion helpers and custom TOML / JSON-schema encoders.

Module path: ``archivebox/abx/archivebox/toml_util.py``.
"""
# Provenance: added by commit 81d16e96fd29d3395e20609627f2bb4f72604ee3
# ("fix toml_util circular import in abx", Nick Sweeting, 2024-10-01).
# The same commit changes archivebox/abx/archivebox/base_configset.py to use
# `from . import toml_util` instead of `from archivebox.misc import toml_util`.

from typing import Any, List, Callable

import json
import ast
import inspect
import toml
import re
import configparser

from pathlib import (
    Path,
    PosixPath,
    PurePath,
    PurePosixPath,
    PureWindowsPath,
    WindowsPath,
)

from pydantic.json_schema import GenerateJsonSchema
from pydantic_core import to_jsonable_python

JSONValue = str | bool | int | None | List['JSONValue']

TOML_HEADER = "# Converted from INI to TOML format: https://toml.io/en/\n\n"


def _is_json_serializable(value: Any) -> bool:
    """Return True if json.dumps() can encode value."""
    try:
        json.dumps(value)
    except (TypeError, ValueError, RecursionError):
        return False
    return True


def load_ini_value(val: str) -> JSONValue:
    """Convert lax INI values into strict TOML-compliant (JSON) values

    Tries, in order: boolean words, plain integers, Python literals, JSON.
    Falls back to returning the raw string unchanged.
    """
    lowered = val.lower()
    # NOTE: '1' and '0' are treated as booleans here, so they never reach the int branch
    if lowered in ('true', 'yes', '1'):
        return True
    if lowered in ('false', 'no', '0'):
        return False

    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as '²' that int() cannot parse, which would raise ValueError
    if val.isdecimal():
        return int(val)

    try:
        parsed = ast.literal_eval(val)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass
    else:
        # literal_eval can yield sets, bytes, complex, etc. which are not
        # JSON values and would crash json.dumps() in convert(); skip those
        if _is_json_serializable(parsed):
            return parsed

    try:
        return json.loads(val)
    except (ValueError, RecursionError):
        pass

    return val


def convert(ini_str: str) -> str:
    """Convert a string of INI config into its TOML equivalent (warning: strips comments)

    Section names and key names are uppercased in the output. Sections whose
    names differ only by case are merged into one TOML table.
    """

    config = configparser.ConfigParser()
    # keep key names exactly as written (ConfigParser lowercases them by default);
    # the uppercasing below is done explicitly
    config.optionxform = str
    config.read_string(ini_str)

    # {SECTION: {KEY: json-encoded value}}
    toml_dict: dict[str, dict[str, str]] = {}

    for section in config.sections():
        # index by the uppercased name everywhere, otherwise a section that is
        # not already uppercase raises KeyError when its keys are stored
        section_items = toml_dict.setdefault(section.upper(), {})

        for key, value in config.items(section):
            # the JSON representation of the parsed value is used as the TOML value
            section_items[key.upper()] = json.dumps(load_ini_value(value))

    # Build the TOML string: one [SECTION] table per section, blank line between
    lines: list[str] = []
    for section, items in toml_dict.items():
        lines.append(f"[{section}]")
        lines.extend(f"{key} = {value}" for key, value in items.items())
        lines.append("")

    return (TOML_HEADER + "\n".join(lines)).strip()


class JSONSchemaWithLambdas(GenerateJsonSchema):
    """
    Encode lambda functions in default values properly.

    Usage:
    >>> SomeModel.model_json_schema(schema_generator=JSONSchemaWithLambdas)
    """
    def encode_default(self, default: Any) -> Any:
        """Return a JSON-compatible rendering of a field's default value.

        Callables whose source can be retrieved are rendered as the string
        '{{lambda <source>}}'; everything else goes through to_jsonable_python.
        """
        config = self._config
        if isinstance(default, Callable):
            try:
                source = inspect.getsource(default)
            except (OSError, TypeError):
                # no retrievable source (builtins, callable instances, code
                # without a source file): fall through to generic encoding
                source = None
            if source is not None:
                # keep what follows the last '=lambda ' and drop the final
                # character of the stripped source line
                # NOTE(review): presumably that character is a trailing ')' or ',' - confirm
                return '{{lambda ' + source.split('=lambda ')[-1].strip()[:-1] + '}}'
        return to_jsonable_python(
            default,
            timedelta_mode=config.ser_json_timedelta,
            bytes_mode=config.ser_json_bytes,
            serialize_unknown=True
        )

    # for computed_field properties render them like this instead:
    # inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),


def _dump_path_str(val: Any) -> str:
    """Encode a path-like value as a JSON-quoted string."""
    return json.dumps(str(val))


def better_toml_dump_str(val: Any) -> str:
    """Encode val as a TOML string, falling back to JSON quoting on failure."""
    try:
        return toml.encoder._dump_str(val)     # type: ignore
    except Exception:
        # if we hit any of toml's numerous encoding bugs,
        # fall back to using json representation of string
        return json.dumps(str(val))


class CustomTOMLEncoder(toml.encoder.TomlEncoder):
    """
    Custom TomlEncoder to work around https://github.com/uiri/toml's many encoding bugs.
    More info: https://github.com/fabiocaccamo/python-benedict/issues/439
    >>> toml.dumps(value, encoder=CustomTOMLEncoder())
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Register every pathlib class, not just Path/PosixPath, so that
        # WindowsPath and pure paths are encoded the same way.
        # NOTE(review): toml appears to pick dump functions by the value's
        # exact type - confirm against the installed toml version.
        for path_type in (Path, PosixPath, WindowsPath,
                          PurePath, PurePosixPath, PureWindowsPath):
            self.dump_funcs[path_type] = _dump_path_str
        self.dump_funcs[str] = better_toml_dump_str
        self.dump_funcs[re.RegexFlag] = better_toml_dump_str