|
| 1 | +# coding=utf-8 |
| 2 | +# |
| 3 | +# PyPWA, a scientific analysis toolkit. |
| 4 | +# Copyright (C) 2016 JLab |
| 5 | +# |
| 6 | +# This program is free software: you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation, either version 3 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License |
| 17 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 18 | + |
| 19 | +""" |
| 20 | +Intelligently loads user created configuration files |
| 21 | +==================================================== |
| 22 | +This loads configuration files in YAML or JSON and attempts to correct any user |
| 23 | +errors that might be in the configuration file before converting the values |
| 24 | +into the correct data types. |
| 25 | +
|
| 26 | +Layout: |
| 27 | +------- |
| 28 | +- Data Types and globals: Custom data types used for configuration, static |
| 29 | + globals for the script |
| 30 | +- Parse: Parses the configuration file and attempts to correct values if |
| 31 | + possible. Supports YAML and JSON |
| 32 | +- Write: Writes a configuration to disk as YAML, or as JSON when the filename ends in ".json". |
| 33 | +
|
| 34 | +""" |
| 35 | + |
| 36 | +import json |
| 37 | +import logging |
| 38 | +from pathlib import Path |
| 39 | +from typing import Any, Dict, Union, Optional, List |
| 40 | + |
| 41 | +import numpy as npy |
| 42 | +import yaml |
| 43 | + |
| 44 | +from PyPWA import AUTHOR, VERSION |
| 45 | + |
| 46 | +"""fuzzywuzzy is an optional dependency |
| 47 | +Fuzzywuzzy handles correcting string values when there is a known list |
| 48 | +of potential values for that string. It isn't necessary for the program |
| 49 | +to run, but does correct for potential user spelling errors in the |
| 50 | +configuration file. |
| 51 | +
|
| 52 | +This is used to correct for options with a set number of expected string |
| 53 | +values, and for correcting dictionary keys. |
| 54 | +""" |
| 55 | + |
try:
    # The flag is set optimistically before the import; when the import
    # below raises ImportError the except branch resets it to False.
    # NOTE(review): the remark on the next line is from the original author;
    # what exactly is broken is not evident from this file -- confirm.
    _FUZZING = True  # This is broken in a really bad way
    import fuzzywuzzy.process

except ImportError:
    # fuzzywuzzy is optional: with the flag off, key correction is skipped
    # and option values are passed through without fuzzy matching.
    _FUZZING = False
| 62 | + |
# Module metadata; author and version are taken from the PyPWA package.
__credits__ = ["Mark Jones"]
__author__ = AUTHOR
__version__ = VERSION


"""
Data Types and Globals
"""

# A parsed configuration: option names mapped to arbitrary values.
_OPTIONS = Dict[str, Any]
# A template: each option name maps to an expected type, a nested template,
# or a list of the accepted string values for that option.
_TEMPLATE = Dict[str, Union[type, Dict[str, type], List[str]]]

# Controls the threshold for when a value is accepted or not: a fuzzy match
# is used only when its score is greater than or equal to this number.
_FUZZY_STRING_CONFIDENCE_LEVEL = 75
_LOGGER = logging.getLogger(__name__)

# Record at debug level that fuzzy correction is unavailable.
if not _FUZZING:
    _LOGGER.debug("Fuzzing is not enabled, fuzzywuzzy not found.")
| 81 | + |
| 82 | +""" |
| 83 | +Parse |
| 84 | +""" |
| 85 | + |
| 86 | + |
def parse(location: Path, template: Optional[_TEMPLATE] = None) -> _OPTIONS:
    """
    Load a YAML or JSON configuration file and optionally correct it.

    The file is parsed as YAML first; if that raises, the stream is rewound
    and parsed as JSON instead. When a template dictionary is supplied the
    parsed keys are corrected with ``_correct_keys`` and the values are
    converted with ``_correct_values``.

    :param location: Path of the configuration file to read.
    :param template: Optional template describing the expected options.
        Anything that is not a dict (including None) disables correction.
    :return: The parsed, and if a template was given corrected, options.
    :raises ValueError: If the file can be parsed as neither YAML nor JSON,
        or if a template is given but the file's top level is not a mapping.
    """
    # Easier to ask for forgiveness than to ask for permission
    with location.open() as stream:
        try:
            # NOTE(review): FullLoader can construct more than plain data
            # types. If configuration files may come from untrusted sources
            # consider yaml.safe_load -- left unchanged here on purpose.
            parsed = yaml.load(stream, Loader=yaml.FullLoader)
        except Exception as yaml_error:
            # Rewind so the JSON parser sees the file from the beginning
            stream.seek(0)
            try:
                parsed = json.load(stream)
            except Exception as json_error:
                # Report both failures; chain the JSON error so its
                # traceback is preserved for debugging.
                raise ValueError(
                    f"Failed to parse the configuration file!\n"
                    f"YAML Error: \n"
                    f"{yaml_error} \n\n"
                    f"JSON Error: \n"
                    f"{json_error}"
                ) from json_error

    # If we're provided a template, we'll use it to correct the dictionary
    if isinstance(template, dict):
        # An empty file parses to None and a bare scalar or list is not a
        # set of options either; fail clearly instead of with an
        # AttributeError deep inside the correction helpers.
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected the configuration file to contain a mapping of "
                f"options, found {type(parsed).__name__} instead."
            )

        parsed = _correct_keys(parsed, template)
        parsed = _correct_values(parsed, template)

    return parsed
| 111 | + |
| 112 | + |
def _correct_keys(parsed: _OPTIONS, template: _TEMPLATE) -> _OPTIONS:
    """
    Rename misspelled option names to the closest name in the template.

    When fuzzywuzzy is unavailable, the template is empty, or ``parsed`` is
    not a dictionary, ``parsed`` is returned unchanged. Otherwise a new
    dictionary is built; nested templates are applied recursively to nested
    dictionaries.

    :param parsed: The options as read from the configuration file.
    :param template: The template whose keys are the correct option names.
    :return: The options with keys corrected where a confident match exists.
    """
    # Without the fuzzer or without any candidate names there is nothing to
    # correct against; a non-dict value cannot have keys to correct.
    if not _FUZZING or not template or not isinstance(parsed, dict):
        return parsed

    corrected = dict()
    correct_keys = list(template.keys())

    for key, value in parsed.items():
        found = key

        # Keys that already match exactly need no fuzzing, and the fuzzer
        # only understands strings.
        if key not in template and isinstance(key, str):
            fuzz = fuzzywuzzy.process.extractOne(key, correct_keys)

            # extractOne returns None when it produces no match at all
            if fuzz is None or fuzz[1] < _FUZZY_STRING_CONFIDENCE_LEVEL:
                _LOGGER.info(f"Failed to find: {key}. Fuzz results: {fuzz!r}")
            elif fuzz[0] in parsed:
                # The user also supplied the exact key; keep this one under
                # its own name rather than overwriting the exact entry.
                _LOGGER.info(
                    f"Not renaming {key!r} to {fuzz[0]!r}: "
                    f"that option is already set."
                )
            else:
                found = fuzz[0]

        # Only descend when both the template and the value are mappings
        nested = template.get(found)
        if isinstance(nested, dict) and isinstance(value, dict):
            corrected[found] = _correct_keys(value, nested)
        else:
            corrected[found] = value

    return corrected
| 134 | + |
| 135 | + |
| 136 | +def _correct_values(parsed: _OPTIONS, template: _TEMPLATE) -> _OPTIONS: |
| 137 | + corrected = dict() |
| 138 | + |
| 139 | + for key in parsed.keys(): |
| 140 | + # Skip keys that are not in the template |
| 141 | + if key not in template: |
| 142 | + corrected[key] = parsed[key] |
| 143 | + continue |
| 144 | + |
| 145 | + if isinstance(parsed[key], type(None)): |
| 146 | + corrected[key] = None |
| 147 | + |
| 148 | + elif template[key] == int: |
| 149 | + corrected[key] = int(parsed[key]) |
| 150 | + |
| 151 | + elif template[key] == float: |
| 152 | + corrected[key] = npy.float64(parsed[key]) |
| 153 | + |
| 154 | + elif template[key] == bool: |
| 155 | + corrected[key] = bool(parsed[key]) |
| 156 | + |
| 157 | + # Some parsers might cast strings of numerical values to a numerical |
| 158 | + # type when we actually want the string |
| 159 | + elif template[key] == str: |
| 160 | + corrected[key] = str(parsed[key]) |
| 161 | + |
| 162 | + # If the value is already a list, it wont wrap it in another list |
| 163 | + elif template[key] == list: |
| 164 | + corrected[key] = list(parsed[key]) |
| 165 | + |
| 166 | + # Same as above |
| 167 | + elif template[key] == set: |
| 168 | + corrected[key] = set(parsed[key]) |
| 169 | + |
| 170 | + # If a list of potential options are provided, this will correct the |
| 171 | + # value to be one of those provided options if possible |
| 172 | + elif isinstance(template[key], list): |
| 173 | + if _FUZZING: |
| 174 | + fuzz = fuzzywuzzy.process.extractOne(parsed[key], template[key]) |
| 175 | + if fuzz[1] >= _FUZZY_STRING_CONFIDENCE_LEVEL: |
| 176 | + corrected[key] = fuzz[0] |
| 177 | + else: |
| 178 | + raise ValueError( |
| 179 | + f"{parsed[key]!r} not found in {template[key]!r}! " |
| 180 | + f"Fuzzing results: {fuzz}" |
| 181 | + ) |
| 182 | + else: |
| 183 | + corrected[key] = parsed[key] |
| 184 | + |
| 185 | + # Handle nested options |
| 186 | + elif isinstance(template[key], dict): |
| 187 | + corrected[key] = _correct_values(parsed[key], template[key]) |
| 188 | + |
| 189 | + # Handle improperly configured configuration template |
| 190 | + else: |
| 191 | + raise ValueError(f"Unknown template type: {template[key]}") |
| 192 | + |
| 193 | + return corrected |
| 194 | + |
| 195 | + |
| 196 | +""" |
| 197 | +Write |
| 198 | +""" |
| 199 | + |
| 200 | + |
def _to_builtin(value: Any) -> Any:
    """Recursively replace numpy scalars and arrays with plain Python data."""
    if isinstance(value, npy.generic):
        return value.item()
    if isinstance(value, npy.ndarray):
        return value.tolist()
    if isinstance(value, dict):
        return {_to_builtin(k): _to_builtin(v) for k, v in value.items()}
    if isinstance(value, list):
        return [_to_builtin(item) for item in value]
    if isinstance(value, tuple):
        return tuple(_to_builtin(item) for item in value)
    if isinstance(value, set):
        return {_to_builtin(item) for item in value}
    return value


def write(filename: Union[Path, str], configuration: "_OPTIONS") -> None:
    """
    Write a configuration to disk as YAML, or JSON for ``.json`` files.

    numpy scalars and arrays in the configuration are converted to plain
    Python values first, so that they serialize as ordinary numbers and
    lists. The text is produced before the file is opened, so a failure to
    serialize leaves an existing file untouched.

    :param filename: Destination path; a ``.json`` suffix selects JSON,
        anything else is written as YAML.
    :param configuration: The options to write.
    """
    filename = Path(filename).absolute()
    plain = _to_builtin(configuration)

    # Serialize first: opening with 'w' truncates the file immediately
    if filename.suffix == ".json":
        text = json.dumps(plain, indent=4)
    else:
        text = yaml.dump(plain)

    with filename.open('w') as stream:
        stream.write(text)
0 commit comments