diff --git a/python/wolfxl/calc/__init__.py b/python/wolfxl/calc/__init__.py index 51530cc..0492769 100644 --- a/python/wolfxl/calc/__init__.py +++ b/python/wolfxl/calc/__init__.py @@ -1,7 +1,7 @@ """wolfxl.calc - Formula evaluation engine for wolfxl workbooks.""" from wolfxl.calc._evaluator import WorkbookEvaluator -from wolfxl.calc._functions import FUNCTION_WHITELIST_V1, FunctionRegistry, is_supported +from wolfxl.calc._functions import FUNCTION_WHITELIST_V1, FunctionRegistry, RangeValue, is_supported from wolfxl.calc._graph import DependencyGraph from wolfxl.calc._parser import FormulaParser, all_references, expand_range from wolfxl.calc._protocol import CalcEngine, CellDelta, RecalcResult @@ -13,6 +13,7 @@ "FUNCTION_WHITELIST_V1", "FormulaParser", "FunctionRegistry", + "RangeValue", "RecalcResult", "WorkbookEvaluator", "all_references", diff --git a/python/wolfxl/calc/_evaluator.py b/python/wolfxl/calc/_evaluator.py index 812a6bc..127f2d0 100644 --- a/python/wolfxl/calc/_evaluator.py +++ b/python/wolfxl/calc/_evaluator.py @@ -3,17 +3,21 @@ Replaces fragile regex-based dispatch with a proper recursive descent parser that handles balanced parentheses, operator precedence, and arbitrarily nested expressions like ``=ROUND(SUM(A1:A5)*IF(B1>0,1.1,1.0),2)``. + +When the ``formulas`` library is installed (via ``wolfxl[calc]``), unsupported +functions fall back to the library's Excel function implementations. 
""" from __future__ import annotations +import inspect import logging import re from typing import TYPE_CHECKING, Any -from wolfxl.calc._functions import FunctionRegistry +from wolfxl.calc._functions import FunctionRegistry, RangeValue from wolfxl.calc._graph import DependencyGraph -from wolfxl.calc._parser import expand_range +from wolfxl.calc._parser import expand_range, range_shape from wolfxl.calc._protocol import CellDelta, RecalcResult if TYPE_CHECKING: @@ -21,6 +25,24 @@ logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# formulas library availability +# --------------------------------------------------------------------------- + +_formulas_available: bool | None = None + + +def _check_formulas() -> bool: + global _formulas_available + if _formulas_available is None: + try: + import formulas # noqa: F401 + + _formulas_available = True + except ImportError: + _formulas_available = False + return _formulas_available + # --------------------------------------------------------------------------- # Expression parsing helpers @@ -121,7 +143,7 @@ def _find_top_level_split(expr: str) -> tuple[str, str, str] | None: matched_op = ch elif ch == '=' and not (i >= 1 and expr[i - 1] in ('>', '<', '!')): matched_op = ch - elif pass_type == "add" and ch in ('+', '-'): + elif pass_type == "add" and ch in ('+', '-', '&'): matched_op = ch elif pass_type == "mul" and ch in ('*', '/'): matched_op = ch @@ -169,7 +191,9 @@ def _has_top_level_colon(expr: str) -> bool: def _binary_op(left: Any, op: str, right: Any) -> Any: - """Evaluate an arithmetic binary operation.""" + """Evaluate an arithmetic or string binary operation.""" + if op == '&': + return str(left if left is not None else "") + str(right if right is not None else "") if not isinstance(left, (int, float)) or not isinstance(right, (int, float)): return None if op == '+': @@ -261,6 +285,8 @@ def __init__(self) -> None: self._graph = DependencyGraph() 
self._functions = FunctionRegistry() self._loaded = False + self._use_formulas = _check_formulas() + self._compiled_cache: dict[str, Any] = {} # formula -> compiled callable def load(self, workbook: Workbook) -> None: """Scan workbook, store cell values, build dependency graph.""" @@ -357,7 +383,11 @@ def recalculate( # ------------------------------------------------------------------ def _evaluate_formula(self, cell_ref: str, formula: str) -> Any: - """Evaluate a single formula string (starting with ``=``).""" + """Evaluate a single formula string (starting with ``=``). + + Tries the builtin recursive descent evaluator first. If that returns + None (unsupported function), falls back to the ``formulas`` library. + """ body = formula.strip() if body.startswith('='): body = body[1:] @@ -365,6 +395,13 @@ def _evaluate_formula(self, cell_ref: str, formula: str) -> Any: result = self._eval_expr(body.strip(), sheet) if result is not None: return result + + # Fallback: try the formulas library for unsupported functions + if self._use_formulas: + fb = self._formulas_fallback(formula, sheet) + if fb is not None: + return fb + logger.debug("Cannot evaluate formula %r in %s", formula, cell_ref) return None @@ -392,7 +429,7 @@ def _eval_expr(self, expr: str, sheet: str) -> Any: left_str, op, right_str = split left_val = self._eval_expr(left_str, sheet) right_val = self._eval_expr(right_str, sheet) - if op in ('+', '-', '*', '/'): + if op in ('+', '-', '*', '/', '&'): return _binary_op(left_val, op, right_val) return _compare(left_val, right_val, op) @@ -457,7 +494,10 @@ def _resolve_cell_ref(self, expr: str, sheet: str) -> Any: return self._cell_values.get(ref) def _resolve_range(self, arg: str, sheet: str) -> list[Any]: - """Resolve a range like ``A1:A5`` to a list of cell values.""" + """Resolve a range like ``A1:A5`` to a flat list of cell values. + + Kept for the ``formulas`` library fallback which needs flat lists. + """ clean = arg.strip().replace('$', '') if '!' 
not in clean: range_ref = f"{sheet}!{clean.upper()}" @@ -468,6 +508,20 @@ def _resolve_range(self, arg: str, sheet: str) -> list[Any]: cells = expand_range(range_ref) return [self._cell_values.get(c) for c in cells] + def _resolve_range_2d(self, arg: str, sheet: str) -> RangeValue: + """Resolve a range to a :class:`RangeValue` preserving 2D shape.""" + clean = arg.strip().replace('$', '') + if '!' not in clean: + range_ref = f"{sheet}!{clean.upper()}" + else: + parts = clean.split('!', 1) + ref_sheet = parts[0].strip("'") + range_ref = f"{ref_sheet}!{parts[1].upper()}" + cells = expand_range(range_ref) + n_rows, n_cols = range_shape(range_ref) + values = [self._cell_values.get(c) for c in cells] + return RangeValue(values=values, n_rows=n_rows, n_cols=n_cols) + # ------------------------------------------------------------------ # Function dispatch # ------------------------------------------------------------------ @@ -533,18 +587,125 @@ def _parse_function_args(self, args_str: str, sheet: str) -> list[Any]: def _resolve_arg(self, arg: str, sheet: str) -> Any: """Resolve a single function argument. - Range references (containing ``:`` at depth 0) return a list of - cell values. Everything else delegates to ``_eval_expr``. + Range references (containing ``:`` at depth 0) return a + :class:`RangeValue` with 2D shape metadata. Everything else + delegates to ``_eval_expr``. """ if not arg: return None # Range reference at top level if _has_top_level_colon(arg) and not arg.startswith('"'): - return self._resolve_range(arg, sheet) + return self._resolve_range_2d(arg, sheet) return self._eval_expr(arg, sheet) + # ------------------------------------------------------------------ + # formulas library fallback + # ------------------------------------------------------------------ + + def _formulas_fallback(self, formula: str, sheet: str) -> Any: + """Evaluate a formula via the ``formulas`` library. 
+ + Compiles the formula into a callable, resolves its cell reference + parameters from ``_cell_values``, and returns the scalar result. + """ + import formulas as fm + import numpy as np + + # Compile (with caching) + compiled = self._compiled_cache.get(formula) + if compiled is None: + try: + result = fm.Parser().ast(formula) + if result and len(result) > 1: + compiled = result[1].compile() + self._compiled_cache[formula] = compiled + except Exception: + logger.debug("formulas: cannot compile %r", formula) + return None + if compiled is None: + return None + + # Resolve parameters: the compiled function's signature tells us + # which cell references it needs (e.g., "A1:A5", "B1") + try: + params = list(inspect.signature(compiled).parameters.keys()) + except (ValueError, TypeError): + params = [] + + if not params: + # No cell references - purely constant formula (e.g., =PMT(0.05/12,360,200000)) + try: + raw = compiled() + return self._normalize_formulas_result(raw) + except Exception as e: + logger.debug("formulas: error evaluating %r: %s", formula, e) + return None + + # Map parameter names to cell values + args: list[Any] = [] + for param in params: + # Param names from formulas lib use the formula's raw ref tokens + # like "A1:A5" or "A1" (no sheet prefix for same-sheet refs) + if ':' in param: + # Range parameter - resolve to numpy array + # Qualify with sheet name for range_shape parsing + qualified = param if '!' 
in param else f"{sheet}!{param}" + values = self._resolve_range(param, sheet) + flat = np.array([v if v is not None else 0 for v in values]) + n_rows, n_cols = range_shape(qualified) + if n_cols > 1 and flat.size == n_rows * n_cols: + flat = flat.reshape(n_rows, n_cols) + args.append(flat) + else: + # Single cell parameter + val = self._resolve_cell_ref(param, sheet) + if isinstance(val, (int, float)): + args.append(np.float64(val)) + elif isinstance(val, str): + args.append(val) + else: + args.append(np.float64(0) if val is None else val) + + try: + raw = compiled(*args) + return self._normalize_formulas_result(raw) + except Exception as e: + logger.debug("formulas: error evaluating %r: %s", formula, e) + return None + + @staticmethod + def _normalize_formulas_result(raw: Any) -> Any: + """Convert a ``formulas`` library result to a plain Python value.""" + if raw is None: + return None + # numpy scalar types + if hasattr(raw, 'item'): + try: + val = raw.item() + if isinstance(val, float) and val == int(val): + return int(val) + return val + except (ValueError, TypeError): + pass + # numpy array with single element + if hasattr(raw, 'shape') and hasattr(raw, 'flat'): + try: + if raw.size == 1: + val = raw.flat[0] + if hasattr(val, 'item'): + val = val.item() + if isinstance(val, float) and val == int(val): + return int(val) + return val + except (ValueError, TypeError, IndexError): + pass + # Already a plain Python type + if isinstance(raw, (int, float, str, bool)): + return raw + return raw + @staticmethod def _sheet_from_ref(cell_ref: str) -> str: """Extract sheet name from a canonical cell reference.""" diff --git a/python/wolfxl/calc/_functions.py b/python/wolfxl/calc/_functions.py index 9708fc5..9e48867 100644 --- a/python/wolfxl/calc/_functions.py +++ b/python/wolfxl/calc/_functions.py @@ -2,9 +2,61 @@ from __future__ import annotations +import fnmatch import math +import re +from dataclasses import dataclass from typing import Any, Callable + +# 
--------------------------------------------------------------------------- +# RangeValue: shape-aware 2D range container +# --------------------------------------------------------------------------- + + +@dataclass +class RangeValue: + """A resolved cell range that preserves 2D shape metadata. + + Iterable and sized for backward compat with functions that expect lists. + """ + + values: list[Any] + n_rows: int + n_cols: int + + def get(self, row: int, col: int) -> Any: + """Get value at 1-based (row, col) position.""" + if row < 1 or row > self.n_rows or col < 1 or col > self.n_cols: + return None + idx = (row - 1) * self.n_cols + (col - 1) + return self.values[idx] if idx < len(self.values) else None + + def column(self, col: int) -> list[Any]: + """Extract a 1-based column as a list.""" + if col < 1 or col > self.n_cols: + return [] + return [self.values[(r * self.n_cols) + (col - 1)] + for r in range(self.n_rows) + if (r * self.n_cols) + (col - 1) < len(self.values)] + + def row(self, row: int) -> list[Any]: + """Extract a 1-based row as a list.""" + if row < 1 or row > self.n_rows: + return [] + start = (row - 1) * self.n_cols + return self.values[start:start + self.n_cols] + + def as_flat(self) -> list[Any]: + """Return values as a flat list.""" + return list(self.values) + + def __iter__(self): + return iter(self.values) + + def __len__(self): + return len(self.values) + # --------------------------------------------------------------------------- # Whitelist: functions the calc engine will attempt to evaluate. # Organized by category for readability. 
@@ -28,20 +80,24 @@ "OR": "logic", "NOT": "logic", "IFERROR": "logic", - # Lookup (6) + # Lookup (7) "VLOOKUP": "lookup", "HLOOKUP": "lookup", "INDEX": "lookup", "MATCH": "lookup", "OFFSET": "lookup", "CHOOSE": "lookup", - # Statistical (6) + "XLOOKUP": "lookup", + # Statistical (9) "AVERAGE": "statistical", "COUNT": "statistical", "COUNTA": "statistical", "COUNTIF": "statistical", + "COUNTIFS": "statistical", "MIN": "statistical", "MAX": "statistical", + "SUMIF": "statistical", + "SUMIFS": "statistical", # Financial (7) "PV": "financial", "FV": "financial", @@ -74,7 +130,9 @@ def _coerce_numeric(values: list[Any]) -> list[float]: """Flatten and coerce values to floats, skipping None/str/bool.""" result: list[float] = [] for v in values: - if isinstance(v, (list, tuple)): + if isinstance(v, RangeValue): + result.extend(_coerce_numeric(v.values)) + elif isinstance(v, (list, tuple)): result.extend(_coerce_numeric(list(v))) elif isinstance(v, bool): # In Excel, TRUE=1, FALSE=0 in numeric context @@ -155,7 +213,7 @@ def _builtin_and(args: list[Any]) -> bool: if not args: raise ValueError("AND requires at least 1 argument") for a in args: - if isinstance(a, (list, tuple)): + if isinstance(a, (RangeValue, list, tuple)): if not all(bool(x) for x in a if x is not None): return False elif not a: @@ -167,7 +225,7 @@ def _builtin_or(args: list[Any]) -> bool: if not args: raise ValueError("OR requires at least 1 argument") for a in args: - if isinstance(a, (list, tuple)): + if isinstance(a, (RangeValue, list, tuple)): if any(bool(x) for x in a if x is not None): return True elif a: @@ -190,7 +248,7 @@ def _builtin_counta(args: list[Any]) -> float: """COUNTA - counts non-empty values.""" count = 0 for v in args: - if isinstance(v, (list, tuple)): + if isinstance(v, (RangeValue, list, tuple)): count += sum(1 for x in v if x is not None) elif v is not None: count += 1 @@ -218,6 +276,487 @@ def _builtin_average(args: list[Any]) -> float: return sum(nums) / len(nums) +# 
--------------------------------------------------------------------------- +# Additional math builtins +# --------------------------------------------------------------------------- + + +def _builtin_rounddown(args: list[Any]) -> float: + if len(args) < 1 or len(args) > 2: + raise ValueError("ROUNDDOWN requires 1 or 2 arguments") + nums = _coerce_numeric([args[0]]) + if not nums: + raise ValueError("ROUNDDOWN: non-numeric argument") + digits = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 0 + if digits == 0: + return float(math.trunc(nums[0])) + factor = 10 ** digits + return math.trunc(nums[0] * factor) / factor + + +def _builtin_mod(args: list[Any]) -> float: + if len(args) != 2: + raise ValueError("MOD requires exactly 2 arguments") + nums = _coerce_numeric(args) + if len(nums) != 2: + raise ValueError("MOD: non-numeric argument") + if nums[1] == 0: + raise ValueError("MOD: division by zero") + # Excel MOD: result has the sign of the divisor + return nums[0] - nums[1] * math.floor(nums[0] / nums[1]) + + +def _builtin_power(args: list[Any]) -> float | str: + if len(args) != 2: + raise ValueError("POWER requires exactly 2 arguments") + nums = _coerce_numeric(args) + if len(nums) != 2: + raise ValueError("POWER: non-numeric argument") + # Excel returns #NUM! for negative base with fractional exponent + if nums[0] < 0 and not float(nums[1]).is_integer(): + return "#NUM!" 
+ return nums[0] ** nums[1] + + +def _builtin_sqrt(args: list[Any]) -> float: + if len(args) != 1: + raise ValueError("SQRT requires exactly 1 argument") + nums = _coerce_numeric(args) + if not nums: + raise ValueError("SQRT: non-numeric argument") + if nums[0] < 0: + raise ValueError("SQRT: negative argument") + return math.sqrt(nums[0]) + + +def _builtin_sign(args: list[Any]) -> float: + if len(args) != 1: + raise ValueError("SIGN requires exactly 1 argument") + nums = _coerce_numeric(args) + if not nums: + raise ValueError("SIGN: non-numeric argument") + if nums[0] > 0: + return 1.0 + if nums[0] < 0: + return -1.0 + return 0.0 + + +# --------------------------------------------------------------------------- +# Text builtins +# --------------------------------------------------------------------------- + + +def _coerce_string(val: Any) -> str: + if val is None: + return "" + return str(val) + + +def _builtin_left(args: list[Any]) -> str: + if len(args) < 1 or len(args) > 2: + raise ValueError("LEFT requires 1 or 2 arguments") + text = _coerce_string(args[0]) + num_chars = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 1 + if num_chars < 0: + return "#VALUE!" + return text[:num_chars] + + +def _builtin_right(args: list[Any]) -> str: + if len(args) < 1 or len(args) > 2: + raise ValueError("RIGHT requires 1 or 2 arguments") + text = _coerce_string(args[0]) + num_chars = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 1 + return text[-num_chars:] if num_chars > 0 else "" + + +def _builtin_mid(args: list[Any]) -> str: + if len(args) != 3: + raise ValueError("MID requires exactly 3 arguments") + text = _coerce_string(args[0]) + start = int(_coerce_numeric([args[1]])[0]) + num_chars = int(_coerce_numeric([args[2]])[0]) + if start < 1 or num_chars < 0: + return "#VALUE!" 
+ # Excel MID is 1-indexed + return text[start - 1 : start - 1 + num_chars] + + +def _builtin_len(args: list[Any]) -> float: + if len(args) != 1: + raise ValueError("LEN requires exactly 1 argument") + return float(len(_coerce_string(args[0]))) + + +def _builtin_concatenate(args: list[Any]) -> str: + if not args: + raise ValueError("CONCATENATE requires at least 1 argument") + return "".join(_coerce_string(a) for a in args) + + +# --------------------------------------------------------------------------- +# Criteria matching engine (shared by SUMIF, SUMIFS, COUNTIF, COUNTIFS) +# --------------------------------------------------------------------------- + +_CRITERIA_OP_RE = re.compile(r"^(>=|<=|<>|>|<|=)(.*)$") + + +def _parse_criteria(criteria: Any) -> Callable[[Any], bool]: + """Parse an Excel criteria value into a predicate function. + + Supports: + - Numeric exact match: ``100`` matches cells equal to 100 + - String exact match (case-insensitive): ``"Sales"`` + - Operator prefix: ``">100"``, ``"<=50"``, ``"<>0"`` + - Wildcards: ``"apple*"``, ``"?pple"`` (via fnmatch) + """ + if isinstance(criteria, (int, float)): + target = float(criteria) + return lambda v: isinstance(v, (int, float)) and float(v) == target + + crit_str = str(criteria) + + # Check for operator prefix + m = _CRITERIA_OP_RE.match(crit_str) + if m: + op, val_str = m.group(1), m.group(2).strip() + try: + threshold = float(val_str) + except (ValueError, TypeError): + # String comparison with operator + val_lower = val_str.lower() + if op == ">": + return lambda v: str(v).lower() > val_lower if v is not None else False + if op == "<": + return lambda v: str(v).lower() < val_lower if v is not None else False + if op == ">=": + return lambda v: str(v).lower() >= val_lower if v is not None else False + if op == "<=": + return lambda v: str(v).lower() <= val_lower if v is not None else False + if op == "<>": + return lambda v: str(v).lower() != val_lower if v is not None else True + if op == "=": + 
return lambda v: str(v).lower() == val_lower if v is not None else False + return lambda v: False + + if op == ">": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) > t + if op == "<": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) < t + if op == ">=": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) >= t + if op == "<=": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) <= t + if op == "<>": + return lambda v, t=threshold: not (isinstance(v, (int, float)) and float(v) == t) + if op == "=": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) == t + + # Wildcard check (contains * or ? not escaped) + if "*" in crit_str or "?" in crit_str: + pattern = crit_str.lower() + return lambda v, p=pattern: fnmatch.fnmatch(str(v).lower(), p) if v is not None else False + + # Plain string exact match (case-insensitive) + lower = crit_str.lower() + return lambda v, l=lower: str(v).lower() == l if v is not None else False + + +def _match_criteria(criteria: Any, value: Any) -> bool: + """Convenience: check whether *value* satisfies *criteria*.""" + return _parse_criteria(criteria)(value) + + +# --------------------------------------------------------------------------- +# Lookup builtins (INDEX, MATCH, XLOOKUP, CHOOSE) +# --------------------------------------------------------------------------- + + +def _builtin_index(args: list[Any]) -> Any: + """INDEX(array, row_num [, col_num]).""" + if len(args) < 2 or len(args) > 3: + raise ValueError("INDEX requires 2 or 3 arguments") + array = args[0] + row_num = args[1] + col_num = args[2] if len(args) > 2 else None + + # Safety net: if row_num is None (e.g. 
from unsupported nested func), bail + if row_num is None: + return None + + row_num = int(float(row_num)) + + if isinstance(array, RangeValue): + if col_num is not None: + col_num = int(float(col_num)) + if row_num < 1 or row_num > array.n_rows or col_num < 1 or col_num > array.n_cols: + return "#REF!" + return array.get(row_num, col_num) + # 1D horizontal range: row_num acts as column index + if array.n_rows == 1: + if row_num < 1 or row_num > array.n_cols: + return "#REF!" + return array.get(1, row_num) + # 1D column or multi-col: row_num selects row, return first col + if row_num < 1 or row_num > array.n_rows: + return "#REF!" + if array.n_cols == 1: + return array.get(row_num, 1) + # Multi-col without col_num: return first column value + return array.get(row_num, 1) + + # Plain list fallback + if isinstance(array, (list, tuple)): + if row_num < 1 or row_num > len(array): + return "#REF!" + return array[row_num - 1] + + return None + + +def _builtin_match(args: list[Any]) -> Any: + """MATCH(lookup_value, lookup_array, [match_type]). + + match_type: 0=exact, 1=largest<=, -1=smallest>=. Default 0. 
+ """ + if len(args) < 2 or len(args) > 3: + raise ValueError("MATCH requires 2 or 3 arguments") + lookup_value = args[0] + lookup_array = args[1] + match_type = int(float(args[2])) if len(args) > 2 and args[2] is not None else 0 + + # Flatten to list + if isinstance(lookup_array, RangeValue): + values = lookup_array.values + elif isinstance(lookup_array, (list, tuple)): + values = list(lookup_array) + else: + return "#N/A" + + if match_type == 0: + # Exact match - case-insensitive for strings + for i, v in enumerate(values): + if v is None: + continue + if isinstance(lookup_value, str) and isinstance(v, str): + if lookup_value.lower() == v.lower(): + return i + 1 # 1-based + elif isinstance(lookup_value, (int, float)) and isinstance(v, (int, float)): + if float(lookup_value) == float(v): + return i + 1 + elif lookup_value == v: + return i + 1 + return "#N/A" + + if match_type == 1: + # Largest value <= lookup (assumes sorted ascending) + best_idx = None + for i, v in enumerate(values): + if isinstance(v, (int, float)) and isinstance(lookup_value, (int, float)): + if float(v) <= float(lookup_value): + best_idx = i + 1 + return best_idx if best_idx is not None else "#N/A" + + if match_type == -1: + # Smallest value >= lookup (assumes sorted descending) + best_idx = None + for i, v in enumerate(values): + if isinstance(v, (int, float)) and isinstance(lookup_value, (int, float)): + if float(v) >= float(lookup_value): + best_idx = i + 1 + return best_idx if best_idx is not None else "#N/A" + + return "#N/A" + + +def _builtin_xlookup(args: list[Any]) -> Any: + """XLOOKUP(lookup_value, lookup_array, return_array, [if_not_found], [match_mode], [search_mode]). + + Only exact match (match_mode=0, search_mode=1) is built in. + Other modes return None to fall through to formulas lib. 
+ """ + if len(args) < 3 or len(args) > 6: + raise ValueError("XLOOKUP requires 3 to 6 arguments") + lookup_value = args[0] + lookup_array = args[1] + return_array = args[2] + if_not_found = args[3] if len(args) > 3 else "#N/A" + match_mode = int(float(args[4])) if len(args) > 4 and args[4] is not None else 0 + search_mode = int(float(args[5])) if len(args) > 5 and args[5] is not None else 1 + + # Only handle exact match with forward search + if match_mode != 0 or search_mode not in (1, -1): + return None # fall through to formulas lib + + # Flatten arrays + if isinstance(lookup_array, RangeValue): + lookup_vals = lookup_array.values + elif isinstance(lookup_array, (list, tuple)): + lookup_vals = list(lookup_array) + else: + return if_not_found + + if isinstance(return_array, RangeValue): + return_vals = return_array.values + elif isinstance(return_array, (list, tuple)): + return_vals = list(return_array) + else: + return if_not_found + + search_range = range(len(lookup_vals)) if search_mode == 1 else range(len(lookup_vals) - 1, -1, -1) + + for i in search_range: + v = lookup_vals[i] + if v is None: + continue + matched = False + if isinstance(lookup_value, str) and isinstance(v, str): + matched = lookup_value.lower() == v.lower() + elif isinstance(lookup_value, (int, float)) and isinstance(v, (int, float)): + matched = float(lookup_value) == float(v) + else: + matched = lookup_value == v + if matched: + return return_vals[i] if i < len(return_vals) else if_not_found + + return if_not_found + + +def _builtin_choose(args: list[Any]) -> Any: + """CHOOSE(index_num, value1, value2, ...).""" + if len(args) < 2: + raise ValueError("CHOOSE requires at least 2 arguments") + index_num = int(float(args[0])) + if index_num < 1 or index_num > len(args) - 1: + return "#VALUE!" 
+ return args[index_num] + + +# --------------------------------------------------------------------------- +# Conditional aggregation builtins (SUMIF, SUMIFS, COUNTIF, COUNTIFS) +# --------------------------------------------------------------------------- + + +def _builtin_sumif(args: list[Any]) -> float: + """SUMIF(criteria_range, criteria, [sum_range]).""" + if len(args) < 2 or len(args) > 3: + raise ValueError("SUMIF requires 2 or 3 arguments") + criteria_range = args[0] + criteria = args[1] + sum_range = args[2] if len(args) > 2 else None + + # Flatten ranges + if isinstance(criteria_range, RangeValue): + crit_vals = criteria_range.values + elif isinstance(criteria_range, (list, tuple)): + crit_vals = list(criteria_range) + else: + crit_vals = [criteria_range] + + if sum_range is None: + sum_vals = crit_vals + elif isinstance(sum_range, RangeValue): + sum_vals = sum_range.values + elif isinstance(sum_range, (list, tuple)): + sum_vals = list(sum_range) + else: + sum_vals = [sum_range] + + predicate = _parse_criteria(criteria) + total = 0.0 + for i, cv in enumerate(crit_vals): + if predicate(cv): + sv = sum_vals[i] if i < len(sum_vals) else 0 + if isinstance(sv, (int, float)): + total += float(sv) + return total + + +def _builtin_sumifs(args: list[Any]) -> float: + """SUMIFS(sum_range, criteria_range1, criteria1, ...). + + Note: sum_range is FIRST (unlike SUMIF where it's last). 
+ """ + if len(args) < 3 or len(args) % 2 == 0: + raise ValueError("SUMIFS requires sum_range + pairs of (criteria_range, criteria)") + sum_range = args[0] + + # Flatten sum_range + if isinstance(sum_range, RangeValue): + sum_vals = sum_range.values + elif isinstance(sum_range, (list, tuple)): + sum_vals = list(sum_range) + else: + sum_vals = [sum_range] + + # Build predicate pairs + predicates: list[tuple[list[Any], Callable[[Any], bool]]] = [] + for j in range(1, len(args), 2): + crit_range = args[j] + criteria = args[j + 1] + if isinstance(crit_range, RangeValue): + cv = crit_range.values + elif isinstance(crit_range, (list, tuple)): + cv = list(crit_range) + else: + cv = [crit_range] + predicates.append((cv, _parse_criteria(criteria))) + + total = 0.0 + for i in range(len(sum_vals)): + if all(pred(cv[i]) if i < len(cv) else False for cv, pred in predicates): + sv = sum_vals[i] + if isinstance(sv, (int, float)): + total += float(sv) + return total + + +def _builtin_countif(args: list[Any]) -> float: + """COUNTIF(range, criteria).""" + if len(args) != 2: + raise ValueError("COUNTIF requires exactly 2 arguments") + count_range = args[0] + criteria = args[1] + + if isinstance(count_range, RangeValue): + values = count_range.values + elif isinstance(count_range, (list, tuple)): + values = list(count_range) + else: + values = [count_range] + + predicate = _parse_criteria(criteria) + return float(sum(1 for v in values if predicate(v))) + + +def _builtin_countifs(args: list[Any]) -> float: + """COUNTIFS(criteria_range1, criteria1, [criteria_range2, criteria2, ...]).""" + if len(args) < 2 or len(args) % 2 != 0: + raise ValueError("COUNTIFS requires pairs of (criteria_range, criteria)") + + # Build predicate pairs + predicates: list[tuple[list[Any], Callable[[Any], bool]]] = [] + for j in range(0, len(args), 2): + crit_range = args[j] + criteria = args[j + 1] + if isinstance(crit_range, RangeValue): + cv = crit_range.values + elif isinstance(crit_range, (list, tuple)): + 
cv = list(crit_range) + else: + cv = [crit_range] + predicates.append((cv, _parse_criteria(criteria))) + + # Length of first criteria range determines row count + n = len(predicates[0][0]) if predicates else 0 + count = 0 + for i in range(n): + if all(pred(cv[i]) if i < len(cv) else False for cv, pred in predicates): + count += 1 + return float(count) + + # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- @@ -227,7 +766,12 @@ def _builtin_average(args: list[Any]) -> float: "ABS": _builtin_abs, "ROUND": _builtin_round, "ROUNDUP": _builtin_roundup, + "ROUNDDOWN": _builtin_rounddown, "INT": _builtin_int, + "MOD": _builtin_mod, + "POWER": _builtin_power, + "SQRT": _builtin_sqrt, + "SIGN": _builtin_sign, "IF": _builtin_if, "IFERROR": _builtin_iferror, "AND": _builtin_and, @@ -238,6 +782,19 @@ def _builtin_average(args: list[Any]) -> float: "MIN": _builtin_min, "MAX": _builtin_max, "AVERAGE": _builtin_average, + "LEFT": _builtin_left, + "RIGHT": _builtin_right, + "MID": _builtin_mid, + "LEN": _builtin_len, + "CONCATENATE": _builtin_concatenate, + "INDEX": _builtin_index, + "MATCH": _builtin_match, + "XLOOKUP": _builtin_xlookup, + "CHOOSE": _builtin_choose, + "SUMIF": _builtin_sumif, + "SUMIFS": _builtin_sumifs, + "COUNTIF": _builtin_countif, + "COUNTIFS": _builtin_countifs, } diff --git a/python/wolfxl/calc/_parser.py b/python/wolfxl/calc/_parser.py index b004761..f8454fb 100644 --- a/python/wolfxl/calc/_parser.py +++ b/python/wolfxl/calc/_parser.py @@ -153,6 +153,27 @@ def expand_range(range_ref: str) -> list[str]: return cells +def range_shape(range_ref: str) -> tuple[int, int]: + """Return ``(n_rows, n_cols)`` for a range reference like ``A1:C3``. + + The *range_ref* may include a sheet prefix (``Sheet1!A1:C3``). + """ + ref_part = range_ref + if "!" 
in range_ref: + _, ref_part = range_ref.rsplit("!", 1) + + parts = ref_part.split(":") + if len(parts) != 2: + raise ValueError(f"Invalid range: {range_ref!r}") + + start_row, start_col = a1_to_rowcol(parts[0].replace("$", "")) + end_row, end_col = a1_to_rowcol(parts[1].replace("$", "")) + # Use min/max for consistency with expand_range() + n_rows = max(start_row, end_row) - min(start_row, end_row) + 1 + n_cols = max(start_col, end_col) - min(start_col, end_col) + 1 + return (n_rows, n_cols) + + # --------------------------------------------------------------------------- # All-references extraction (combines singles + expanded ranges) # --------------------------------------------------------------------------- diff --git a/tests/test_calc_formulas_integration.py b/tests/test_calc_formulas_integration.py new file mode 100644 index 0000000..da08e01 --- /dev/null +++ b/tests/test_calc_formulas_integration.py @@ -0,0 +1,515 @@ +"""Tests for formulas library integration and extended builtin functions. + +Tests that the formulas library fallback works for functions not in +the builtin registry, and that the new math/text builtins work correctly. 
+""" + +from __future__ import annotations + +import pytest + +import wolfxl +from wolfxl.calc._evaluator import WorkbookEvaluator + +_has_formulas = pytest.importorskip is not None # always True, but we check below +try: + import formulas # noqa: F401 + _has_formulas = True +except ImportError: + _has_formulas = False + +_requires_formulas = pytest.mark.skipif( + not _has_formulas, reason="formulas library not installed (install wolfxl[calc])" +) + + +# --------------------------------------------------------------------------- +# New builtin math functions +# --------------------------------------------------------------------------- + + +class TestBuiltinRounddown: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 3.777 + ws["B1"] = "=ROUNDDOWN(A1,2)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 3.77 + + def test_zero_digits(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 3.777 + ws["B1"] = "=ROUNDDOWN(A1,0)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 3.0 + + def test_negative(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -3.777 + ws["B1"] = "=ROUNDDOWN(A1,2)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == -3.77 + + +class TestBuiltinMod: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 10 + ws["B1"] = "=MOD(A1,3)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 1.0 + + def test_negative_dividend(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -10 + ws["B1"] = "=MOD(A1,3)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + # Excel MOD: result has sign of divisor + assert results["Sheet!B1"] == 2.0 + + +class TestBuiltinPower: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = 
wb.active + ws["A1"] = 2 + ws["B1"] = "=POWER(A1,10)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 1024.0 + + def test_fractional_exponent(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 4 + ws["B1"] = "=POWER(A1,0.5)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 2.0 + + def test_negative_base_fractional_exponent(self) -> None: + """POWER(-1, 0.5) should return #NUM! (complex result).""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -1 + ws["B1"] = "=POWER(A1,0.5)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#NUM!" + + +class TestBuiltinSqrt: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 144 + ws["B1"] = "=SQRT(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 12.0 + + +class TestBuiltinSign: + def test_positive(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 42 + ws["B1"] = "=SIGN(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 1.0 + + def test_negative(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -7 + ws["B1"] = "=SIGN(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == -1.0 + + def test_zero(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 0 + ws["B1"] = "=SIGN(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 0.0 + + +# --------------------------------------------------------------------------- +# New builtin text functions +# --------------------------------------------------------------------------- + + +class TestBuiltinLeft: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello World" + ws["B1"] = 
'=LEFT(A1,5)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "Hello" + + def test_default_one_char(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = '=LEFT(A1)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "H" + + def test_negative_num_chars(self) -> None: + """LEFT with negative count returns #VALUE!.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = "=LEFT(A1,-1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#VALUE!" + + +class TestBuiltinRight: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello World" + ws["B1"] = '=RIGHT(A1,5)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "World" + + +class TestBuiltinMid: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello World" + ws["B1"] = '=MID(A1,7,5)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "World" + + def test_start_below_one(self) -> None: + """MID with start < 1 returns #VALUE!.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = "=MID(A1,0,3)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#VALUE!" + + def test_negative_num_chars(self) -> None: + """MID with negative num_chars returns #VALUE!.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = "=MID(A1,1,-1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#VALUE!" 
+ + +class TestBuiltinLen: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = '=LEN(A1)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 5.0 + + +class TestBuiltinConcatenate: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["A2"] = " " + ws["A3"] = "World" + ws["B1"] = '=CONCATENATE(A1,A2,A3)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "Hello World" + + +# --------------------------------------------------------------------------- +# formulas library fallback: constant formulas (no cell refs) +# --------------------------------------------------------------------------- + + +@_requires_formulas +class TestFormulasConstantFallback: + """Formulas that use non-builtin functions with only literal arguments.""" + + def test_pmt(self) -> None: + """PMT(rate, nper, pv) - monthly mortgage payment.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "=PMT(0.05/12,360,200000)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + # Expected: ~-1073.64 + val = results["Sheet!A1"] + assert val is not None, "PMT formula returned None - formulas lib not available?" 
+ assert abs(val - (-1073.6432460242797)) < 0.01 + + def test_sln(self) -> None: + """SLN(cost, salvage, life) - straight-line depreciation.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "=SLN(30000,7500,10)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + val = results["Sheet!A1"] + assert val is not None + assert val == 2250 or val == 2250.0 + + +# --------------------------------------------------------------------------- +# formulas library fallback: cell ref formulas +# --------------------------------------------------------------------------- + + +@_requires_formulas +class TestFormulasCellRefFallback: + """Formulas that use non-builtin functions with cell references.""" + + def test_vlookup(self) -> None: + """VLOOKUP via formulas library fallback.""" + wb = wolfxl.Workbook() + ws = wb.active + # Lookup table in B1:C3 + ws["B1"] = 1 + ws["C1"] = 100 + ws["B2"] = 2 + ws["C2"] = 200 + ws["B3"] = 3 + ws["C3"] = 300 + # Lookup value + ws["A1"] = 2 + ws["D1"] = "=VLOOKUP(A1,B1:C3,2,FALSE)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + val = results.get("Sheet!D1") + assert val is not None, "VLOOKUP returned None - formulas lib not available?" + assert val == 200 or val == 200.0 + + def test_npv(self) -> None: + """NPV with cell range reference.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -10000 + ws["A2"] = 3000 + ws["A3"] = 4000 + ws["A4"] = 5000 + ws["A5"] = 6000 + ws["B1"] = "=NPV(0.1,A1:A5)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + val = results.get("Sheet!B1") + assert val is not None, "NPV returned None - formulas lib not available?" 
+ # NPV at 10% discount: ~3534.28 + assert abs(val - 3534.28) < 1.0 + + +# --------------------------------------------------------------------------- +# formulas library fallback: perturbation through financial formulas +# --------------------------------------------------------------------------- + + +@_requires_formulas +class TestFormulasFallbackPerturbation: + """Verify perturbation propagates through formulas-lib-evaluated cells.""" + + def test_pmt_perturbation(self) -> None: + """Perturbing the loan amount should change the PMT result.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 200000 # loan amount + ws["A2"] = "=A1*0.05/12" # monthly rate (builtin handles this) + ws["A3"] = "=PMT(0.05/12,360,A1)" # PMT via formulas fallback + ev = WorkbookEvaluator() + ev.load(wb) + ev.calculate() + + # PMT with cell ref may or may not work depending on formulas lib + # handling. If A3 evaluates, perturbation should propagate. + result = ev.recalculate({"Sheet!A1": 250000}) + # A2 uses builtins (will propagate) + # A3 may or may not propagate depending on formulas lib + assert result.total_formula_cells >= 2 + + +# --------------------------------------------------------------------------- +# Builtin coverage: all 33 builtins registered +# --------------------------------------------------------------------------- + + +class TestBuiltinRegistryCoverage: + def test_33_builtins_registered(self) -> None: + """All 33 builtin functions should be in the registry.""" + from wolfxl.calc._functions import FunctionRegistry + + reg = FunctionRegistry() + expected = { + "SUM", "ABS", "ROUND", "ROUNDUP", "ROUNDDOWN", "INT", + "MOD", "POWER", "SQRT", "SIGN", + "IF", "IFERROR", "AND", "OR", "NOT", + "COUNT", "COUNTA", "MIN", "MAX", "AVERAGE", + "LEFT", "RIGHT", "MID", "LEN", "CONCATENATE", + "INDEX", "MATCH", "XLOOKUP", "CHOOSE", + "SUMIF", "SUMIFS", "COUNTIF", "COUNTIFS", + } + assert expected == reg.supported_functions + + def
test_each_builtin_callable_from_evaluator(self) -> None: + """Smoke test: each builtin resolves in the evaluator function registry.""" + ev = WorkbookEvaluator() + for name in [ + "SUM", "ABS", "ROUND", "ROUNDUP", "ROUNDDOWN", "INT", + "MOD", "POWER", "SQRT", "SIGN", + "IF", "IFERROR", "AND", "OR", "NOT", + "COUNT", "COUNTA", "MIN", "MAX", "AVERAGE", + "LEFT", "RIGHT", "MID", "LEN", "CONCATENATE", + "INDEX", "MATCH", "XLOOKUP", "CHOOSE", + "SUMIF", "SUMIFS", "COUNTIF", "COUNTIFS", + ]: + assert ev._functions.has(name), f"Missing builtin: {name}" + + +# --------------------------------------------------------------------------- +# Combined: builtins + formulas lib in same workbook +# --------------------------------------------------------------------------- + + +@_requires_formulas +class TestCombinedEvaluation: + """Workbook mixing builtin-evaluated and formulas-lib-evaluated formulas.""" + + def test_income_statement_with_sln(self) -> None: + """An income statement that uses SLN for depreciation calculation.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 100000 # revenue + ws["A2"] = 60000 # COGS + ws["A3"] = "=A1-A2" # gross profit (builtin) + ws["A4"] = 15000 # opex + ws["A5"] = "=SLN(50000,5000,10)" # depreciation via formulas lib + ws["A6"] = "=A3-A4-A5" # operating income (builtin, depends on formulas result) + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + + assert results["Sheet!A3"] == 40000.0 # builtin + + # SLN result (via formulas library fallback) + sln_val = results.get("Sheet!A5") + assert sln_val is not None, "SLN returned None - formulas lib not available?" 
+ assert sln_val == 4500 or sln_val == 4500.0 + # Operating income depends on SLN + assert results["Sheet!A6"] == 40000 - 15000 - 4500 + + def test_text_extraction_chain(self) -> None: + """Chain of text functions all handled by builtins.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "2026-01-15" + ws["B1"] = '=LEFT(A1,4)' # "2026" + ws["C1"] = '=MID(A1,6,2)' # "01" + ws["D1"] = '=RIGHT(A1,2)' # "15" + ws["E1"] = '=LEN(A1)' # 10 + ws["F1"] = '=CONCATENATE(B1,"/",C1,"/",D1)' # "2026/01/15" + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "2026" + assert results["Sheet!C1"] == "01" + assert results["Sheet!D1"] == "15" + assert results["Sheet!E1"] == 10.0 + assert results["Sheet!F1"] == "2026/01/15" + + def test_math_chain(self) -> None: + """Chain of math functions mixing old and new builtins.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -7.8 + ws["B1"] = "=ABS(A1)" # 7.8 (old builtin) + ws["C1"] = "=SQRT(B1)" # ~2.793 (new builtin) + ws["D1"] = "=POWER(C1,2)" # ~7.8 (new builtin, should round-trip) + ws["E1"] = "=SIGN(A1)" # -1 (new builtin) + ws["F1"] = "=MOD(8,3)" # 2 (new builtin) + ws["G1"] = "=ROUNDDOWN(C1,1)" # 2.7 (new builtin) + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 7.8 + assert abs(results["Sheet!C1"] - 2.7928480087537886) < 1e-10 + assert abs(results["Sheet!D1"] - 7.8) < 1e-10 + assert results["Sheet!E1"] == -1.0 + assert results["Sheet!F1"] == 2.0 + assert results["Sheet!G1"] == 2.7 + + def test_perturbation_through_new_builtins(self) -> None: + """Perturbation should propagate through new builtin functions.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 100 + ws["B1"] = "=SQRT(A1)" + ws["C1"] = "=POWER(B1,3)" + ws["D1"] = "=ROUNDDOWN(C1,0)" + + ev = WorkbookEvaluator() + ev.load(wb) + ev.calculate() + + result = ev.recalculate({"Sheet!A1": 144}) + assert result.propagation_ratio == 1.0 + delta_map = 
{d.cell_ref: d for d in result.deltas} + assert delta_map["Sheet!B1"].new_value == 12.0 + assert delta_map["Sheet!C1"].new_value == 1728.0 + assert delta_map["Sheet!D1"].new_value == 1728.0 diff --git a/tests/test_calc_functions.py b/tests/test_calc_functions.py index 47ca7a5..2d5aead 100644 --- a/tests/test_calc_functions.py +++ b/tests/test_calc_functions.py @@ -12,8 +12,8 @@ class TestWhitelist: - def test_whitelist_has_39_functions(self) -> None: - assert len(FUNCTION_WHITELIST_V1) == 39 + def test_whitelist_has_43_functions(self) -> None: + assert len(FUNCTION_WHITELIST_V1) == 43 def test_all_categories_represented(self) -> None: categories = set(FUNCTION_WHITELIST_V1.values()) diff --git a/tests/test_calc_lookup_conditional.py b/tests/test_calc_lookup_conditional.py new file mode 100644 index 0000000..eb98233 --- /dev/null +++ b/tests/test_calc_lookup_conditional.py @@ -0,0 +1,500 @@ +"""Tests for lookup and conditional aggregation builtins. + +Covers: INDEX, MATCH, XLOOKUP, CHOOSE, SUMIF, SUMIFS, COUNTIF, COUNTIFS, +the ``&`` string concatenation operator, RangeValue backward compatibility, +and perturbation propagation through lookup/conditional chains. 
+""" + +from __future__ import annotations + +import pytest + +import wolfxl +from wolfxl.calc._evaluator import WorkbookEvaluator +from wolfxl.calc._functions import RangeValue, _match_criteria, _parse_criteria + + +# --------------------------------------------------------------------------- +# Helper: build workbook with data + formulas +# --------------------------------------------------------------------------- + + +def _make_wb(data: dict[str, object], formulas: dict[str, str]) -> wolfxl.Workbook: + wb = wolfxl.Workbook() + ws = wb.active + for ref, val in data.items(): + ws[ref] = val + for ref, formula in formulas.items(): + ws[ref] = formula + return wb + + +def _calc(wb: wolfxl.Workbook) -> dict[str, object]: + ev = WorkbookEvaluator() + ev.load(wb) + return ev.calculate() + + +# --------------------------------------------------------------------------- +# RangeValue unit tests +# --------------------------------------------------------------------------- + + +class TestRangeValue: + def test_get_2d(self) -> None: + rv = RangeValue(values=[1, 2, 3, 4, 5, 6], n_rows=2, n_cols=3) + assert rv.get(1, 1) == 1 + assert rv.get(1, 3) == 3 + assert rv.get(2, 2) == 5 + + def test_get_out_of_bounds(self) -> None: + rv = RangeValue(values=[1, 2, 3], n_rows=3, n_cols=1) + assert rv.get(4, 1) is None + assert rv.get(0, 1) is None + + def test_column_extraction(self) -> None: + rv = RangeValue(values=[1, 2, 3, 4, 5, 6], n_rows=3, n_cols=2) + assert rv.column(1) == [1, 3, 5] + assert rv.column(2) == [2, 4, 6] + + def test_row_extraction(self) -> None: + rv = RangeValue(values=[1, 2, 3, 4, 5, 6], n_rows=3, n_cols=2) + assert rv.row(1) == [1, 2] + assert rv.row(3) == [5, 6] + + def test_iterable_and_len(self) -> None: + rv = RangeValue(values=[10, 20, 30], n_rows=3, n_cols=1) + assert list(rv) == [10, 20, 30] + assert len(rv) == 3 + + +# --------------------------------------------------------------------------- +# RangeValue backward compatibility with existing builtins 
+# --------------------------------------------------------------------------- + + +class TestRangeValueBackwardCompat: + def test_sum_with_range_value(self) -> None: + """SUM should still work when args contain RangeValue.""" + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30}, + {"B1": "=SUM(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 60.0 + + def test_and_with_range_value(self) -> None: + wb = _make_wb( + {"A1": True, "A2": True, "A3": True}, + {"B1": "=AND(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] is True + + def test_or_with_range_value(self) -> None: + wb = _make_wb( + {"A1": False, "A2": True, "A3": False}, + {"B1": "=OR(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] is True + + def test_counta_with_range_value(self) -> None: + wb = _make_wb( + {"A1": "hello", "A2": None, "A3": 42}, + {"B1": "=COUNTA(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2.0 + + +# --------------------------------------------------------------------------- +# INDEX tests +# --------------------------------------------------------------------------- + + +class TestBuiltinIndex: + def test_1d_column_vector(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30, "A4": 40, "A5": 50}, + {"B1": "=INDEX(A1:A5,3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 30 + + def test_2d_array(self) -> None: + wb = _make_wb( + {"A1": 1, "B1": 2, "C1": 3, + "A2": 4, "B2": 5, "C2": 6, + "A3": 7, "B3": 8, "C3": 9}, + {"D1": "=INDEX(A1:C3,2,2)"}, + ) + results = _calc(wb) + assert results["Sheet!D1"] == 5 + + def test_1d_horizontal(self) -> None: + wb = _make_wb( + {"A1": 100, "B1": 200, "C1": 300}, + {"D1": "=INDEX(A1:C1,2)"}, + ) + results = _calc(wb) + assert results["Sheet!D1"] == 200 + + def test_nested_index_match(self) -> None: + """The critical INDEX/MATCH pattern used in financial models.""" + wb = _make_wb( + {"A1": "Revenue", "A2": "COGS", "A3": "OpEx", "A4": "Tax", "A5": "NetInc", + 
"B1": 1000, "B2": 600, "B3": 200, "B4": 50, "B5": 150, + "C1": "COGS"}, + {"D1": "=INDEX(B1:B5,MATCH(C1,A1:A5,0))"}, + ) + results = _calc(wb) + assert results["Sheet!D1"] == 600 + + def test_out_of_bounds(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20}, + {"B1": "=INDEX(A1:A2,5)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == "#REF!" + + +# --------------------------------------------------------------------------- +# MATCH tests +# --------------------------------------------------------------------------- + + +class TestBuiltinMatch: + def test_exact_match_numeric(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30, "A4": 40, "A5": 50}, + {"B1": "=MATCH(30,A1:A5,0)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 3 + + def test_case_insensitive_string(self) -> None: + wb = _make_wb( + {"A1": "Apple", "A2": "Banana", "A3": "Cherry"}, + {"B1": '=MATCH("banana",A1:A3,0)'}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2 + + def test_not_found(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 3}, + {"B1": "=MATCH(99,A1:A3,0)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == "#N/A" + + def test_approximate_match_ascending(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30, "A4": 40}, + {"B1": "=MATCH(25,A1:A4,1)"}, + ) + results = _calc(wb) + # Largest <= 25 is 20 at position 2 + assert results["Sheet!B1"] == 2 + + +# --------------------------------------------------------------------------- +# XLOOKUP tests +# --------------------------------------------------------------------------- + + +class TestBuiltinXlookup: + def test_basic_exact(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 3, + "B1": "Red", "B2": "Green", "B3": "Blue"}, + {"C1": "=XLOOKUP(2,A1:A3,B1:B3)"}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == "Green" + + def test_not_found_default(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 3, + "B1": "Red", "B2": "Green", "B3": 
"Blue"}, + {"C1": '=XLOOKUP(99,A1:A3,B1:B3,"Not found")'}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == "Not found" + + def test_string_lookup(self) -> None: + wb = _make_wb( + {"A1": "Revenue", "A2": "COGS", "A3": "OpEx", + "B1": 1000, "B2": 600, "B3": 200}, + {"C1": '=XLOOKUP("COGS",A1:A3,B1:B3)'}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == 600 + + +# --------------------------------------------------------------------------- +# CHOOSE tests +# --------------------------------------------------------------------------- + + +class TestBuiltinChoose: + def test_basic_selection(self) -> None: + wb = _make_wb( + {}, + {"A1": '=CHOOSE(2,"a","b","c")'}, + ) + results = _calc(wb) + assert results["Sheet!A1"] == "b" + + def test_with_cell_refs(self) -> None: + wb = _make_wb( + {"A1": 3, "B1": 100, "B2": 200, "B3": 300}, + {"C1": "=CHOOSE(A1,B1,B2,B3)"}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == 300 + + +# --------------------------------------------------------------------------- +# SUMIF tests +# --------------------------------------------------------------------------- + + +class TestBuiltinSumif: + def test_operator_criteria(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, "A5": 20, + "B1": 1, "B2": 2, "B3": 3, "B4": 4, "B5": 5}, + {"C1": '=SUMIF(A1:A5,">50",B1:B5)'}, + ) + results = _calc(wb) + # A2=60 and A4=80 match >50 -> B2+B4 = 2+4 = 6 + assert results["Sheet!C1"] == 6.0 + + def test_string_exact_match(self) -> None: + wb = _make_wb( + {"A1": "Sales", "A2": "Marketing", "A3": "Sales", "A4": "Engineering", + "B1": 100, "B2": 200, "B3": 300, "B4": 400}, + {"C1": '=SUMIF(A1:A4,"Sales",B1:B4)'}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == 400.0 # 100 + 300 + + def test_wildcard_criteria(self) -> None: + wb = _make_wb( + {"A1": "apple", "A2": "apricot", "A3": "banana", "A4": "avocado", + "B1": 10, "B2": 20, "B3": 30, "B4": 40}, + {"C1": '=SUMIF(A1:A4,"a*",B1:B4)'}, + ) + results = 
_calc(wb) + # apple, apricot, avocado match "a*" -> 10+20+40 = 70 + assert results["Sheet!C1"] == 70.0 + + def test_no_sum_range(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, "A5": 20}, + {"B1": '=SUMIF(A1:A5,">50")'}, + ) + results = _calc(wb) + # Sums criteria range itself: 60 + 80 = 140 + assert results["Sheet!B1"] == 140.0 + + +# --------------------------------------------------------------------------- +# SUMIFS tests +# --------------------------------------------------------------------------- + + +class TestBuiltinSumifs: + def test_two_criteria(self) -> None: + wb = _make_wb( + {"A1": 20, "A2": 5, "A3": 30, "A4": 15, "A5": 25, + "B1": "Sales", "B2": "Sales", "B3": "Marketing", "B4": "Sales", "B5": "Sales", + "C1": 100, "C2": 200, "C3": 300, "C4": 400, "C5": 500}, + {"D1": '=SUMIFS(C1:C5,A1:A5,">10",B1:B5,"Sales")'}, + ) + results = _calc(wb) + # A>10 AND B="Sales": rows 1 (20,Sales,100), 4 (15,Sales,400), 5 (25,Sales,500) = 1000 + assert results["Sheet!D1"] == 1000.0 + + def test_numeric_criteria_pair(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 1, "A4": 2, + "B1": 10, "B2": 10, "B3": 20, "B4": 20, + "C1": 100, "C2": 200, "C3": 300, "C4": 400}, + {"D1": "=SUMIFS(C1:C4,A1:A4,2,B1:B4,20)"}, + ) + results = _calc(wb) + # A=2 AND B=20: row 4 -> C4=400 + assert results["Sheet!D1"] == 400.0 + + +# --------------------------------------------------------------------------- +# COUNTIF tests +# --------------------------------------------------------------------------- + + +class TestBuiltinCountif: + def test_count_gt_50(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, "A5": 20}, + {"B1": '=COUNTIF(A1:A5,">50")'}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2.0 + + def test_string_match(self) -> None: + wb = _make_wb( + {"A1": "Yes", "A2": "No", "A3": "yes", "A4": "YES"}, + {"B1": '=COUNTIF(A1:A4,"Yes")'}, + ) + results = _calc(wb) + # Case-insensitive: all 3 "yes" variants match + 
assert results["Sheet!B1"] == 3.0 + + def test_wildcard(self) -> None: + wb = _make_wb( + {"A1": "abc", "A2": "def", "A3": "abx", "A4": None}, + {"B1": '=COUNTIF(A1:A4,"ab*")'}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2.0 + + +# --------------------------------------------------------------------------- +# COUNTIFS tests +# --------------------------------------------------------------------------- + + +class TestBuiltinCountifs: + def test_dual_criteria(self) -> None: + wb = _make_wb( + {"A1": "Sales", "A2": "Marketing", "A3": "Sales", "A4": "Sales", + "B1": 100, "B2": 200, "B3": 50, "B4": 150}, + {"C1": '=COUNTIFS(A1:A4,"Sales",B1:B4,">80")'}, + ) + results = _calc(wb) + # Sales AND >80: rows 1 (Sales,100) and 4 (Sales,150) = 2 + assert results["Sheet!C1"] == 2.0 + + +# --------------------------------------------------------------------------- +# & string concatenation operator tests +# --------------------------------------------------------------------------- + + +class TestAmpersandOperator: + def test_basic_string_concat(self) -> None: + wb = _make_wb( + {}, + {"A1": '="Hello"&" "&"World"'}, + ) + results = _calc(wb) + assert results["Sheet!A1"] == "Hello World" + + def test_dynamic_criteria_with_sumif(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, + "B1": 1, "B2": 2, "B3": 3, "B4": 4, + "C1": 50}, + {"D1": '=SUMIF(A1:A4,">"&C1,B1:B4)'}, + ) + results = _calc(wb) + # ">"&50 = ">50" -> A2=60, A4=80 match -> B2+B4 = 2+4 = 6 + assert results["Sheet!D1"] == 6.0 + + def test_cell_ref_concat(self) -> None: + wb = _make_wb( + {"A1": "Hello", "A2": " World"}, + {"B1": "=A1&A2"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == "Hello World" + + +# --------------------------------------------------------------------------- +# Criteria engine unit tests +# --------------------------------------------------------------------------- + + +class TestCriteriaEngine: + def test_numeric_exact(self) -> None: + assert 
_match_criteria(100, 100) is True + assert _match_criteria(100, 99) is False + + def test_operator_gt(self) -> None: + pred = _parse_criteria(">50") + assert pred(60) is True + assert pred(50) is False + assert pred(40) is False + + def test_operator_not_equal(self) -> None: + pred = _parse_criteria("<>0") + assert pred(1) is True + assert pred(0) is False + + def test_wildcard(self) -> None: + pred = _parse_criteria("app*") + assert pred("apple") is True + assert pred("application") is True + assert pred("banana") is False + + def test_none_handling(self) -> None: + pred = _parse_criteria(">0") + assert pred(None) is False + + +# --------------------------------------------------------------------------- +# Perturbation propagation tests +# --------------------------------------------------------------------------- + + +class TestPerturbationPropagation: + def test_perturbation_through_index_match(self) -> None: + """Perturbing a data cell should propagate through INDEX/MATCH.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "X" + ws["A2"] = "Y" + ws["A3"] = "Z" + ws["B1"] = 100 + ws["B2"] = 200 + ws["B3"] = 300 + ws["C1"] = "Y" + ws["D1"] = "=INDEX(B1:B3,MATCH(C1,A1:A3,0))" + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!D1"] == 200 + + # Perturb B2 (the cell INDEX/MATCH resolves to) + recalc = ev.recalculate({"Sheet!B2": 999}) + delta_map = {d.cell_ref: d for d in recalc.deltas} + assert "Sheet!D1" in delta_map + assert delta_map["Sheet!D1"].new_value == 999 + + def test_perturbation_through_sumif(self) -> None: + """Perturbing a sum_range cell should propagate through SUMIF.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Sales" + ws["A2"] = "Marketing" + ws["A3"] = "Sales" + ws["B1"] = 100 + ws["B2"] = 200 + ws["B3"] = 300 + ws["C1"] = '=SUMIF(A1:A3,"Sales",B1:B3)' + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!C1"] == 400.0 + + # Perturb B1 -> 
should change SUMIF result + recalc = ev.recalculate({"Sheet!B1": 500}) + delta_map = {d.cell_ref: d for d in recalc.deltas} + assert "Sheet!C1" in delta_map + assert delta_map["Sheet!C1"].new_value == 800.0 # 500 + 300