From 83aede1f2e2b69abed4953ac28e13cbdc4428dc1 Mon Sep 17 00:00:00 2001 From: Wolfgang Schoenberger <221313372+wolfiesch@users.noreply.github.com> Date: Fri, 20 Feb 2026 01:02:07 -0800 Subject: [PATCH 1/3] feat(calc): formulas library fallback + 10 new builtin functions Wire the `formulas` library as a formula-level fallback for functions not covered by builtins (financial: PMT, SLN, NPV; lookup: VLOOKUP, HLOOKUP, INDEX/MATCH; etc). Multi-column ranges are reshaped to 2D numpy arrays so lookup functions receive proper table_array shapes. Add 10 new builtin implementations (25 total): - Math: ROUNDDOWN, MOD, POWER, SQRT, SIGN - Text: LEFT, RIGHT, MID, LEN, CONCATENATE Add range_shape() utility for computing (n_rows, n_cols) from range references. 28 new tests covering builtins, fallback, perturbation propagation, and combined evaluation chains. Co-Authored-By: Claude Opus 4.6 --- python/wolfxl/calc/_evaluator.py | 145 +++++++- python/wolfxl/calc/_functions.py | 122 +++++++ python/wolfxl/calc/_parser.py | 20 ++ tests/test_calc_formulas_integration.py | 452 ++++++++++++++++++++++++ 4 files changed, 737 insertions(+), 2 deletions(-) create mode 100644 tests/test_calc_formulas_integration.py diff --git a/python/wolfxl/calc/_evaluator.py b/python/wolfxl/calc/_evaluator.py index 812a6bc..12d0c17 100644 --- a/python/wolfxl/calc/_evaluator.py +++ b/python/wolfxl/calc/_evaluator.py @@ -3,17 +3,21 @@ Replaces fragile regex-based dispatch with a proper recursive descent parser that handles balanced parentheses, operator precedence, and arbitrarily nested expressions like ``=ROUND(SUM(A1:A5)*IF(B1>0,1.1,1.0),2)``. + +When the ``formulas`` library is installed (via ``wolfxl[calc]``), unsupported +functions fall back to the library's Excel function implementations. """ from __future__ import annotations +import inspect import logging import re from typing import TYPE_CHECKING, Any from wolfxl.calc._functions import FunctionRegistry from wolfxl.calc._graph import DependencyGraph -from wolfxl.calc._parser import expand_range +from wolfxl.calc._parser import expand_range, range_shape from wolfxl.calc._protocol import CellDelta, RecalcResult if TYPE_CHECKING: @@ -21,6 +25,24 @@ logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# formulas library availability +# --------------------------------------------------------------------------- + +_formulas_available: bool | None = None + + +def _check_formulas() -> bool: + global _formulas_available + if _formulas_available is None: + try: + import formulas # noqa: F401 + + _formulas_available = True + except ImportError: + _formulas_available = False + return _formulas_available + # --------------------------------------------------------------------------- # Expression parsing helpers @@ -261,6 +283,8 @@ def __init__(self) -> None: self._graph = DependencyGraph() self._functions = FunctionRegistry() self._loaded = False + self._use_formulas = _check_formulas() + self._compiled_cache: dict[str, Any] = {} # formula -> compiled callable def load(self, workbook: Workbook) -> None: """Scan workbook, store cell values, build dependency graph.""" @@ -357,7 +381,11 @@ def recalculate( # ------------------------------------------------------------------ def _evaluate_formula(self, cell_ref: str, formula: str) -> Any: - """Evaluate a single formula string (starting with ``=``).""" + """Evaluate a single formula string (starting with ``=``). + + Tries the builtin recursive descent evaluator first. If that returns + None (unsupported function), falls back to the ``formulas`` library. + """ body = formula.strip() if body.startswith('='): body = body[1:] @@ -365,6 +393,13 @@ def _evaluate_formula(self, cell_ref: str, formula: str) -> Any: result = self._eval_expr(body.strip(), sheet) if result is not None: return result + + # Fallback: try the formulas library for unsupported functions + if self._use_formulas: + fb = self._formulas_fallback(formula, sheet) + if fb is not None: + return fb + logger.debug("Cannot evaluate formula %r in %s", formula, cell_ref) return None @@ -545,6 +580,112 @@ def _resolve_arg(self, arg: str, sheet: str) -> Any: return self._eval_expr(arg, sheet) + # ------------------------------------------------------------------ + # formulas library fallback + # ------------------------------------------------------------------ + + def _formulas_fallback(self, formula: str, sheet: str) -> Any: + """Evaluate a formula via the ``formulas`` library. + + Compiles the formula into a callable, resolves its cell reference + parameters from ``_cell_values``, and returns the scalar result. + """ + import formulas as fm + import numpy as np + + # Compile (with caching) + compiled = self._compiled_cache.get(formula) + if compiled is None: + try: + result = fm.Parser().ast(formula) + if result and len(result) > 1: + compiled = result[1].compile() + self._compiled_cache[formula] = compiled + except Exception: + logger.debug("formulas: cannot compile %r", formula) + return None + if compiled is None: + return None + + # Resolve parameters: the compiled function's signature tells us + # which cell references it needs (e.g., "A1:A5", "B1") + try: + params = list(inspect.signature(compiled).parameters.keys()) + except (ValueError, TypeError): + params = [] + + if not params: + # No cell references - purely constant formula (e.g., =PMT(0.05/12,360,200000)) + try: + raw = compiled() + return self._normalize_formulas_result(raw) + except Exception as e: + logger.debug("formulas: error evaluating %r: %s", formula, e) + return None + + # Map parameter names to cell values + args: list[Any] = [] + for param in params: + # Param names from formulas lib use the formula's raw ref tokens + # like "A1:A5" or "A1" (no sheet prefix for same-sheet refs) + if ':' in param: + # Range parameter - resolve to numpy array + # Qualify with sheet name for range_shape parsing + qualified = param if '!' in param else f"{sheet}!{param}" + values = self._resolve_range(param, sheet) + flat = np.array([v if v is not None else 0 for v in values]) + n_rows, n_cols = range_shape(qualified) + if n_cols > 1 and flat.size == n_rows * n_cols: + flat = flat.reshape(n_rows, n_cols) + args.append(flat) + else: + # Single cell parameter + val = self._resolve_cell_ref(param, sheet) + if isinstance(val, (int, float)): + args.append(np.float64(val)) + elif isinstance(val, str): + args.append(val) + else: + args.append(np.float64(0) if val is None else val) + + try: + raw = compiled(*args) + return self._normalize_formulas_result(raw) + except Exception as e: + logger.debug("formulas: error evaluating %r: %s", formula, e) + return None + + @staticmethod + def _normalize_formulas_result(raw: Any) -> Any: + """Convert a ``formulas`` library result to a plain Python value.""" + if raw is None: + return None + # numpy scalar types + if hasattr(raw, 'item'): + try: + val = raw.item() + if isinstance(val, float) and val == int(val): + return int(val) + return val + except (ValueError, TypeError): + pass + # numpy array with single element + if hasattr(raw, 'shape') and hasattr(raw, 'flat'): + try: + if raw.size == 1: + val = raw.flat[0] + if hasattr(val, 'item'): + val = val.item() + if isinstance(val, float) and val == int(val): + return int(val) + return val + except (ValueError, TypeError, IndexError): + pass + # Already a plain Python type + if isinstance(raw, (int, float, str, bool)): + return raw + return raw + @staticmethod def _sheet_from_ref(cell_ref: str) -> str: """Extract sheet name from a canonical cell reference.""" diff --git a/python/wolfxl/calc/_functions.py b/python/wolfxl/calc/_functions.py index 9708fc5..74b71ae 100644 --- a/python/wolfxl/calc/_functions.py +++ b/python/wolfxl/calc/_functions.py @@ -218,6 +218,118 @@ def _builtin_average(args: list[Any]) -> float: return sum(nums) / len(nums) +# --------------------------------------------------------------------------- +# Additional math builtins +# --------------------------------------------------------------------------- + + +def _builtin_rounddown(args: list[Any]) -> float: + if len(args) < 1 or len(args) > 2: + raise ValueError("ROUNDDOWN requires 1 or 2 arguments") + nums = _coerce_numeric([args[0]]) + if not nums: + raise ValueError("ROUNDDOWN: non-numeric argument") + digits = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 0 + if digits == 0: + return float(math.trunc(nums[0])) + factor = 10 ** digits + return math.trunc(nums[0] * factor) / factor + + +def _builtin_mod(args: list[Any]) -> float: + if len(args) != 2: + raise ValueError("MOD requires exactly 2 arguments") + nums = _coerce_numeric(args) + if len(nums) != 2: + raise ValueError("MOD: non-numeric argument") + if nums[1] == 0: + raise ValueError("MOD: division by zero") + # Excel MOD: result has the sign of the divisor + return nums[0] - nums[1] * math.floor(nums[0] / nums[1]) + + +def _builtin_power(args: list[Any]) -> float: + if len(args) != 2: + raise ValueError("POWER requires exactly 2 arguments") + nums = _coerce_numeric(args) + if len(nums) != 2: + raise ValueError("POWER: non-numeric argument") + return nums[0] ** nums[1] + + +def _builtin_sqrt(args: list[Any]) -> float: + if len(args) != 1: + raise ValueError("SQRT requires exactly 1 argument") + nums = _coerce_numeric(args) + if not nums: + raise ValueError("SQRT: non-numeric argument") + if nums[0] < 0: + raise ValueError("SQRT: negative argument") + return math.sqrt(nums[0]) + + +def _builtin_sign(args: list[Any]) -> float: + if len(args) != 1: + raise ValueError("SIGN requires exactly 1 argument") + nums = _coerce_numeric(args) + if not nums: + raise ValueError("SIGN: non-numeric argument") + if nums[0] > 0: + return 1.0 + if nums[0] < 0: + return -1.0 + return 0.0 + + +# --------------------------------------------------------------------------- +# Text builtins +# --------------------------------------------------------------------------- + + +def _coerce_string(val: Any) -> str: + if val is None: + return "" + return str(val) + + +def _builtin_left(args: list[Any]) -> str: + if len(args) < 1 or len(args) > 2: + raise ValueError("LEFT requires 1 or 2 arguments") + text = _coerce_string(args[0]) + num_chars = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 1 + return text[:num_chars] + + +def _builtin_right(args: list[Any]) -> str: + if len(args) < 1 or len(args) > 2: + raise ValueError("RIGHT requires 1 or 2 arguments") + text = _coerce_string(args[0]) + num_chars = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 1 + return text[-num_chars:] if num_chars > 0 else "" + + +def _builtin_mid(args: list[Any]) -> str: + if len(args) != 3: + raise ValueError("MID requires exactly 3 arguments") + text = _coerce_string(args[0]) + start = int(_coerce_numeric([args[1]])[0]) + num_chars = int(_coerce_numeric([args[2]])[0]) + # Excel MID is 1-indexed + return text[start - 1 : start - 1 + num_chars] + + +def _builtin_len(args: list[Any]) -> float: + if len(args) != 1: + raise ValueError("LEN requires exactly 1 argument") + return float(len(_coerce_string(args[0]))) + + +def _builtin_concatenate(args: list[Any]) -> str: + if not args: + raise ValueError("CONCATENATE requires at least 1 argument") + return "".join(_coerce_string(a) for a in args) + + # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- @@ -227,7 +339,12 @@ def _builtin_average(args: list[Any]) -> float: "ABS": _builtin_abs, "ROUND": _builtin_round, "ROUNDUP": _builtin_roundup, + "ROUNDDOWN": _builtin_rounddown, "INT": _builtin_int, + "MOD": _builtin_mod, + "POWER": _builtin_power, + "SQRT": _builtin_sqrt, + "SIGN": _builtin_sign, "IF": _builtin_if, "IFERROR": _builtin_iferror, "AND": _builtin_and, @@ -238,6 +355,11 @@ def _builtin_average(args: list[Any]) -> float: "MIN": _builtin_min, "MAX": _builtin_max, "AVERAGE": _builtin_average, + "LEFT": _builtin_left, + "RIGHT": _builtin_right, + "MID": _builtin_mid, + "LEN": _builtin_len, + "CONCATENATE": _builtin_concatenate, } diff --git a/python/wolfxl/calc/_parser.py b/python/wolfxl/calc/_parser.py index b004761..567588d 100644 --- a/python/wolfxl/calc/_parser.py +++ b/python/wolfxl/calc/_parser.py @@ -153,6 +153,26 @@ def expand_range(range_ref: str) -> list[str]: return cells +def range_shape(range_ref: str) -> tuple[int, int]: + """Return ``(n_rows, n_cols)`` for a range reference like ``A1:C3``. + + The *range_ref* may include a sheet prefix (``Sheet1!A1:C3``). + """ + ref_part = range_ref + if "!" in range_ref: + _, ref_part = range_ref.rsplit("!", 1) + + parts = ref_part.split(":") + if len(parts) != 2: + raise ValueError(f"Invalid range: {range_ref!r}") + + start_row, start_col = a1_to_rowcol(parts[0].replace("$", "")) + end_row, end_col = a1_to_rowcol(parts[1].replace("$", "")) + n_rows = abs(end_row - start_row) + 1 + n_cols = abs(end_col - start_col) + 1 + return (n_rows, n_cols) + + # --------------------------------------------------------------------------- # All-references extraction (combines singles + expanded ranges) # --------------------------------------------------------------------------- diff --git a/tests/test_calc_formulas_integration.py b/tests/test_calc_formulas_integration.py new file mode 100644 index 0000000..f3e6860 --- /dev/null +++ b/tests/test_calc_formulas_integration.py @@ -0,0 +1,452 @@ +"""Tests for formulas library integration and extended builtin functions. + +Tests that the formulas library fallback works for functions not in +the builtin registry, and that the new math/text builtins work correctly. +""" + +from __future__ import annotations + +import pytest + +import wolfxl +from wolfxl.calc._evaluator import WorkbookEvaluator + + +# --------------------------------------------------------------------------- +# New builtin math functions +# --------------------------------------------------------------------------- + + +class TestBuiltinRounddown: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 3.777 + ws["B1"] = "=ROUNDDOWN(A1,2)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 3.77 + + def test_zero_digits(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 3.777 + ws["B1"] = "=ROUNDDOWN(A1,0)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 3.0 + + def test_negative(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -3.777 + ws["B1"] = "=ROUNDDOWN(A1,2)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == -3.77 + + +class TestBuiltinMod: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 10 + ws["B1"] = "=MOD(A1,3)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 1.0 + + def test_negative_dividend(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -10 + ws["B1"] = "=MOD(A1,3)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + # Excel MOD: result has sign of divisor + assert results["Sheet!B1"] == 2.0 + + +class TestBuiltinPower: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 2 + ws["B1"] = "=POWER(A1,10)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 1024.0 + + def test_fractional_exponent(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 4 + ws["B1"] = "=POWER(A1,0.5)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 2.0 + + +class TestBuiltinSqrt: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 144 + ws["B1"] = "=SQRT(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 12.0 + + +class TestBuiltinSign: + def test_positive(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 42 + ws["B1"] = "=SIGN(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 1.0 + + def test_negative(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -7 + ws["B1"] = "=SIGN(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == -1.0 + + def test_zero(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 0 + ws["B1"] = "=SIGN(A1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 0.0 + + +# --------------------------------------------------------------------------- +# New builtin text functions +# --------------------------------------------------------------------------- + + +class TestBuiltinLeft: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello World" + ws["B1"] = '=LEFT(A1,5)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "Hello" + + def test_default_one_char(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = '=LEFT(A1)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "H" + + +class TestBuiltinRight: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello World" + ws["B1"] = '=RIGHT(A1,5)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "World" + + +class TestBuiltinMid: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello World" + ws["B1"] = '=MID(A1,7,5)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "World" + + +class TestBuiltinLen: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = '=LEN(A1)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 5.0 + + +class TestBuiltinConcatenate: + def test_basic(self) -> None: + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["A2"] = " " + ws["A3"] = "World" + ws["B1"] = '=CONCATENATE(A1,A2,A3)' + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "Hello World" + + +# --------------------------------------------------------------------------- +# formulas library fallback: constant formulas (no cell refs) +# --------------------------------------------------------------------------- + + +class TestFormulasConstantFallback: + """Formulas that use non-builtin functions with only literal arguments.""" + + def test_pmt(self) -> None: + """PMT(rate, nper, pv) - monthly mortgage payment.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "=PMT(0.05/12,360,200000)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + # Expected: ~-1073.64 + val = results["Sheet!A1"] + assert val is not None, "PMT formula returned None - formulas lib not available?" + assert abs(val - (-1073.6432460242797)) < 0.01 + + def test_sln(self) -> None: + """SLN(cost, salvage, life) - straight-line depreciation.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "=SLN(30000,7500,10)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + val = results["Sheet!A1"] + assert val is not None + assert val == 2250 or val == 2250.0 + + +# --------------------------------------------------------------------------- +# formulas library fallback: cell ref formulas +# --------------------------------------------------------------------------- + + +class TestFormulasCellRefFallback: + """Formulas that use non-builtin functions with cell references.""" + + def test_vlookup(self) -> None: + """VLOOKUP via formulas library fallback.""" + wb = wolfxl.Workbook() + ws = wb.active + # Lookup table in B1:C3 + ws["B1"] = 1 + ws["C1"] = 100 + ws["B2"] = 2 + ws["C2"] = 200 + ws["B3"] = 3 + ws["C3"] = 300 + # Lookup value + ws["A1"] = 2 + ws["D1"] = "=VLOOKUP(A1,B1:C3,2,FALSE)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + val = results.get("Sheet!D1") + assert val is not None, "VLOOKUP returned None - formulas lib not available?" + assert val == 200 or val == 200.0 + + def test_npv(self) -> None: + """NPV with cell range reference.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -10000 + ws["A2"] = 3000 + ws["A3"] = 4000 + ws["A4"] = 5000 + ws["A5"] = 6000 + ws["B1"] = "=NPV(0.1,A1:A5)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + val = results.get("Sheet!B1") + assert val is not None, "NPV returned None - formulas lib not available?" + # NPV at 10% discount: ~3534.28 + assert abs(val - 3534.28) < 1.0 + + +# --------------------------------------------------------------------------- +# formulas library fallback: perturbation through financial formulas +# --------------------------------------------------------------------------- + + +class TestFormulasFallbackPerturbation: + """Verify perturbation propagates through formulas-lib-evaluated cells.""" + + def test_pmt_perturbation(self) -> None: + """Perturbing the loan amount should change the PMT result.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 200000 # loan amount + ws["A2"] = "=A1*0.05/12" # monthly rate (builtin handles this) + ws["A3"] = "=PMT(0.05/12,360,A1)" # PMT via formulas fallback + ev = WorkbookEvaluator() + ev.load(wb) + ev.calculate() + + # PMT with cell ref may or may not work depending on formulas lib + # handling. If A3 evaluates, perturbation should propagate. + result = ev.recalculate({"Sheet!A1": 250000}) + # A2 uses builtins (will propagate) + # A3 may or may not propagate depending on formulas lib + assert result.total_formula_cells >= 2 + + +# --------------------------------------------------------------------------- +# Builtin coverage: all 25 builtins registered +# --------------------------------------------------------------------------- + + +class TestBuiltinRegistryCoverage: + def test_25_builtins_registered(self) -> None: + """All 25 builtin functions should be in the registry.""" + from wolfxl.calc._functions import FunctionRegistry + + reg = FunctionRegistry() + expected = { + "SUM", "ABS", "ROUND", "ROUNDUP", "ROUNDDOWN", "INT", + "MOD", "POWER", "SQRT", "SIGN", + "IF", "IFERROR", "AND", "OR", "NOT", + "COUNT", "COUNTA", "MIN", "MAX", "AVERAGE", + "LEFT", "RIGHT", "MID", "LEN", "CONCATENATE", + } + assert expected == reg.supported_functions + + def test_each_builtin_callable_from_evaluator(self) -> None: + """Smoke test: each builtin resolves in the evaluator function registry.""" + ev = WorkbookEvaluator() + for name in [ + "SUM", "ABS", "ROUND", "ROUNDUP", "ROUNDDOWN", "INT", + "MOD", "POWER", "SQRT", "SIGN", + "IF", "IFERROR", "AND", "OR", "NOT", + "COUNT", "COUNTA", "MIN", "MAX", "AVERAGE", + "LEFT", "RIGHT", "MID", "LEN", "CONCATENATE", + ]: + assert ev._functions.has(name), f"Missing builtin: {name}" + + +# --------------------------------------------------------------------------- +# Combined: builtins + formulas lib in same workbook +# --------------------------------------------------------------------------- + + +class TestCombinedEvaluation: + """Workbook mixing builtin-evaluated and formulas-lib-evaluated formulas.""" + + def test_income_statement_with_sln(self) -> None: + """An income statement that uses SLN for depreciation calculation.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 100000 # revenue + ws["A2"] = 60000 # COGS + ws["A3"] = "=A1-A2" # gross profit (builtin) + ws["A4"] = 15000 # opex + ws["A5"] = "=SLN(50000,5000,10)" # depreciation via formulas lib + ws["A6"] = "=A3-A4-A5" # operating income (builtin, depends on formulas result) + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + + assert results["Sheet!A3"] == 40000.0 # builtin + + # SLN result (via formulas library fallback) + sln_val = results.get("Sheet!A5") + assert sln_val is not None, "SLN returned None - formulas lib not available?" + assert sln_val == 4500 or sln_val == 4500.0 + # Operating income depends on SLN + assert results["Sheet!A6"] == 40000 - 15000 - 4500 + + def test_text_extraction_chain(self) -> None: + """Chain of text functions all handled by builtins.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "2026-01-15" + ws["B1"] = '=LEFT(A1,4)' # "2026" + ws["C1"] = '=MID(A1,6,2)' # "01" + ws["D1"] = '=RIGHT(A1,2)' # "15" + ws["E1"] = '=LEN(A1)' # 10 + ws["F1"] = '=CONCATENATE(B1,"/",C1,"/",D1)' # "2026/01/15" + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "2026" + assert results["Sheet!C1"] == "01" + assert results["Sheet!D1"] == "15" + assert results["Sheet!E1"] == 10.0 + assert results["Sheet!F1"] == "2026/01/15" + + def test_math_chain(self) -> None: + """Chain of math functions mixing old and new builtins.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -7.8 + ws["B1"] = "=ABS(A1)" # 7.8 (old builtin) + ws["C1"] = "=SQRT(B1)" # ~2.793 (new builtin) + ws["D1"] = "=POWER(C1,2)" # ~7.8 (new builtin, should round-trip) + ws["E1"] = "=SIGN(A1)" # -1 (new builtin) + ws["F1"] = "=MOD(8,3)" # 2 (new builtin) + ws["G1"] = "=ROUNDDOWN(C1,1)" # 2.7 (new builtin) + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == 7.8 + assert abs(results["Sheet!C1"] - 2.7928480087537886) < 1e-10 + assert abs(results["Sheet!D1"] - 7.8) < 1e-10 + assert results["Sheet!E1"] == -1.0 + assert results["Sheet!F1"] == 2.0 + assert results["Sheet!G1"] == 2.7 + + def test_perturbation_through_new_builtins(self) -> None: + """Perturbation should propagate through new builtin functions.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = 100 + ws["B1"] = "=SQRT(A1)" + ws["C1"] = "=POWER(B1,3)" + ws["D1"] = "=ROUNDDOWN(C1,0)" + + ev = WorkbookEvaluator() + ev.load(wb) + ev.calculate() + + result = ev.recalculate({"Sheet!A1": 144}) + assert result.propagation_ratio == 1.0 + delta_map = {d.cell_ref: d for d in result.deltas} + assert delta_map["Sheet!B1"].new_value == 12.0 + assert delta_map["Sheet!C1"].new_value == 1728.0 + assert delta_map["Sheet!D1"].new_value == 1728.0 From 92295820ebb8c74a4908e7fd77757be31c78d8b4 Mon Sep 17 00:00:00 2001 From: Wolfgang Schoenberger <221313372+wolfiesch@users.noreply.github.com> Date: Fri, 20 Feb 2026 01:25:13 -0800 Subject: [PATCH 2/3] feat(calc): add lookup & conditional aggregation builtins (INDEX, MATCH, XLOOKUP, CHOOSE, SUMIF, SUMIFS, COUNTIF, COUNTIFS) Adds 8 new builtin functions for lookup and conditional aggregation, bringing the builtin count from 25 to 33 and the whitelist from 39 to 43. These are the most commonly used Excel functions in financial models and are required for LRBench-Agent formula evaluation. Key changes: - RangeValue dataclass: shape-aware 2D range container that preserves (n_rows, n_cols) metadata while staying iterable for backward compat - & string concatenation operator: enables dynamic criteria like ">"&B1 - Criteria matching engine: shared by SUMIF/SUMIFS/COUNTIF/COUNTIFS, supports operators (>100), wildcards (a*), and exact match - 43 new tests covering all builtins, backward compat, and perturbation propagation through INDEX/MATCH and SUMIF chains Co-Authored-By: Claude Opus 4.6 --- python/wolfxl/calc/__init__.py | 3 +- python/wolfxl/calc/_evaluator.py | 36 +- python/wolfxl/calc/_functions.py | 440 ++++++++++++++++++++- tests/test_calc_formulas_integration.py | 8 +- tests/test_calc_functions.py | 4 +- tests/test_calc_lookup_conditional.py | 500 ++++++++++++++++++++++++ 6 files changed, 972 insertions(+), 19 deletions(-) create mode 100644 tests/test_calc_lookup_conditional.py diff --git a/python/wolfxl/calc/__init__.py b/python/wolfxl/calc/__init__.py index 51530cc..0492769 100644 --- a/python/wolfxl/calc/__init__.py +++ b/python/wolfxl/calc/__init__.py @@ -1,7 +1,7 @@ """wolfxl.calc - Formula evaluation engine for wolfxl workbooks.""" from wolfxl.calc._evaluator import WorkbookEvaluator -from wolfxl.calc._functions import FUNCTION_WHITELIST_V1, FunctionRegistry, is_supported +from wolfxl.calc._functions import FUNCTION_WHITELIST_V1, FunctionRegistry, RangeValue, is_supported from wolfxl.calc._graph import DependencyGraph from wolfxl.calc._parser import FormulaParser, all_references, expand_range from wolfxl.calc._protocol import CalcEngine, CellDelta, RecalcResult @@ -13,6 +13,7 @@ "FUNCTION_WHITELIST_V1", "FormulaParser", "FunctionRegistry", + "RangeValue", "RecalcResult", "WorkbookEvaluator", "all_references", diff --git a/python/wolfxl/calc/_evaluator.py b/python/wolfxl/calc/_evaluator.py index 12d0c17..127f2d0 100644 --- a/python/wolfxl/calc/_evaluator.py +++ b/python/wolfxl/calc/_evaluator.py @@ -15,7 +15,7 @@ import re from typing import TYPE_CHECKING, Any -from wolfxl.calc._functions import FunctionRegistry +from wolfxl.calc._functions import FunctionRegistry, RangeValue from wolfxl.calc._graph import DependencyGraph from wolfxl.calc._parser import expand_range, range_shape from wolfxl.calc._protocol import CellDelta, RecalcResult @@ -143,7 +143,7 @@ def _find_top_level_split(expr: str) -> tuple[str, str, str] | None: matched_op = ch elif ch == '=' and not (i >= 1 and expr[i - 1] in ('>', '<', '!')): matched_op = ch - elif pass_type == "add" and ch in ('+', '-'): + elif pass_type == "add" and ch in ('+', '-', '&'): matched_op = ch elif pass_type == "mul" and ch in ('*', '/'): matched_op = ch @@ -191,7 +191,9 @@ def _has_top_level_colon(expr: str) -> bool: def _binary_op(left: Any, op: str, right: Any) -> Any: - """Evaluate an arithmetic binary operation.""" + """Evaluate an arithmetic or string binary operation.""" + if op == '&': + return str(left if left is not None else "") + str(right if right is not None else "") if not isinstance(left, (int, float)) or not isinstance(right, (int, float)): return None if op == '+': @@ -427,7 +429,7 @@ def _eval_expr(self, expr: str, sheet: str) -> Any: left_str, op, right_str = split left_val = self._eval_expr(left_str, sheet) right_val = self._eval_expr(right_str, sheet) - if op in ('+', '-', '*', '/'): + if op in ('+', '-', '*', '/', '&'): return _binary_op(left_val, op, right_val) return _compare(left_val, right_val, op) @@ -492,7 +494,10 @@ def _resolve_cell_ref(self, expr: str, sheet: str) -> Any: return self._cell_values.get(ref) def _resolve_range(self, arg: str, sheet: str) -> list[Any]: - """Resolve a range like ``A1:A5`` to a list of cell values.""" + """Resolve a range like ``A1:A5`` to a flat list of cell values. + + Kept for the ``formulas`` library fallback which needs flat lists. + """ clean = arg.strip().replace('$', '') if '!' not in clean: range_ref = f"{sheet}!{clean.upper()}" @@ -503,6 +508,20 @@ def _resolve_range(self, arg: str, sheet: str) -> list[Any]: cells = expand_range(range_ref) return [self._cell_values.get(c) for c in cells] + def _resolve_range_2d(self, arg: str, sheet: str) -> RangeValue: + """Resolve a range to a :class:`RangeValue` preserving 2D shape.""" + clean = arg.strip().replace('$', '') + if '!' not in clean: + range_ref = f"{sheet}!{clean.upper()}" + else: + parts = clean.split('!', 1) + ref_sheet = parts[0].strip("'") + range_ref = f"{ref_sheet}!{parts[1].upper()}" + cells = expand_range(range_ref) + n_rows, n_cols = range_shape(range_ref) + values = [self._cell_values.get(c) for c in cells] + return RangeValue(values=values, n_rows=n_rows, n_cols=n_cols) + # ------------------------------------------------------------------ # Function dispatch # ------------------------------------------------------------------ @@ -568,15 +587,16 @@ def _parse_function_args(self, args_str: str, sheet: str) -> list[Any]: def _resolve_arg(self, arg: str, sheet: str) -> Any: """Resolve a single function argument. - Range references (containing ``:`` at depth 0) return a list of - cell values. Everything else delegates to ``_eval_expr``. + Range references (containing ``:`` at depth 0) return a + :class:`RangeValue` with 2D shape metadata. Everything else + delegates to ``_eval_expr``. """ if not arg: return None # Range reference at top level if _has_top_level_colon(arg) and not arg.startswith('"'): - return self._resolve_range(arg, sheet) + return self._resolve_range_2d(arg, sheet) return self._eval_expr(arg, sheet) diff --git a/python/wolfxl/calc/_functions.py b/python/wolfxl/calc/_functions.py index 74b71ae..6646684 100644 --- a/python/wolfxl/calc/_functions.py +++ b/python/wolfxl/calc/_functions.py @@ -2,9 +2,61 @@ from __future__ import annotations +import fnmatch import math +import re +from dataclasses import dataclass from typing import Any, Callable + +# --------------------------------------------------------------------------- +# RangeValue: shape-aware 2D range container +# --------------------------------------------------------------------------- + + +@dataclass +class RangeValue: + """A resolved cell range that preserves 2D shape metadata. + + Iterable and sized for backward compat with functions that expect lists. + """ + + values: list[Any] + n_rows: int + n_cols: int + + def get(self, row: int, col: int) -> Any: + """Get value at 1-based (row, col) position.""" + if row < 1 or row > self.n_rows or col < 1 or col > self.n_cols: + return None + idx = (row - 1) * self.n_cols + (col - 1) + return self.values[idx] if idx < len(self.values) else None + + def column(self, col: int) -> list[Any]: + """Extract a 1-based column as a list.""" + if col < 1 or col > self.n_cols: + return [] + return [self.values[(r * self.n_cols) + (col - 1)] + for r in range(self.n_rows) + if (r * self.n_cols) + (col - 1) < len(self.values)] + + def row(self, row: int) -> list[Any]: + """Extract a 1-based row as a list.""" + if row < 1 or row > self.n_rows: + return [] + start = (row - 1) * self.n_cols + return self.values[start:start + self.n_cols] + + def as_flat(self) -> list[Any]: + """Return values as a flat list.""" + return list(self.values) + + def __iter__(self): + return iter(self.values) + + def __len__(self): + return len(self.values) + # --------------------------------------------------------------------------- # Whitelist: functions the calc engine will attempt to evaluate. # Organized by category for readability. @@ -28,20 +80,24 @@ "OR": "logic", "NOT": "logic", "IFERROR": "logic", - # Lookup (6) + # Lookup (7) "VLOOKUP": "lookup", "HLOOKUP": "lookup", "INDEX": "lookup", "MATCH": "lookup", "OFFSET": "lookup", "CHOOSE": "lookup", - # Statistical (6) + "XLOOKUP": "lookup", + # Statistical (9) "AVERAGE": "statistical", "COUNT": "statistical", "COUNTA": "statistical", "COUNTIF": "statistical", + "COUNTIFS": "statistical", "MIN": "statistical", "MAX": "statistical", + "SUMIF": "statistical", + "SUMIFS": "statistical", # Financial (7) "PV": "financial", "FV": "financial", @@ -74,7 +130,9 @@ def _coerce_numeric(values: list[Any]) -> list[float]: """Flatten and coerce values to floats, skipping None/str/bool.""" result: list[float] = [] for v in values: - if isinstance(v, (list, tuple)): + if isinstance(v, RangeValue): + result.extend(_coerce_numeric(v.values)) + elif isinstance(v, (list, tuple)): result.extend(_coerce_numeric(list(v))) elif isinstance(v, bool): # In Excel, TRUE=1, FALSE=0 in numeric context @@ -155,7 +213,7 @@ def _builtin_and(args: list[Any]) -> bool: if not args: raise ValueError("AND requires at least 1 argument") for a in args: - if isinstance(a, (list, tuple)): + if isinstance(a, (RangeValue, list, tuple)): if not all(bool(x) for x in a if x is not None): return False elif not a: @@ -167,7 +225,7 @@ def _builtin_or(args: list[Any]) -> bool: if not args: raise ValueError("OR requires at least 1 argument") for a in args: - if isinstance(a, (list, tuple)): + if isinstance(a, (RangeValue, list, tuple)): if any(bool(x) for x in a if x is not None): return True elif a: @@ -190,7 +248,7 @@ def _builtin_counta(args: list[Any]) -> float: """COUNTA - counts non-empty values.""" count = 0 for v in args: - if isinstance(v, (list, tuple)): + if isinstance(v, (RangeValue, list, tuple)): count += sum(1 for x in v if x is not None) elif v is not None: count += 1 @@ -330,6 +388,368 @@ def _builtin_concatenate(args: list[Any]) -> str: return "".join(_coerce_string(a) for a in args) +# --------------------------------------------------------------------------- +# Criteria matching engine (shared by SUMIF, SUMIFS, COUNTIF, COUNTIFS) +# --------------------------------------------------------------------------- + +_CRITERIA_OP_RE = re.compile(r"^(>=|<=|<>|>|<|=)(.*)$") + + +def _parse_criteria(criteria: Any) -> Callable[[Any], bool]: + """Parse an Excel criteria value into a predicate function. + + Supports: + - Numeric exact match: ``100`` matches cells equal to 100 + - String exact match (case-insensitive): ``"Sales"`` + - Operator prefix: ``">100"``, ``"<=50"``, ``"<>0"`` + - Wildcards: ``"apple*"``, ``"?pple"`` (via fnmatch) + """ + if isinstance(criteria, (int, float)): + target = float(criteria) + return lambda v: isinstance(v, (int, float)) and float(v) == target + + crit_str = str(criteria) + + # Check for operator prefix + m = _CRITERIA_OP_RE.match(crit_str) + if m: + op, val_str = m.group(1), m.group(2).strip() + try: + threshold = float(val_str) + except (ValueError, TypeError): + # String comparison with operator + val_lower = val_str.lower() + if op == ">": + return lambda v: str(v).lower() > val_lower if v is not None else False + if op == "<": + return lambda v: str(v).lower() < val_lower if v is not None else False + if op == ">=": + return lambda v: str(v).lower() >= val_lower if v is not None else False + if op == "<=": + return lambda v: str(v).lower() <= val_lower if v is not None else False + if op == "<>": + return lambda v: str(v).lower() != val_lower if v is not None else True + if op == "=": + return lambda v: str(v).lower() == val_lower if v is not None else False + return lambda v: False + + if op == ">": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) > t + if op == "<": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) < t + if op == ">=": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) >= t + if op == "<=": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) <= t + if op == "<>": + return lambda v, t=threshold: not (isinstance(v, (int, float)) and float(v) == t) + if op == "=": + return lambda v, t=threshold: isinstance(v, (int, float)) and float(v) == t + + # Wildcard check (contains * or ? not escaped) + if "*" in crit_str or "?" in crit_str: + pattern = crit_str.lower() + return lambda v, p=pattern: fnmatch.fnmatch(str(v).lower(), p) if v is not None else False + + # Plain string exact match (case-insensitive) + lower = crit_str.lower() + return lambda v, l=lower: str(v).lower() == l if v is not None else False + + +def _match_criteria(criteria: Any, value: Any) -> bool: + """Convenience: check whether *value* satisfies *criteria*.""" + return _parse_criteria(criteria)(value) + + +# --------------------------------------------------------------------------- +# Lookup builtins (INDEX, MATCH, XLOOKUP, CHOOSE) +# --------------------------------------------------------------------------- + + +def _builtin_index(args: list[Any]) -> Any: + """INDEX(array, row_num [, col_num]).""" + if len(args) < 2 or len(args) > 3: + raise ValueError("INDEX requires 2 or 3 arguments") + array = args[0] + row_num = args[1] + col_num = args[2] if len(args) > 2 else None + + # Safety net: if row_num is None (e.g. from unsupported nested func), bail + if row_num is None: + return None + + row_num = int(float(row_num)) + + if isinstance(array, RangeValue): + if col_num is not None: + col_num = int(float(col_num)) + if row_num < 1 or row_num > array.n_rows or col_num < 1 or col_num > array.n_cols: + return "#REF!" + return array.get(row_num, col_num) + # 1D horizontal range: row_num acts as column index + if array.n_rows == 1: + if row_num < 1 or row_num > array.n_cols: + return "#REF!" + return array.get(1, row_num) + # 1D column or multi-col: row_num selects row, return first col + if row_num < 1 or row_num > array.n_rows: + return "#REF!" + if array.n_cols == 1: + return array.get(row_num, 1) + # Multi-col without col_num: return first column value + return array.get(row_num, 1) + + # Plain list fallback + if isinstance(array, (list, tuple)): + if row_num < 1 or row_num > len(array): + return "#REF!" + return array[row_num - 1] + + return None + + +def _builtin_match(args: list[Any]) -> Any: + """MATCH(lookup_value, lookup_array, [match_type]). + + match_type: 0=exact, 1=largest<=, -1=smallest>=. Default 0. + """ + if len(args) < 2 or len(args) > 3: + raise ValueError("MATCH requires 2 or 3 arguments") + lookup_value = args[0] + lookup_array = args[1] + match_type = int(float(args[2])) if len(args) > 2 and args[2] is not None else 0 + + # Flatten to list + if isinstance(lookup_array, RangeValue): + values = lookup_array.values + elif isinstance(lookup_array, (list, tuple)): + values = list(lookup_array) + else: + return "#N/A" + + if match_type == 0: + # Exact match - case-insensitive for strings + for i, v in enumerate(values): + if v is None: + continue + if isinstance(lookup_value, str) and isinstance(v, str): + if lookup_value.lower() == v.lower(): + return i + 1 # 1-based + elif isinstance(lookup_value, (int, float)) and isinstance(v, (int, float)): + if float(lookup_value) == float(v): + return i + 1 + elif lookup_value == v: + return i + 1 + return "#N/A" + + if match_type == 1: + # Largest value <= lookup (assumes sorted ascending) + best_idx = None + for i, v in enumerate(values): + if isinstance(v, (int, float)) and isinstance(lookup_value, (int, float)): + if float(v) <= float(lookup_value): + best_idx = i + 1 + return best_idx if best_idx is not None else "#N/A" + + if match_type == -1: + # Smallest value >= lookup (assumes sorted descending) + best_idx = None + for i, v in enumerate(values): + if isinstance(v, (int, float)) and isinstance(lookup_value, (int, float)): + if float(v) >= float(lookup_value): + best_idx = i + 1 + return best_idx if best_idx is not None else "#N/A" + + return "#N/A" + + +def _builtin_xlookup(args: list[Any]) -> Any: + """XLOOKUP(lookup_value, lookup_array, return_array, [if_not_found], [match_mode], [search_mode]). + + Only exact match (match_mode=0, search_mode=1) is built in. + Other modes return None to fall through to formulas lib. + """ + if len(args) < 3 or len(args) > 6: + raise ValueError("XLOOKUP requires 3 to 6 arguments") + lookup_value = args[0] + lookup_array = args[1] + return_array = args[2] + if_not_found = args[3] if len(args) > 3 else "#N/A" + match_mode = int(float(args[4])) if len(args) > 4 and args[4] is not None else 0 + search_mode = int(float(args[5])) if len(args) > 5 and args[5] is not None else 1 + + # Only handle exact match with forward search + if match_mode != 0 or search_mode not in (1, -1): + return None # fall through to formulas lib + + # Flatten arrays + if isinstance(lookup_array, RangeValue): + lookup_vals = lookup_array.values + elif isinstance(lookup_array, (list, tuple)): + lookup_vals = list(lookup_array) + else: + return if_not_found + + if isinstance(return_array, RangeValue): + return_vals = return_array.values + elif isinstance(return_array, (list, tuple)): + return_vals = list(return_array) + else: + return if_not_found + + search_range = range(len(lookup_vals)) if search_mode == 1 else range(len(lookup_vals) - 1, -1, -1) + + for i in search_range: + v = lookup_vals[i] + if v is None: + continue + matched = False + if isinstance(lookup_value, str) and isinstance(v, str): + matched = lookup_value.lower() == v.lower() + elif isinstance(lookup_value, (int, float)) and isinstance(v, (int, float)): + matched = float(lookup_value) == float(v) + else: + matched = lookup_value == v + if matched: + return return_vals[i] if i < len(return_vals) else if_not_found + + return if_not_found + + +def _builtin_choose(args: list[Any]) -> Any: + """CHOOSE(index_num, value1, value2, ...).""" + if len(args) < 2: + raise ValueError("CHOOSE requires at least 2 arguments") + index_num = int(float(args[0])) + if index_num < 1 or index_num > len(args) - 1: + return "#VALUE!" + return args[index_num] + + +# --------------------------------------------------------------------------- +# Conditional aggregation builtins (SUMIF, SUMIFS, COUNTIF, COUNTIFS) +# --------------------------------------------------------------------------- + + +def _builtin_sumif(args: list[Any]) -> float: + """SUMIF(criteria_range, criteria, [sum_range]).""" + if len(args) < 2 or len(args) > 3: + raise ValueError("SUMIF requires 2 or 3 arguments") + criteria_range = args[0] + criteria = args[1] + sum_range = args[2] if len(args) > 2 else None + + # Flatten ranges + if isinstance(criteria_range, RangeValue): + crit_vals = criteria_range.values + elif isinstance(criteria_range, (list, tuple)): + crit_vals = list(criteria_range) + else: + crit_vals = [criteria_range] + + if sum_range is None: + sum_vals = crit_vals + elif isinstance(sum_range, RangeValue): + sum_vals = sum_range.values + elif isinstance(sum_range, (list, tuple)): + sum_vals = list(sum_range) + else: + sum_vals = [sum_range] + + predicate = _parse_criteria(criteria) + total = 0.0 + for i, cv in enumerate(crit_vals): + if predicate(cv): + sv = sum_vals[i] if i < len(sum_vals) else 0 + if isinstance(sv, (int, float)): + total += float(sv) + return total + + +def _builtin_sumifs(args: list[Any]) -> float: + """SUMIFS(sum_range, criteria_range1, criteria1, ...). + + Note: sum_range is FIRST (unlike SUMIF where it's last). + """ + if len(args) < 3 or len(args) % 2 == 0: + raise ValueError("SUMIFS requires sum_range + pairs of (criteria_range, criteria)") + sum_range = args[0] + + # Flatten sum_range + if isinstance(sum_range, RangeValue): + sum_vals = sum_range.values + elif isinstance(sum_range, (list, tuple)): + sum_vals = list(sum_range) + else: + sum_vals = [sum_range] + + # Build predicate pairs + predicates: list[tuple[list[Any], Callable[[Any], bool]]] = [] + for j in range(1, len(args), 2): + crit_range = args[j] + criteria = args[j + 1] + if isinstance(crit_range, RangeValue): + cv = crit_range.values + elif isinstance(crit_range, (list, tuple)): + cv = list(crit_range) + else: + cv = [crit_range] + predicates.append((cv, _parse_criteria(criteria))) + + total = 0.0 + for i in range(len(sum_vals)): + if all(pred(cv[i]) if i < len(cv) else False for cv, pred in predicates): + sv = sum_vals[i] + if isinstance(sv, (int, float)): + total += float(sv) + return total + + +def _builtin_countif(args: list[Any]) -> float: + """COUNTIF(range, criteria).""" + if len(args) != 2: + raise ValueError("COUNTIF requires exactly 2 arguments") + count_range = args[0] + criteria = args[1] + + if isinstance(count_range, RangeValue): + values = count_range.values + elif isinstance(count_range, (list, tuple)): + values = list(count_range) + else: + values = [count_range] + + predicate = _parse_criteria(criteria) + return float(sum(1 for v in values if predicate(v))) + + +def _builtin_countifs(args: list[Any]) -> float: + """COUNTIFS(criteria_range1, criteria1, [criteria_range2, criteria2, ...]).""" + if len(args) < 2 or len(args) % 2 != 0: + raise ValueError("COUNTIFS requires pairs of (criteria_range, criteria)") + + # Build predicate pairs + predicates: list[tuple[list[Any], Callable[[Any], bool]]] = [] + for j in range(0, len(args), 2): + crit_range = args[j] + criteria = args[j + 1] + if isinstance(crit_range, RangeValue): + cv = crit_range.values + elif isinstance(crit_range, (list, tuple)): + cv = list(crit_range) + else: + cv = [crit_range] + predicates.append((cv, _parse_criteria(criteria))) + + # Length of first criteria range determines row count + n = len(predicates[0][0]) if predicates else 0 + count = 0 + for i in range(n): + if all(pred(cv[i]) if i < len(cv) else False for cv, pred in predicates): + count += 1 + return float(count) + + # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- @@ -360,6 +780,14 @@ def _builtin_concatenate(args: list[Any]) -> str: "MID": _builtin_mid, "LEN": _builtin_len, "CONCATENATE": _builtin_concatenate, + "INDEX": _builtin_index, + "MATCH": _builtin_match, + "XLOOKUP": _builtin_xlookup, + "CHOOSE": _builtin_choose, + "SUMIF": _builtin_sumif, + "SUMIFS": _builtin_sumifs, + "COUNTIF": _builtin_countif, + "COUNTIFS": _builtin_countifs, } diff --git a/tests/test_calc_formulas_integration.py b/tests/test_calc_formulas_integration.py index f3e6860..d9928ec 100644 --- a/tests/test_calc_formulas_integration.py +++ b/tests/test_calc_formulas_integration.py @@ -330,8 +330,8 @@ def test_pmt_perturbation(self) -> None: class TestBuiltinRegistryCoverage: - def test_25_builtins_registered(self) -> None: - """All 25 builtin functions should be in the registry.""" + def test_33_builtins_registered(self) -> None: + """All 33 builtin functions should be in the registry.""" from wolfxl.calc._functions import FunctionRegistry reg = FunctionRegistry() @@ -341,6 +341,8 @@ def test_25_builtins_registered(self) -> None: "IF", "IFERROR", "AND", "OR", "NOT", "COUNT", "COUNTA", "MIN", "MAX", "AVERAGE", "LEFT", "RIGHT", "MID", "LEN", "CONCATENATE", + "INDEX", "MATCH", "XLOOKUP", "CHOOSE", + "SUMIF", "SUMIFS", "COUNTIF", "COUNTIFS", } assert expected == reg.supported_functions @@ -353,6 +355,8 @@ def test_each_builtin_callable_from_evaluator(self) -> None: "IF", "IFERROR", "AND", "OR", "NOT", "COUNT", "COUNTA", "MIN", "MAX", "AVERAGE", "LEFT", "RIGHT", "MID", "LEN", "CONCATENATE", + "INDEX", "MATCH", "XLOOKUP", "CHOOSE", + "SUMIF", "SUMIFS", "COUNTIF", "COUNTIFS", ]: assert ev._functions.has(name), f"Missing builtin: {name}" diff --git a/tests/test_calc_functions.py b/tests/test_calc_functions.py index 47ca7a5..2d5aead 100644 --- a/tests/test_calc_functions.py +++ b/tests/test_calc_functions.py @@ -12,8 +12,8 @@ class TestWhitelist: - def test_whitelist_has_39_functions(self) -> None: - assert len(FUNCTION_WHITELIST_V1) == 39 + def test_whitelist_has_43_functions(self) -> None: + assert len(FUNCTION_WHITELIST_V1) == 43 def test_all_categories_represented(self) -> None: categories = set(FUNCTION_WHITELIST_V1.values()) diff --git a/tests/test_calc_lookup_conditional.py b/tests/test_calc_lookup_conditional.py new file mode 100644 index 0000000..eb98233 --- /dev/null +++ b/tests/test_calc_lookup_conditional.py @@ -0,0 +1,500 @@ +"""Tests for lookup and conditional aggregation builtins. + +Covers: INDEX, MATCH, XLOOKUP, CHOOSE, SUMIF, SUMIFS, COUNTIF, COUNTIFS, +the ``&`` string concatenation operator, RangeValue backward compatibility, +and perturbation propagation through lookup/conditional chains. +""" + +from __future__ import annotations + +import pytest + +import wolfxl +from wolfxl.calc._evaluator import WorkbookEvaluator +from wolfxl.calc._functions import RangeValue, _match_criteria, _parse_criteria + + +# --------------------------------------------------------------------------- +# Helper: build workbook with data + formulas +# --------------------------------------------------------------------------- + + +def _make_wb(data: dict[str, object], formulas: dict[str, str]) -> wolfxl.Workbook: + wb = wolfxl.Workbook() + ws = wb.active + for ref, val in data.items(): + ws[ref] = val + for ref, formula in formulas.items(): + ws[ref] = formula + return wb + + +def _calc(wb: wolfxl.Workbook) -> dict[str, object]: + ev = WorkbookEvaluator() + ev.load(wb) + return ev.calculate() + + +# --------------------------------------------------------------------------- +# RangeValue unit tests +# --------------------------------------------------------------------------- + + +class TestRangeValue: + def test_get_2d(self) -> None: + rv = RangeValue(values=[1, 2, 3, 4, 5, 6], n_rows=2, n_cols=3) + assert rv.get(1, 1) == 1 + assert rv.get(1, 3) == 3 + assert rv.get(2, 2) == 5 + + def test_get_out_of_bounds(self) -> None: + rv = RangeValue(values=[1, 2, 3], n_rows=3, n_cols=1) + assert rv.get(4, 1) is None + assert rv.get(0, 1) is None + + def test_column_extraction(self) -> None: + rv = RangeValue(values=[1, 2, 3, 4, 5, 6], n_rows=3, n_cols=2) + assert rv.column(1) == [1, 3, 5] + assert rv.column(2) == [2, 4, 6] + + def test_row_extraction(self) -> None: + rv = RangeValue(values=[1, 2, 3, 4, 5, 6], n_rows=3, n_cols=2) + assert rv.row(1) == [1, 2] + assert rv.row(3) == [5, 6] + + def test_iterable_and_len(self) -> None: + rv = RangeValue(values=[10, 20, 30], n_rows=3, n_cols=1) + assert list(rv) == [10, 20, 30] + assert len(rv) == 3 + + +# --------------------------------------------------------------------------- +# RangeValue backward compatibility with existing builtins +# --------------------------------------------------------------------------- + + +class TestRangeValueBackwardCompat: + def test_sum_with_range_value(self) -> None: + """SUM should still work when args contain RangeValue.""" + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30}, + {"B1": "=SUM(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 60.0 + + def test_and_with_range_value(self) -> None: + wb = _make_wb( + {"A1": True, "A2": True, "A3": True}, + {"B1": "=AND(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] is True + + def test_or_with_range_value(self) -> None: + wb = _make_wb( + {"A1": False, "A2": True, "A3": False}, + {"B1": "=OR(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] is True + + def test_counta_with_range_value(self) -> None: + wb = _make_wb( + {"A1": "hello", "A2": None, "A3": 42}, + {"B1": "=COUNTA(A1:A3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2.0 + + +# --------------------------------------------------------------------------- +# INDEX tests +# --------------------------------------------------------------------------- + + +class TestBuiltinIndex: + def test_1d_column_vector(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30, "A4": 40, "A5": 50}, + {"B1": "=INDEX(A1:A5,3)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 30 + + def test_2d_array(self) -> None: + wb = _make_wb( + {"A1": 1, "B1": 2, "C1": 3, + "A2": 4, "B2": 5, "C2": 6, + "A3": 7, "B3": 8, "C3": 9}, + {"D1": "=INDEX(A1:C3,2,2)"}, + ) + results = _calc(wb) + assert results["Sheet!D1"] == 5 + + def test_1d_horizontal(self) -> None: + wb = _make_wb( + {"A1": 100, "B1": 200, "C1": 300}, + {"D1": "=INDEX(A1:C1,2)"}, + ) + results = _calc(wb) + assert results["Sheet!D1"] == 200 + + def test_nested_index_match(self) -> None: + """The critical INDEX/MATCH pattern used in financial models.""" + wb = _make_wb( + {"A1": "Revenue", "A2": "COGS", "A3": "OpEx", "A4": "Tax", "A5": "NetInc", + "B1": 1000, "B2": 600, "B3": 200, "B4": 50, "B5": 150, + "C1": "COGS"}, + {"D1": "=INDEX(B1:B5,MATCH(C1,A1:A5,0))"}, + ) + results = _calc(wb) + assert results["Sheet!D1"] == 600 + + def test_out_of_bounds(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20}, + {"B1": "=INDEX(A1:A2,5)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == "#REF!" + + +# --------------------------------------------------------------------------- +# MATCH tests +# --------------------------------------------------------------------------- + + +class TestBuiltinMatch: + def test_exact_match_numeric(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30, "A4": 40, "A5": 50}, + {"B1": "=MATCH(30,A1:A5,0)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 3 + + def test_case_insensitive_string(self) -> None: + wb = _make_wb( + {"A1": "Apple", "A2": "Banana", "A3": "Cherry"}, + {"B1": '=MATCH("banana",A1:A3,0)'}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2 + + def test_not_found(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 3}, + {"B1": "=MATCH(99,A1:A3,0)"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == "#N/A" + + def test_approximate_match_ascending(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 20, "A3": 30, "A4": 40}, + {"B1": "=MATCH(25,A1:A4,1)"}, + ) + results = _calc(wb) + # Largest <= 25 is 20 at position 2 + assert results["Sheet!B1"] == 2 + + +# --------------------------------------------------------------------------- +# XLOOKUP tests +# --------------------------------------------------------------------------- + + +class TestBuiltinXlookup: + def test_basic_exact(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 3, + "B1": "Red", "B2": "Green", "B3": "Blue"}, + {"C1": "=XLOOKUP(2,A1:A3,B1:B3)"}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == "Green" + + def test_not_found_default(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 3, + "B1": "Red", "B2": "Green", "B3": "Blue"}, + {"C1": '=XLOOKUP(99,A1:A3,B1:B3,"Not found")'}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == "Not found" + + def test_string_lookup(self) -> None: + wb = _make_wb( + {"A1": "Revenue", "A2": "COGS", "A3": "OpEx", + "B1": 1000, "B2": 600, "B3": 200}, + {"C1": '=XLOOKUP("COGS",A1:A3,B1:B3)'}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == 600 + + +# --------------------------------------------------------------------------- +# CHOOSE tests +# --------------------------------------------------------------------------- + + +class TestBuiltinChoose: + def test_basic_selection(self) -> None: + wb = _make_wb( + {}, + {"A1": '=CHOOSE(2,"a","b","c")'}, + ) + results = _calc(wb) + assert results["Sheet!A1"] == "b" + + def test_with_cell_refs(self) -> None: + wb = _make_wb( + {"A1": 3, "B1": 100, "B2": 200, "B3": 300}, + {"C1": "=CHOOSE(A1,B1,B2,B3)"}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == 300 + + +# --------------------------------------------------------------------------- +# SUMIF tests +# --------------------------------------------------------------------------- + + +class TestBuiltinSumif: + def test_operator_criteria(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, "A5": 20, + "B1": 1, "B2": 2, "B3": 3, "B4": 4, "B5": 5}, + {"C1": '=SUMIF(A1:A5,">50",B1:B5)'}, + ) + results = _calc(wb) + # A2=60 and A4=80 match >50 -> B2+B4 = 2+4 = 6 + assert results["Sheet!C1"] == 6.0 + + def test_string_exact_match(self) -> None: + wb = _make_wb( + {"A1": "Sales", "A2": "Marketing", "A3": "Sales", "A4": "Engineering", + "B1": 100, "B2": 200, "B3": 300, "B4": 400}, + {"C1": '=SUMIF(A1:A4,"Sales",B1:B4)'}, + ) + results = _calc(wb) + assert results["Sheet!C1"] == 400.0 # 100 + 300 + + def test_wildcard_criteria(self) -> None: + wb = _make_wb( + {"A1": "apple", "A2": "apricot", "A3": "banana", "A4": "avocado", + "B1": 10, "B2": 20, "B3": 30, "B4": 40}, + {"C1": '=SUMIF(A1:A4,"a*",B1:B4)'}, + ) + results = _calc(wb) + # apple, apricot, avocado match "a*" -> 10+20+40 = 70 + assert results["Sheet!C1"] == 70.0 + + def test_no_sum_range(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, "A5": 20}, + {"B1": '=SUMIF(A1:A5,">50")'}, + ) + results = _calc(wb) + # Sums criteria range itself: 60 + 80 = 140 + assert results["Sheet!B1"] == 140.0 + + +# --------------------------------------------------------------------------- +# SUMIFS tests +# --------------------------------------------------------------------------- + + +class TestBuiltinSumifs: + def test_two_criteria(self) -> None: + wb = _make_wb( + {"A1": 20, "A2": 5, "A3": 30, "A4": 15, "A5": 25, + "B1": "Sales", "B2": "Sales", "B3": "Marketing", "B4": "Sales", "B5": "Sales", + "C1": 100, "C2": 200, "C3": 300, "C4": 400, "C5": 500}, + {"D1": '=SUMIFS(C1:C5,A1:A5,">10",B1:B5,"Sales")'}, + ) + results = _calc(wb) + # A>10 AND B="Sales": rows 1 (20,Sales,100), 4 (15,Sales,400), 5 (25,Sales,500) = 1000 + assert results["Sheet!D1"] == 1000.0 + + def test_numeric_criteria_pair(self) -> None: + wb = _make_wb( + {"A1": 1, "A2": 2, "A3": 1, "A4": 2, + "B1": 10, "B2": 10, "B3": 20, "B4": 20, + "C1": 100, "C2": 200, "C3": 300, "C4": 400}, + {"D1": "=SUMIFS(C1:C4,A1:A4,2,B1:B4,20)"}, + ) + results = _calc(wb) + # A=2 AND B=20: row 4 -> C4=400 + assert results["Sheet!D1"] == 400.0 + + +# --------------------------------------------------------------------------- +# COUNTIF tests +# --------------------------------------------------------------------------- + + +class TestBuiltinCountif: + def test_count_gt_50(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, "A5": 20}, + {"B1": '=COUNTIF(A1:A5,">50")'}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2.0 + + def test_string_match(self) -> None: + wb = _make_wb( + {"A1": "Yes", "A2": "No", "A3": "yes", "A4": "YES"}, + {"B1": '=COUNTIF(A1:A4,"Yes")'}, + ) + results = _calc(wb) + # Case-insensitive: all 3 "yes" variants match + assert results["Sheet!B1"] == 3.0 + + def test_wildcard(self) -> None: + wb = _make_wb( + {"A1": "abc", "A2": "def", "A3": "abx", "A4": None}, + {"B1": '=COUNTIF(A1:A4,"ab*")'}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == 2.0 + + +# --------------------------------------------------------------------------- +# COUNTIFS tests +# --------------------------------------------------------------------------- + + +class TestBuiltinCountifs: + def test_dual_criteria(self) -> None: + wb = _make_wb( + {"A1": "Sales", "A2": "Marketing", "A3": "Sales", "A4": "Sales", + "B1": 100, "B2": 200, "B3": 50, "B4": 150}, + {"C1": '=COUNTIFS(A1:A4,"Sales",B1:B4,">80")'}, + ) + results = _calc(wb) + # Sales AND >80: rows 1 (Sales,100) and 4 (Sales,150) = 2 + assert results["Sheet!C1"] == 2.0 + + +# --------------------------------------------------------------------------- +# & string concatenation operator tests +# --------------------------------------------------------------------------- + + +class TestAmpersandOperator: + def test_basic_string_concat(self) -> None: + wb = _make_wb( + {}, + {"A1": '="Hello"&" "&"World"'}, + ) + results = _calc(wb) + assert results["Sheet!A1"] == "Hello World" + + def test_dynamic_criteria_with_sumif(self) -> None: + wb = _make_wb( + {"A1": 10, "A2": 60, "A3": 30, "A4": 80, + "B1": 1, "B2": 2, "B3": 3, "B4": 4, + "C1": 50}, + {"D1": '=SUMIF(A1:A4,">"&C1,B1:B4)'}, + ) + results = _calc(wb) + # ">"&50 = ">50" -> A2=60, A4=80 match -> B2+B4 = 2+4 = 6 + assert results["Sheet!D1"] == 6.0 + + def test_cell_ref_concat(self) -> None: + wb = _make_wb( + {"A1": "Hello", "A2": " World"}, + {"B1": "=A1&A2"}, + ) + results = _calc(wb) + assert results["Sheet!B1"] == "Hello World" + + +# --------------------------------------------------------------------------- +# Criteria engine unit tests +# --------------------------------------------------------------------------- + + +class TestCriteriaEngine: + def test_numeric_exact(self) -> None: + assert _match_criteria(100, 100) is True + assert _match_criteria(100, 99) is False + + def test_operator_gt(self) -> None: + pred = _parse_criteria(">50") + assert pred(60) is True + assert pred(50) is False + assert pred(40) is False + + def test_operator_not_equal(self) -> None: + pred = _parse_criteria("<>0") + assert pred(1) is True + assert pred(0) is False + + def test_wildcard(self) -> None: + pred = _parse_criteria("app*") + assert pred("apple") is True + assert pred("application") is True + assert pred("banana") is False + + def test_none_handling(self) -> None: + pred = _parse_criteria(">0") + assert pred(None) is False + + +# --------------------------------------------------------------------------- +# Perturbation propagation tests +# --------------------------------------------------------------------------- + + +class TestPerturbationPropagation: + def test_perturbation_through_index_match(self) -> None: + """Perturbing a data cell should propagate through INDEX/MATCH.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "X" + ws["A2"] = "Y" + ws["A3"] = "Z" + ws["B1"] = 100 + ws["B2"] = 200 + ws["B3"] = 300 + ws["C1"] = "Y" + ws["D1"] = "=INDEX(B1:B3,MATCH(C1,A1:A3,0))" + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!D1"] == 200 + + # Perturb B2 (the cell INDEX/MATCH resolves to) + recalc = ev.recalculate({"Sheet!B2": 999}) + delta_map = {d.cell_ref: d for d in recalc.deltas} + assert "Sheet!D1" in delta_map + assert delta_map["Sheet!D1"].new_value == 999 + + def test_perturbation_through_sumif(self) -> None: + """Perturbing a sum_range cell should propagate through SUMIF.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Sales" + ws["A2"] = "Marketing" + ws["A3"] = "Sales" + ws["B1"] = 100 + ws["B2"] = 200 + ws["B3"] = 300 + ws["C1"] = '=SUMIF(A1:A3,"Sales",B1:B3)' + + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!C1"] == 400.0 + + # Perturb B1 -> should change SUMIF result + recalc = ev.recalculate({"Sheet!B1": 500}) + delta_map = {d.cell_ref: d for d in recalc.deltas} + assert "Sheet!C1" in delta_map + assert delta_map["Sheet!C1"].new_value == 800.0 # 500 + 300 From 73831a15d4b2ed03772b47421b8bdb2b7c6f07c3 Mon Sep 17 00:00:00 2001 From: Wolfgang Schoenberger <221313372+wolfiesch@users.noreply.github.com> Date: Fri, 20 Feb 2026 01:41:10 -0800 Subject: [PATCH 3/3] fix: address PR #3 review comments - POWER: return #NUM! for negative base with fractional exponent instead of leaking complex numbers (Copilot #2, Codex #11) - LEFT: return #VALUE! for negative num_chars (Copilot #1) - MID: return #VALUE! for start < 1 or num_chars < 0 (Copilot #8) - range_shape: use min/max instead of abs() for consistency with expand_range (Copilot #3) - Add @_requires_formulas skip marker to formulas-dependent test classes so CI passes without wolfxl[calc] extra (Codex #10) - Add 4 edge case tests: POWER(-1,0.5), LEFT(-1), MID(0,..), MID(.,-1) Co-Authored-By: Claude Opus 4.6 --- python/wolfxl/calc/_functions.py | 9 +++- python/wolfxl/calc/_parser.py | 5 ++- tests/test_calc_formulas_integration.py | 59 +++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/python/wolfxl/calc/_functions.py b/python/wolfxl/calc/_functions.py index 6646684..9e48867 100644 --- a/python/wolfxl/calc/_functions.py +++ b/python/wolfxl/calc/_functions.py @@ -306,12 +306,15 @@ def _builtin_mod(args: list[Any]) -> float: return nums[0] - nums[1] * math.floor(nums[0] / nums[1]) -def _builtin_power(args: list[Any]) -> float: +def _builtin_power(args: list[Any]) -> float | str: if len(args) != 2: raise ValueError("POWER requires exactly 2 arguments") nums = _coerce_numeric(args) if len(nums) != 2: raise ValueError("POWER: non-numeric argument") + # Excel returns #NUM! for negative base with fractional exponent + if nums[0] < 0 and not float(nums[1]).is_integer(): + return "#NUM!" return nums[0] ** nums[1] @@ -355,6 +358,8 @@ def _builtin_left(args: list[Any]) -> str: raise ValueError("LEFT requires 1 or 2 arguments") text = _coerce_string(args[0]) num_chars = int(_coerce_numeric([args[1]])[0]) if len(args) > 1 else 1 + if num_chars < 0: + return "#VALUE!" return text[:num_chars] @@ -372,6 +377,8 @@ def _builtin_mid(args: list[Any]) -> str: text = _coerce_string(args[0]) start = int(_coerce_numeric([args[1]])[0]) num_chars = int(_coerce_numeric([args[2]])[0]) + if start < 1 or num_chars < 0: + return "#VALUE!" # Excel MID is 1-indexed return text[start - 1 : start - 1 + num_chars] diff --git a/python/wolfxl/calc/_parser.py b/python/wolfxl/calc/_parser.py index 567588d..f8454fb 100644 --- a/python/wolfxl/calc/_parser.py +++ b/python/wolfxl/calc/_parser.py @@ -168,8 +168,9 @@ def range_shape(range_ref: str) -> tuple[int, int]: start_row, start_col = a1_to_rowcol(parts[0].replace("$", "")) end_row, end_col = a1_to_rowcol(parts[1].replace("$", "")) - n_rows = abs(end_row - start_row) + 1 - n_cols = abs(end_col - start_col) + 1 + # Use min/max for consistency with expand_range() + n_rows = max(start_row, end_row) - min(start_row, end_row) + 1 + n_cols = max(start_col, end_col) - min(start_col, end_col) + 1 return (n_rows, n_cols) diff --git a/tests/test_calc_formulas_integration.py b/tests/test_calc_formulas_integration.py index d9928ec..da08e01 100644 --- a/tests/test_calc_formulas_integration.py +++ b/tests/test_calc_formulas_integration.py @@ -11,6 +11,17 @@ import wolfxl from wolfxl.calc._evaluator import WorkbookEvaluator +_has_formulas = pytest.importorskip is not None # always True, but we check below +try: + import formulas # noqa: F401 + _has_formulas = True +except ImportError: + _has_formulas = False + +_requires_formulas = pytest.mark.skipif( + not _has_formulas, reason="formulas library not installed (install wolfxl[calc])" +) + # --------------------------------------------------------------------------- # New builtin math functions @@ -93,6 +104,17 @@ def test_fractional_exponent(self) -> None: results = ev.calculate() assert results["Sheet!B1"] == 2.0 + def test_negative_base_fractional_exponent(self) -> None: + """POWER(-1, 0.5) should return #NUM! (complex result).""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = -1 + ws["B1"] = "=POWER(A1,0.5)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#NUM!" + class TestBuiltinSqrt: def test_basic(self) -> None: @@ -164,6 +186,17 @@ def test_default_one_char(self) -> None: results = ev.calculate() assert results["Sheet!B1"] == "H" + def test_negative_num_chars(self) -> None: + """LEFT with negative count returns #VALUE!.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = "=LEFT(A1,-1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#VALUE!" + class TestBuiltinRight: def test_basic(self) -> None: @@ -188,6 +221,28 @@ def test_basic(self) -> None: results = ev.calculate() assert results["Sheet!B1"] == "World" + def test_start_below_one(self) -> None: + """MID with start < 1 returns #VALUE!.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = "=MID(A1,0,3)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#VALUE!" + + def test_negative_num_chars(self) -> None: + """MID with negative num_chars returns #VALUE!.""" + wb = wolfxl.Workbook() + ws = wb.active + ws["A1"] = "Hello" + ws["B1"] = "=MID(A1,1,-1)" + ev = WorkbookEvaluator() + ev.load(wb) + results = ev.calculate() + assert results["Sheet!B1"] == "#VALUE!" + class TestBuiltinLen: def test_basic(self) -> None: @@ -220,6 +275,7 @@ def test_basic(self) -> None: # --------------------------------------------------------------------------- +@_requires_formulas class TestFormulasConstantFallback: """Formulas that use non-builtin functions with only literal arguments.""" @@ -254,6 +310,7 @@ def test_sln(self) -> None: # --------------------------------------------------------------------------- +@_requires_formulas class TestFormulasCellRefFallback: """Formulas that use non-builtin functions with cell references.""" @@ -302,6 +359,7 @@ def test_npv(self) -> None: # --------------------------------------------------------------------------- +@_requires_formulas class TestFormulasFallbackPerturbation: """Verify perturbation propagates through formulas-lib-evaluated cells.""" @@ -366,6 +424,7 @@ def test_each_builtin_callable_from_evaluator(self) -> None: # --------------------------------------------------------------------------- +@_requires_formulas class TestCombinedEvaluation: """Workbook mixing builtin-evaluated and formulas-lib-evaluated formulas."""