Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/wolfxl/calc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""wolfxl.calc - Formula evaluation engine for wolfxl workbooks."""

from wolfxl.calc._evaluator import WorkbookEvaluator
from wolfxl.calc._functions import FUNCTION_WHITELIST_V1, FunctionRegistry, is_supported
from wolfxl.calc._functions import FUNCTION_WHITELIST_V1, FunctionRegistry, RangeValue, is_supported
from wolfxl.calc._graph import DependencyGraph
from wolfxl.calc._parser import FormulaParser, all_references, expand_range
from wolfxl.calc._protocol import CalcEngine, CellDelta, RecalcResult
Expand All @@ -13,6 +13,7 @@
"FUNCTION_WHITELIST_V1",
"FormulaParser",
"FunctionRegistry",
"RangeValue",
"RecalcResult",
"WorkbookEvaluator",
"all_references",
Expand Down
181 changes: 171 additions & 10 deletions python/wolfxl/calc/_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,46 @@
Replaces fragile regex-based dispatch with a proper recursive descent
parser that handles balanced parentheses, operator precedence, and
arbitrarily nested expressions like ``=ROUND(SUM(A1:A5)*IF(B1>0,1.1,1.0),2)``.

When the ``formulas`` library is installed (via ``wolfxl[calc]``), unsupported
functions fall back to the library's Excel function implementations.
"""

from __future__ import annotations

import inspect
import logging
import re
from typing import TYPE_CHECKING, Any

from wolfxl.calc._functions import FunctionRegistry
from wolfxl.calc._functions import FunctionRegistry, RangeValue
from wolfxl.calc._graph import DependencyGraph
from wolfxl.calc._parser import expand_range
from wolfxl.calc._parser import expand_range, range_shape
from wolfxl.calc._protocol import CellDelta, RecalcResult

if TYPE_CHECKING:
from wolfxl._workbook import Workbook

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# formulas library availability
# ---------------------------------------------------------------------------

_formulas_available: bool | None = None


def _check_formulas() -> bool:
global _formulas_available
if _formulas_available is None:
try:
import formulas # noqa: F401

_formulas_available = True
except ImportError:
_formulas_available = False
return _formulas_available


# ---------------------------------------------------------------------------
# Expression parsing helpers
Expand Down Expand Up @@ -121,7 +143,7 @@ def _find_top_level_split(expr: str) -> tuple[str, str, str] | None:
matched_op = ch
elif ch == '=' and not (i >= 1 and expr[i - 1] in ('>', '<', '!')):
matched_op = ch
elif pass_type == "add" and ch in ('+', '-'):
elif pass_type == "add" and ch in ('+', '-', '&'):
matched_op = ch
elif pass_type == "mul" and ch in ('*', '/'):
matched_op = ch
Expand Down Expand Up @@ -169,7 +191,9 @@ def _has_top_level_colon(expr: str) -> bool:


def _binary_op(left: Any, op: str, right: Any) -> Any:
"""Evaluate an arithmetic binary operation."""
"""Evaluate an arithmetic or string binary operation."""
if op == '&':
return str(left if left is not None else "") + str(right if right is not None else "")
if not isinstance(left, (int, float)) or not isinstance(right, (int, float)):
return None
if op == '+':
Expand Down Expand Up @@ -261,6 +285,8 @@ def __init__(self) -> None:
self._graph = DependencyGraph()
self._functions = FunctionRegistry()
self._loaded = False
self._use_formulas = _check_formulas()
self._compiled_cache: dict[str, Any] = {} # formula -> compiled callable

def load(self, workbook: Workbook) -> None:
"""Scan workbook, store cell values, build dependency graph."""
Expand Down Expand Up @@ -357,14 +383,25 @@ def recalculate(
# ------------------------------------------------------------------

def _evaluate_formula(self, cell_ref: str, formula: str) -> Any:
"""Evaluate a single formula string (starting with ``=``)."""
"""Evaluate a single formula string (starting with ``=``).

Tries the builtin recursive descent evaluator first. If that returns
None (unsupported function), falls back to the ``formulas`` library.
"""
body = formula.strip()
if body.startswith('='):
body = body[1:]
sheet = self._sheet_from_ref(cell_ref)
result = self._eval_expr(body.strip(), sheet)
if result is not None:
return result

# Fallback: try the formulas library for unsupported functions
if self._use_formulas:
fb = self._formulas_fallback(formula, sheet)
if fb is not None:
return fb

logger.debug("Cannot evaluate formula %r in %s", formula, cell_ref)
return None

Expand Down Expand Up @@ -392,7 +429,7 @@ def _eval_expr(self, expr: str, sheet: str) -> Any:
left_str, op, right_str = split
left_val = self._eval_expr(left_str, sheet)
right_val = self._eval_expr(right_str, sheet)
if op in ('+', '-', '*', '/'):
if op in ('+', '-', '*', '/', '&'):
return _binary_op(left_val, op, right_val)
return _compare(left_val, right_val, op)

Expand Down Expand Up @@ -457,7 +494,10 @@ def _resolve_cell_ref(self, expr: str, sheet: str) -> Any:
return self._cell_values.get(ref)

def _resolve_range(self, arg: str, sheet: str) -> list[Any]:
"""Resolve a range like ``A1:A5`` to a list of cell values."""
"""Resolve a range like ``A1:A5`` to a flat list of cell values.

Kept for the ``formulas`` library fallback which needs flat lists.
"""
clean = arg.strip().replace('$', '')
if '!' not in clean:
range_ref = f"{sheet}!{clean.upper()}"
Expand All @@ -468,6 +508,20 @@ def _resolve_range(self, arg: str, sheet: str) -> list[Any]:
cells = expand_range(range_ref)
return [self._cell_values.get(c) for c in cells]

def _resolve_range_2d(self, arg: str, sheet: str) -> RangeValue:
"""Resolve a range to a :class:`RangeValue` preserving 2D shape."""
clean = arg.strip().replace('$', '')
if '!' not in clean:
range_ref = f"{sheet}!{clean.upper()}"
else:
parts = clean.split('!', 1)
ref_sheet = parts[0].strip("'")
range_ref = f"{ref_sheet}!{parts[1].upper()}"
cells = expand_range(range_ref)
n_rows, n_cols = range_shape(range_ref)
values = [self._cell_values.get(c) for c in cells]
return RangeValue(values=values, n_rows=n_rows, n_cols=n_cols)

# ------------------------------------------------------------------
# Function dispatch
# ------------------------------------------------------------------
Expand Down Expand Up @@ -533,18 +587,125 @@ def _parse_function_args(self, args_str: str, sheet: str) -> list[Any]:
def _resolve_arg(self, arg: str, sheet: str) -> Any:
"""Resolve a single function argument.

Range references (containing ``:`` at depth 0) return a list of
cell values. Everything else delegates to ``_eval_expr``.
Range references (containing ``:`` at depth 0) return a
:class:`RangeValue` with 2D shape metadata. Everything else
delegates to ``_eval_expr``.
"""
if not arg:
return None

# Range reference at top level
if _has_top_level_colon(arg) and not arg.startswith('"'):
return self._resolve_range(arg, sheet)
return self._resolve_range_2d(arg, sheet)

return self._eval_expr(arg, sheet)

# ------------------------------------------------------------------
# formulas library fallback
# ------------------------------------------------------------------

def _formulas_fallback(self, formula: str, sheet: str) -> Any:
"""Evaluate a formula via the ``formulas`` library.

Compiles the formula into a callable, resolves its cell reference
parameters from ``_cell_values``, and returns the scalar result.
"""
import formulas as fm
import numpy as np
Comment on lines +613 to +614
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The formulas library and numpy are imported inside the _formulas_fallback method rather than at module level. While this lazy importing pattern is intentional to avoid dependencies when the formulas library is not installed, it means these imports will be executed on every call to _formulas_fallback. Consider moving the imports to the top of the method or caching the imported modules as class attributes after the first import.

Suggested change
import formulas as fm
import numpy as np
# Lazily import and cache the optional formulas and numpy modules
fm = getattr(self, "_formulas_module", None)
np = getattr(self, "_numpy_module", None)
if fm is None or np is None:
try:
import formulas as _fm # type: ignore[import-not-found]
import numpy as _np
except ImportError:
logger.debug(
"formulas fallback requested, but 'formulas' and/or 'numpy' "
"are not installed"
)
return None
self._formulas_module = fm = _fm
self._numpy_module = np = _np

Copilot uses AI. Check for mistakes.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lazy import is intentional - formulas and numpy are optional extras (wolfxl[calc]). This method only runs as a fallback for functions without builtins. Module-level caching would add complexity for marginal benefit since Python already caches imported modules in sys.modules.


# Compile (with caching)
compiled = self._compiled_cache.get(formula)
if compiled is None:
try:
result = fm.Parser().ast(formula)
if result and len(result) > 1:
compiled = result[1].compile()
self._compiled_cache[formula] = compiled
except Exception:
logger.debug("formulas: cannot compile %r", formula)
return None
if compiled is None:
return None

# Resolve parameters: the compiled function's signature tells us
# which cell references it needs (e.g., "A1:A5", "B1")
try:
params = list(inspect.signature(compiled).parameters.keys())
except (ValueError, TypeError):
params = []

if not params:
# No cell references - purely constant formula (e.g., =PMT(0.05/12,360,200000))
try:
raw = compiled()
return self._normalize_formulas_result(raw)
except Exception as e:
logger.debug("formulas: error evaluating %r: %s", formula, e)
return None

# Map parameter names to cell values
args: list[Any] = []
for param in params:
# Param names from formulas lib use the formula's raw ref tokens
# like "A1:A5" or "A1" (no sheet prefix for same-sheet refs)
if ':' in param:
# Range parameter - resolve to numpy array
# Qualify with sheet name for range_shape parsing
qualified = param if '!' in param else f"{sheet}!{param}"
values = self._resolve_range(param, sheet)
flat = np.array([v if v is not None else 0 for v in values])
n_rows, n_cols = range_shape(qualified)
if n_cols > 1 and flat.size == n_rows * n_cols:
flat = flat.reshape(n_rows, n_cols)
args.append(flat)
Comment on lines +651 to +660
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When converting None values to 0 for numpy arrays (line 636), this could lead to incorrect results for functions that should distinguish between empty cells and zeros. For example, COUNTA should count non-empty cells, but this conversion would make None cells appear as zeros. Consider preserving None values or using numpy's nan for missing values, depending on what the formulas library expects.

Copilot uses AI. Check for mistakes.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentional: the formulas library requires numpy arrays with numeric values. The None→0 coercion is scoped to the formulas fallback path only - builtin functions like COUNTA handle None directly. Using np.nan would break formulas lib function signatures that expect float arrays.

else:
# Single cell parameter
val = self._resolve_cell_ref(param, sheet)
if isinstance(val, (int, float)):
args.append(np.float64(val))
elif isinstance(val, str):
args.append(val)
else:
args.append(np.float64(0) if val is None else val)

try:
raw = compiled(*args)
return self._normalize_formulas_result(raw)
except Exception as e:
logger.debug("formulas: error evaluating %r: %s", formula, e)
return None

@staticmethod
def _normalize_formulas_result(raw: Any) -> Any:
"""Convert a ``formulas`` library result to a plain Python value."""
if raw is None:
return None
# numpy scalar types
if hasattr(raw, 'item'):
try:
val = raw.item()
if isinstance(val, float) and val == int(val):
return int(val)
return val
except (ValueError, TypeError):
pass
# numpy array with single element
if hasattr(raw, 'shape') and hasattr(raw, 'flat'):
try:
if raw.size == 1:
val = raw.flat[0]
if hasattr(val, 'item'):
val = val.item()
if isinstance(val, float) and val == int(val):
return int(val)
return val
except (ValueError, TypeError, IndexError):
pass
# Already a plain Python type
if isinstance(raw, (int, float, str, bool)):
return raw
return raw

@staticmethod
def _sheet_from_ref(cell_ref: str) -> str:
"""Extract sheet name from a canonical cell reference."""
Expand Down
Loading
Loading