-
Notifications
You must be signed in to change notification settings - Fork 0
feat(calc): formulas fallback + 18 new builtins (lookup & conditional) #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,24 +3,46 @@ | |
| Replaces fragile regex-based dispatch with a proper recursive descent | ||
| parser that handles balanced parentheses, operator precedence, and | ||
| arbitrarily nested expressions like ``=ROUND(SUM(A1:A5)*IF(B1>0,1.1,1.0),2)``. | ||
|
|
||
| When the ``formulas`` library is installed (via ``wolfxl[calc]``), unsupported | ||
| functions fall back to the library's Excel function implementations. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import inspect | ||
| import logging | ||
| import re | ||
| from typing import TYPE_CHECKING, Any | ||
|
|
||
| from wolfxl.calc._functions import FunctionRegistry | ||
| from wolfxl.calc._functions import FunctionRegistry, RangeValue | ||
| from wolfxl.calc._graph import DependencyGraph | ||
| from wolfxl.calc._parser import expand_range | ||
| from wolfxl.calc._parser import expand_range, range_shape | ||
| from wolfxl.calc._protocol import CellDelta, RecalcResult | ||
|
|
||
| if TYPE_CHECKING: | ||
| from wolfxl._workbook import Workbook | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # formulas library availability | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
| _formulas_available: bool | None = None | ||
|
|
||
|
|
||
| def _check_formulas() -> bool: | ||
| global _formulas_available | ||
| if _formulas_available is None: | ||
| try: | ||
| import formulas # noqa: F401 | ||
|
|
||
| _formulas_available = True | ||
| except ImportError: | ||
| _formulas_available = False | ||
| return _formulas_available | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Expression parsing helpers | ||
|
|
@@ -121,7 +143,7 @@ def _find_top_level_split(expr: str) -> tuple[str, str, str] | None: | |
| matched_op = ch | ||
| elif ch == '=' and not (i >= 1 and expr[i - 1] in ('>', '<', '!')): | ||
| matched_op = ch | ||
| elif pass_type == "add" and ch in ('+', '-'): | ||
| elif pass_type == "add" and ch in ('+', '-', '&'): | ||
| matched_op = ch | ||
| elif pass_type == "mul" and ch in ('*', '/'): | ||
| matched_op = ch | ||
|
|
@@ -169,7 +191,9 @@ def _has_top_level_colon(expr: str) -> bool: | |
|
|
||
|
|
||
| def _binary_op(left: Any, op: str, right: Any) -> Any: | ||
| """Evaluate an arithmetic binary operation.""" | ||
| """Evaluate an arithmetic or string binary operation.""" | ||
| if op == '&': | ||
| return str(left if left is not None else "") + str(right if right is not None else "") | ||
| if not isinstance(left, (int, float)) or not isinstance(right, (int, float)): | ||
| return None | ||
| if op == '+': | ||
|
|
@@ -261,6 +285,8 @@ def __init__(self) -> None: | |
| self._graph = DependencyGraph() | ||
| self._functions = FunctionRegistry() | ||
| self._loaded = False | ||
| self._use_formulas = _check_formulas() | ||
| self._compiled_cache: dict[str, Any] = {} # formula -> compiled callable | ||
|
|
||
| def load(self, workbook: Workbook) -> None: | ||
| """Scan workbook, store cell values, build dependency graph.""" | ||
|
|
@@ -357,14 +383,25 @@ def recalculate( | |
| # ------------------------------------------------------------------ | ||
|
|
||
| def _evaluate_formula(self, cell_ref: str, formula: str) -> Any: | ||
| """Evaluate a single formula string (starting with ``=``).""" | ||
| """Evaluate a single formula string (starting with ``=``). | ||
|
|
||
| Tries the builtin recursive descent evaluator first. If that returns | ||
| None (unsupported function), falls back to the ``formulas`` library. | ||
| """ | ||
| body = formula.strip() | ||
| if body.startswith('='): | ||
| body = body[1:] | ||
| sheet = self._sheet_from_ref(cell_ref) | ||
| result = self._eval_expr(body.strip(), sheet) | ||
| if result is not None: | ||
| return result | ||
|
|
||
| # Fallback: try the formulas library for unsupported functions | ||
| if self._use_formulas: | ||
| fb = self._formulas_fallback(formula, sheet) | ||
| if fb is not None: | ||
| return fb | ||
|
|
||
| logger.debug("Cannot evaluate formula %r in %s", formula, cell_ref) | ||
| return None | ||
|
|
||
|
|
@@ -392,7 +429,7 @@ def _eval_expr(self, expr: str, sheet: str) -> Any: | |
| left_str, op, right_str = split | ||
| left_val = self._eval_expr(left_str, sheet) | ||
| right_val = self._eval_expr(right_str, sheet) | ||
| if op in ('+', '-', '*', '/'): | ||
| if op in ('+', '-', '*', '/', '&'): | ||
| return _binary_op(left_val, op, right_val) | ||
| return _compare(left_val, right_val, op) | ||
|
|
||
|
|
@@ -457,7 +494,10 @@ def _resolve_cell_ref(self, expr: str, sheet: str) -> Any: | |
| return self._cell_values.get(ref) | ||
|
|
||
| def _resolve_range(self, arg: str, sheet: str) -> list[Any]: | ||
| """Resolve a range like ``A1:A5`` to a list of cell values.""" | ||
| """Resolve a range like ``A1:A5`` to a flat list of cell values. | ||
|
|
||
| Kept for the ``formulas`` library fallback which needs flat lists. | ||
| """ | ||
| clean = arg.strip().replace('$', '') | ||
| if '!' not in clean: | ||
| range_ref = f"{sheet}!{clean.upper()}" | ||
|
|
@@ -468,6 +508,20 @@ def _resolve_range(self, arg: str, sheet: str) -> list[Any]: | |
| cells = expand_range(range_ref) | ||
| return [self._cell_values.get(c) for c in cells] | ||
|
|
||
| def _resolve_range_2d(self, arg: str, sheet: str) -> RangeValue: | ||
| """Resolve a range to a :class:`RangeValue` preserving 2D shape.""" | ||
| clean = arg.strip().replace('$', '') | ||
| if '!' not in clean: | ||
| range_ref = f"{sheet}!{clean.upper()}" | ||
| else: | ||
| parts = clean.split('!', 1) | ||
| ref_sheet = parts[0].strip("'") | ||
| range_ref = f"{ref_sheet}!{parts[1].upper()}" | ||
| cells = expand_range(range_ref) | ||
| n_rows, n_cols = range_shape(range_ref) | ||
| values = [self._cell_values.get(c) for c in cells] | ||
| return RangeValue(values=values, n_rows=n_rows, n_cols=n_cols) | ||
|
|
||
| # ------------------------------------------------------------------ | ||
| # Function dispatch | ||
| # ------------------------------------------------------------------ | ||
|
|
@@ -533,18 +587,125 @@ def _parse_function_args(self, args_str: str, sheet: str) -> list[Any]: | |
| def _resolve_arg(self, arg: str, sheet: str) -> Any: | ||
| """Resolve a single function argument. | ||
|
|
||
| Range references (containing ``:`` at depth 0) return a list of | ||
| cell values. Everything else delegates to ``_eval_expr``. | ||
| Range references (containing ``:`` at depth 0) return a | ||
| :class:`RangeValue` with 2D shape metadata. Everything else | ||
| delegates to ``_eval_expr``. | ||
| """ | ||
| if not arg: | ||
| return None | ||
|
|
||
| # Range reference at top level | ||
| if _has_top_level_colon(arg) and not arg.startswith('"'): | ||
| return self._resolve_range(arg, sheet) | ||
| return self._resolve_range_2d(arg, sheet) | ||
|
|
||
| return self._eval_expr(arg, sheet) | ||
|
|
||
| # ------------------------------------------------------------------ | ||
| # formulas library fallback | ||
| # ------------------------------------------------------------------ | ||
|
|
||
| def _formulas_fallback(self, formula: str, sheet: str) -> Any: | ||
| """Evaluate a formula via the ``formulas`` library. | ||
|
|
||
| Compiles the formula into a callable, resolves its cell reference | ||
| parameters from ``_cell_values``, and returns the scalar result. | ||
| """ | ||
| import formulas as fm | ||
| import numpy as np | ||
|
|
||
| # Compile (with caching) | ||
| compiled = self._compiled_cache.get(formula) | ||
| if compiled is None: | ||
| try: | ||
| result = fm.Parser().ast(formula) | ||
| if result and len(result) > 1: | ||
| compiled = result[1].compile() | ||
| self._compiled_cache[formula] = compiled | ||
| except Exception: | ||
| logger.debug("formulas: cannot compile %r", formula) | ||
| return None | ||
| if compiled is None: | ||
| return None | ||
|
|
||
| # Resolve parameters: the compiled function's signature tells us | ||
| # which cell references it needs (e.g., "A1:A5", "B1") | ||
| try: | ||
| params = list(inspect.signature(compiled).parameters.keys()) | ||
| except (ValueError, TypeError): | ||
| params = [] | ||
|
|
||
| if not params: | ||
| # No cell references - purely constant formula (e.g., =PMT(0.05/12,360,200000)) | ||
| try: | ||
| raw = compiled() | ||
| return self._normalize_formulas_result(raw) | ||
| except Exception as e: | ||
| logger.debug("formulas: error evaluating %r: %s", formula, e) | ||
| return None | ||
|
|
||
| # Map parameter names to cell values | ||
| args: list[Any] = [] | ||
| for param in params: | ||
| # Param names from formulas lib use the formula's raw ref tokens | ||
| # like "A1:A5" or "A1" (no sheet prefix for same-sheet refs) | ||
| if ':' in param: | ||
| # Range parameter - resolve to numpy array | ||
| # Qualify with sheet name for range_shape parsing | ||
| qualified = param if '!' in param else f"{sheet}!{param}" | ||
| values = self._resolve_range(param, sheet) | ||
| flat = np.array([v if v is not None else 0 for v in values]) | ||
| n_rows, n_cols = range_shape(qualified) | ||
| if n_cols > 1 and flat.size == n_rows * n_cols: | ||
| flat = flat.reshape(n_rows, n_cols) | ||
| args.append(flat) | ||
|
Comment on lines
+651
to
+660
|
||
| else: | ||
| # Single cell parameter | ||
| val = self._resolve_cell_ref(param, sheet) | ||
| if isinstance(val, (int, float)): | ||
| args.append(np.float64(val)) | ||
| elif isinstance(val, str): | ||
| args.append(val) | ||
| else: | ||
| args.append(np.float64(0) if val is None else val) | ||
|
|
||
| try: | ||
| raw = compiled(*args) | ||
| return self._normalize_formulas_result(raw) | ||
| except Exception as e: | ||
| logger.debug("formulas: error evaluating %r: %s", formula, e) | ||
| return None | ||
|
|
||
| @staticmethod | ||
| def _normalize_formulas_result(raw: Any) -> Any: | ||
| """Convert a ``formulas`` library result to a plain Python value.""" | ||
| if raw is None: | ||
| return None | ||
| # numpy scalar types | ||
| if hasattr(raw, 'item'): | ||
| try: | ||
| val = raw.item() | ||
| if isinstance(val, float) and val == int(val): | ||
| return int(val) | ||
| return val | ||
| except (ValueError, TypeError): | ||
| pass | ||
| # numpy array with single element | ||
| if hasattr(raw, 'shape') and hasattr(raw, 'flat'): | ||
| try: | ||
| if raw.size == 1: | ||
| val = raw.flat[0] | ||
| if hasattr(val, 'item'): | ||
| val = val.item() | ||
| if isinstance(val, float) and val == int(val): | ||
| return int(val) | ||
| return val | ||
| except (ValueError, TypeError, IndexError): | ||
| pass | ||
| # Already a plain Python type | ||
| if isinstance(raw, (int, float, str, bool)): | ||
| return raw | ||
| return raw | ||
|
|
||
| @staticmethod | ||
| def _sheet_from_ref(cell_ref: str) -> str: | ||
| """Extract sheet name from a canonical cell reference.""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The formulas library and numpy are imported inside the _formulas_fallback method rather than at module level. While this lazy importing pattern is intentional to avoid dependencies when the formulas library is not installed, it means these imports will be executed on every call to _formulas_fallback. Consider moving the imports to the top of the method or caching the imported modules as class attributes after the first import.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The lazy import is intentional -
formulasandnumpyare optional extras (wolfxl[calc]). This method only runs as a fallback for functions without builtins. Module-level caching would add complexity for marginal benefit since Python already caches imported modules insys.modules.