From cff3368f856d470ec9df99a5d2241261529a8725 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 24 Dec 2024 18:26:50 -0600 Subject: [PATCH 1/6] fix test for whether bias is trained --- lenskit/lenskit/basic/bias.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lenskit/lenskit/basic/bias.py b/lenskit/lenskit/basic/bias.py index fbea9c9f3..d6ea6f2d1 100644 --- a/lenskit/lenskit/basic/bias.py +++ b/lenskit/lenskit/basic/bias.py @@ -286,7 +286,7 @@ def __init__( @property def is_trained(self) -> bool: - return hasattr(self, "bias_") + return hasattr(self, "model_") def train(self, data: Dataset): """ From 2a8bf77f1fb4f4136fc4dd745d18450369372659 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 24 Dec 2024 18:34:07 -0600 Subject: [PATCH 2/6] test that the composite fallback isn't just calling the second thing all the time --- lenskit/tests/basic/test_composite.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lenskit/tests/basic/test_composite.py b/lenskit/tests/basic/test_composite.py index 0fc5226dd..25f24d703 100644 --- a/lenskit/tests/basic/test_composite.py +++ b/lenskit/tests/basic/test_composite.py @@ -20,7 +20,9 @@ from lenskit.data import Dataset from lenskit.data.items import ItemList from lenskit.data.types import ID +from lenskit.operations import predict, score from lenskit.pipeline import Pipeline +from lenskit.pipeline.common import RecPipelineBuilder from lenskit.util.test import ml_ds, ml_ratings # noqa: F401 @@ -51,3 +53,26 @@ def test_fallback_fill_missing(ml_ds: Dataset): assert scores[:2] == approx(known(2, ItemList(item_ids=items[:2])).scores()) assert scores[2:] == approx(bias(2, ItemList(item_ids=items[2:])).scores()) + + +def test_fallback_double_bias(rng: np.random.Generator, ml_ds: Dataset): + builder = RecPipelineBuilder() + builder.scorer(BiasScorer(damping=50)) + builder.predicts_ratings(BiasScorer(damping=0)) + pipe = builder.build() + + pipe.train(ml_ds) + + for user in rng.choice(ml_ds.users.ids(), 100): + items = rng.choice(ml_ds.items.ids(), 500) + scores = score(pipe, user, items) + scores = scores.scores() + assert scores is not None + assert not np.any(np.isnan(scores)) + + preds = predict(pipe, user, items) + preds = preds.scores() + assert preds is not None + assert not np.any(np.isnan(preds)) + + assert scores == approx(preds) From 763325a8d4b6b3e5c5c0982b66087b98ce3f8d96 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 24 Dec 2024 19:27:06 -0600 Subject: [PATCH 3/6] add debug logging to composite test --- lenskit/tests/basic/test_composite.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lenskit/tests/basic/test_composite.py b/lenskit/tests/basic/test_composite.py index 25f24d703..5166d4716 100644 --- a/lenskit/tests/basic/test_composite.py +++ b/lenskit/tests/basic/test_composite.py @@ -4,6 +4,7 @@ # Licensed under the MIT license, see LICENSE.md for details. # SPDX-License-Identifier: MIT +import logging import pickle from typing import Any @@ -25,6 +26,8 @@ from lenskit.pipeline.common import RecPipelineBuilder from lenskit.util.test import ml_ds, ml_ratings # noqa: F401 +_log = logging.getLogger(__name__) + def test_fallback_fill_missing(ml_ds: Dataset): pipe = Pipeline() @@ -58,8 +61,10 @@ def test_fallback_fill_missing(ml_ds: Dataset): def test_fallback_double_bias(rng: np.random.Generator, ml_ds: Dataset): builder = RecPipelineBuilder() builder.scorer(BiasScorer(damping=50)) - builder.predicts_ratings(BiasScorer(damping=0)) - pipe = builder.build() + builder.predicts_ratings(fallback=BiasScorer(damping=0)) + pipe = builder.build("double-bias") + + _log.info("pipeline configuration: %s", pipe.get_config().model_dump_json(indent=2)) pipe.train(ml_ds) @@ -71,6 +76,7 @@ def test_fallback_double_bias(rng: np.random.Generator, ml_ds: Dataset): assert not np.any(np.isnan(scores)) preds = predict(pipe, user, items) + preds = preds.scores() assert preds is not None assert not np.any(np.isnan(preds)) From 583ce83e44d099d98f0445dac3e6bac82d6d6b54 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 24 Dec 2024 19:27:24 -0600 Subject: [PATCH 4/6] route trace to debug on environment variable --- lenskit/lenskit/logging/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lenskit/lenskit/logging/__init__.py b/lenskit/lenskit/logging/__init__.py index 25de1e782..d551b25c7 100644 --- a/lenskit/lenskit/logging/__init__.py +++ b/lenskit/lenskit/logging/__init__.py @@ -2,6 +2,7 @@ Logging, progress, and resource records. """ +import os from typing import Any import structlog @@ -21,6 +22,7 @@ ] get_logger = structlog.stdlib.get_logger +_trace_debug = os.environ.get("LK_TRACE", "no").lower() == "debug" def trace(logger: structlog.stdlib.BoundLogger, *args: Any, **kwargs: Any): @@ -32,3 +34,5 @@ def trace(logger: structlog.stdlib.BoundLogger, *args: Any, **kwargs: Any): meth = getattr(logger, "trace", None) if meth is not None: meth(*args, **kwargs) + elif _trace_debug: + logger.debug(*args, **kwargs) From 0cedd7e0ee711ae80d7885a43490f84d64835ca3 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 24 Dec 2024 19:27:46 -0600 Subject: [PATCH 5/6] add more tracing to pipeline runner --- lenskit/lenskit/pipeline/__init__.py | 5 +++-- lenskit/lenskit/pipeline/components.py | 5 +++++ lenskit/lenskit/pipeline/runner.py | 7 ++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lenskit/lenskit/pipeline/__init__.py b/lenskit/lenskit/pipeline/__init__.py index a0ac11922..dbd8dfd94 100644 --- a/lenskit/lenskit/pipeline/__init__.py +++ b/lenskit/lenskit/pipeline/__init__.py @@ -11,12 +11,12 @@ # pyright: strict from __future__ import annotations -import logging import typing import warnings from types import FunctionType, UnionType from uuid import NAMESPACE_URL, uuid4, uuid5 +import structlog from typing_extensions import Any, Literal, Self, TypeAlias, TypeVar, cast, overload from lenskit.data import Dataset @@ -51,7 +51,7 @@ "topn_pipeline", ] -_log = logging.getLogger(__name__) +_log = structlog.stdlib.get_logger(__name__) # common type var for quick use T = TypeVar("T") @@ -707,6 +707,7 @@ def run_all(self, *nodes: str | Node[Any], **kwargs: object) -> PipelineState: runner = PipelineRunner(self, kwargs) node_list = [self.node(n) for n in nodes] + _log.debug("running pipeline", name=self.name, nodes=[n.name for n in node_list]) if not node_list: node_list = self.nodes diff --git a/lenskit/lenskit/pipeline/components.py b/lenskit/lenskit/pipeline/components.py index e229b333f..5f469d9e0 100644 --- a/lenskit/lenskit/pipeline/components.py +++ b/lenskit/lenskit/pipeline/components.py @@ -10,6 +10,7 @@ from __future__ import annotations import inspect +import json from abc import abstractmethod from importlib import import_module from types import FunctionType @@ -214,6 +215,10 @@ def __call__(self, **kwargs: Any) -> COut: """ ... + def __repr__(self) -> str: + params = json.dumps(self.get_config(), indent=2) + return f"<{self.__class__.__name__} {params}>" + def instantiate_component( comp: str | type | FunctionType, config: dict[str, Any] | None diff --git a/lenskit/lenskit/pipeline/runner.py b/lenskit/lenskit/pipeline/runner.py index 034b31e33..4ffcb3f15 100644 --- a/lenskit/lenskit/pipeline/runner.py +++ b/lenskit/lenskit/pipeline/runner.py @@ -59,7 +59,7 @@ def run(self, node: Node[Any], *, required: bool = True) -> Any: elif status == "failed": # pragma: nocover raise RuntimeError(f"{node} previously failed") - trace(self.log, "processing node %s", node) + trace(self.log, "processing node", node=node.name) self.status[node.name] = "in-progress" try: self._run_node(node, required) @@ -96,6 +96,7 @@ def _inject_input(self, name: str, types: set[type] | None, required: bool) -> N if val is not None and types and not is_compatible_data(val, *types): raise TypeError(f"invalid data for input {name} (expected {types}, got {type(val)})") + trace(self.log, "injecting input", name=name, value=val) self.state[name] = val def _run_component( @@ -107,7 +108,7 @@ def _run_component( required: bool, ) -> None: in_data = {} - log = self.log.bind(component=name) + log = self.log.bind(node=name) trace(log, "processing inputs") for iname, itype in inputs.items(): # look up the input wiring for this parameter input @@ -158,7 +159,7 @@ def _run_component( in_data[iname] = ival - trace(log, "running component") + trace(log, "running component", component=comp) self.state[name] = comp(**in_data) From 6b1c07f8a1d6f0cb07e5a767b52c48228f9c9328 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 24 Dec 2024 19:28:06 -0600 Subject: [PATCH 6/6] make rating predictor controls keyword-only --- lenskit/lenskit/pipeline/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lenskit/lenskit/pipeline/common.py b/lenskit/lenskit/pipeline/common.py index b5447fb9b..c7ec60948 100644 --- a/lenskit/lenskit/pipeline/common.py +++ b/lenskit/lenskit/pipeline/common.py @@ -58,7 +58,7 @@ def candidate_selector(self, sel: Component): self._selector = sel def predicts_ratings( - self, transform: Component | None = None, *, fallback: Component | None = None + self, *, transform: Component | None = None, fallback: Component | None = None ): """ Specify that this pipeline will predict ratings, optionally providing a