Improve pipeline logging and builder API #573

Merged (6 commits) on Dec 25, 2024
2 changes: 1 addition & 1 deletion lenskit/lenskit/basic/bias.py
@@ -286,7 +286,7 @@ def __init__(
 
     @property
     def is_trained(self) -> bool:
-        return hasattr(self, "bias_")
+        return hasattr(self, "model_")
 
     def train(self, data: Dataset):
        """
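For context, this fixes `is_trained` to check the attribute that training actually sets (`model_`), so the property no longer reports untrained after `train()`. A minimal sketch of the intended behavior, assuming `BiasScorer` (used in the tests below) is the model defined in this module; the wrapper function only exists to supply a dataset:

from lenskit.basic import BiasScorer  # assumed import path
from lenskit.data import Dataset


def check_trained_flag(training_data: Dataset) -> None:
    scorer = BiasScorer(damping=50)
    assert not scorer.is_trained      # no "model_" attribute before training

    scorer.train(training_data)       # training is assumed to set self.model_
    assert scorer.is_trained          # the property now reflects trained state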
4 changes: 4 additions & 0 deletions lenskit/lenskit/logging/__init__.py
@@ -2,6 +2,7 @@
 Logging, progress, and resource records.
 """
 
+import os
 from typing import Any
 
 import structlog
@@ -21,6 +22,7 @@
 ]
 
 get_logger = structlog.stdlib.get_logger
+_trace_debug = os.environ.get("LK_TRACE", "no").lower() == "debug"
 
 
 def trace(logger: structlog.stdlib.BoundLogger, *args: Any, **kwargs: Any):
@@ -32,3 +34,5 @@
     meth = getattr(logger, "trace", None)
     if meth is not None:
         meth(*args, **kwargs)
+    elif _trace_debug:
+        logger.debug(*args, **kwargs)

Codecov / codecov/patch check warning on line 38 in lenskit/lenskit/logging/__init__.py: added line #L38 was not covered by tests.
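The new `LK_TRACE` environment variable gives trace messages a fallback path: when the logger has no `trace` method, they are re-emitted at debug level if `LK_TRACE=debug` was set when `lenskit.logging` was imported. A rough usage sketch (the logger name is illustrative):

import os

os.environ["LK_TRACE"] = "debug"  # must be set before lenskit.logging is imported

from lenskit.logging import get_logger, trace

log = get_logger("lenskit.demo")  # illustrative logger name

# If the bound logger has no trace() method, this now falls back to
# log.debug(...) because _trace_debug was enabled at import time;
# without LK_TRACE=debug the call would be silently dropped.
trace(log, "computing scores", n_items=500)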
5 changes: 3 additions & 2 deletions lenskit/lenskit/pipeline/__init__.py
@@ -11,12 +11,12 @@
 # pyright: strict
 from __future__ import annotations
 
-import logging
 import typing
 import warnings
 from types import FunctionType, UnionType
 from uuid import NAMESPACE_URL, uuid4, uuid5
 
+import structlog
 from typing_extensions import Any, Literal, Self, TypeAlias, TypeVar, cast, overload
 
 from lenskit.data import Dataset
@@ -51,7 +51,7 @@
     "topn_pipeline",
 ]
 
-_log = logging.getLogger(__name__)
+_log = structlog.stdlib.get_logger(__name__)
 
 # common type var for quick use
 T = TypeVar("T")
@@ -707,6 +707,7 @@ def run_all(self, *nodes: str | Node[Any], **kwargs: object) -> PipelineState:
 
         runner = PipelineRunner(self, kwargs)
         node_list = [self.node(n) for n in nodes]
+        _log.debug("running pipeline", name=self.name, nodes=[n.name for n in node_list])
        if not node_list:
            node_list = self.nodes
 
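Switching `_log` from `logging.getLogger` to `structlog.stdlib.get_logger` means pipeline events carry structured key-value fields (as in the `run_all` call above) instead of %-interpolated strings. A small sketch of the difference, with illustrative names and values:

import logging

import structlog

# Before: data is flattened into the message string via %-formatting.
std_log = logging.getLogger("lenskit.pipeline")
std_log.debug("running pipeline %s with nodes %s", "example", ["score", "rank"])

# After: keyword fields stay attached to the event and can be filtered
# or rendered by structlog processors downstream.
struct_log = structlog.stdlib.get_logger("lenskit.pipeline")
struct_log.debug("running pipeline", name="example", nodes=["score", "rank"])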
2 changes: 1 addition & 1 deletion lenskit/lenskit/pipeline/common.py
@@ -58,7 +58,7 @@ def candidate_selector(self, sel: Component):
         self._selector = sel
 
     def predicts_ratings(
-        self, transform: Component | None = None, *, fallback: Component | None = None
+        self, *, transform: Component | None = None, fallback: Component | None = None
     ):
         """
         Specify that this pipeline will predict ratings, optionally providing a
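With this change, `transform` on `RecPipelineBuilder.predicts_ratings` is keyword-only, matching `fallback`. A hedged sketch of the calling convention, following the new test below (the commented-out call names a hypothetical transform component to show what is no longer accepted):

from lenskit.basic import BiasScorer  # assumed import path
from lenskit.pipeline.common import RecPipelineBuilder

builder = RecPipelineBuilder()
builder.scorer(BiasScorer(damping=50))

# No longer accepted: the transform argument cannot be passed positionally.
# builder.predicts_ratings(rating_transform, fallback=BiasScorer(damping=0))

# Both parameters must be given by keyword:
builder.predicts_ratings(fallback=BiasScorer(damping=0))
pipe = builder.build("double-bias")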
5 changes: 5 additions & 0 deletions lenskit/lenskit/pipeline/components.py
@@ -10,6 +10,7 @@
 from __future__ import annotations
 
 import inspect
+import json
 from abc import abstractmethod
 from importlib import import_module
 from types import FunctionType
@@ -214,6 +215,10 @@ def __call__(self, **kwargs: Any) -> COut:
         """
         ...
 
+    def __repr__(self) -> str:
+        params = json.dumps(self.get_config(), indent=2)
+        return f"<{self.__class__.__name__} {params}>"
+
 
 def instantiate_component(
     comp: str | type | FunctionType, config: dict[str, Any] | None
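The new `__repr__` renders a component's configuration as indented JSON. A self-contained toy illustration of the resulting format (ToyScorer and its config key are made up; real components get this behavior from the shared definition above):

import json


class ToyScorer:
    """Stand-in that mirrors the __repr__ pattern added above."""

    def __init__(self, damping: float = 0.0):
        self.damping = damping

    def get_config(self) -> dict[str, float]:
        return {"damping": self.damping}

    def __repr__(self) -> str:
        params = json.dumps(self.get_config(), indent=2)
        return f"<{self.__class__.__name__} {params}>"


print(repr(ToyScorer(damping=50.0)))
# <ToyScorer {
#   "damping": 50.0
# }>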
7 changes: 4 additions & 3 deletions lenskit/lenskit/pipeline/runner.py
@@ -59,7 +59,7 @@ def run(self, node: Node[Any], *, required: bool = True) -> Any:
         elif status == "failed":  # pragma: nocover
             raise RuntimeError(f"{node} previously failed")
 
-        trace(self.log, "processing node %s", node)
+        trace(self.log, "processing node", node=node.name)
         self.status[node.name] = "in-progress"
         try:
             self._run_node(node, required)
@@ -96,6 +96,7 @@ def _inject_input(self, name: str, types: set[type] | None, required: bool) -> N
         if val is not None and types and not is_compatible_data(val, *types):
             raise TypeError(f"invalid data for input {name} (expected {types}, got {type(val)})")
 
+        trace(self.log, "injecting input", name=name, value=val)
         self.state[name] = val
 
     def _run_component(
@@ -107,7 +108,7 @@
         required: bool,
     ) -> None:
         in_data = {}
-        log = self.log.bind(component=name)
+        log = self.log.bind(node=name)
         trace(log, "processing inputs")
         for iname, itype in inputs.items():
             # look up the input wiring for this parameter input
@@ -158,7 +159,7 @@
 
             in_data[iname] = ival
 
-        trace(log, "running component")
+        trace(log, "running component", component=comp)
         self.state[name] = comp(**in_data)
 
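The runner now binds the node name onto its structlog logger, so every event emitted while a node runs carries that context automatically. A rough sketch of the `bind()` pattern outside the runner (logger and field values are illustrative):

from lenskit.logging import get_logger, trace

log = get_logger("lenskit.pipeline.runner")

# bind() returns a child logger; node= is attached to every event it emits.
node_log = log.bind(node="scorer")

trace(node_log, "processing inputs")
trace(node_log, "running component", component="BiasScorer(damping=50)")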
31 changes: 31 additions & 0 deletions lenskit/tests/basic/test_composite.py
@@ -4,6 +4,7 @@
 # Licensed under the MIT license, see LICENSE.md for details.
 # SPDX-License-Identifier: MIT
 
+import logging
 import pickle
 from typing import Any
 
@@ -20,9 +21,13 @@
 from lenskit.data import Dataset
 from lenskit.data.items import ItemList
 from lenskit.data.types import ID
+from lenskit.operations import predict, score
 from lenskit.pipeline import Pipeline
+from lenskit.pipeline.common import RecPipelineBuilder
 from lenskit.util.test import ml_ds, ml_ratings  # noqa: F401
 
+_log = logging.getLogger(__name__)
+
 
 def test_fallback_fill_missing(ml_ds: Dataset):
     pipe = Pipeline()
@@ -51,3 +56,29 @@ def test_fallback_fill_missing(ml_ds: Dataset):
 
     assert scores[:2] == approx(known(2, ItemList(item_ids=items[:2])).scores())
     assert scores[2:] == approx(bias(2, ItemList(item_ids=items[2:])).scores())
+
+
+def test_fallback_double_bias(rng: np.random.Generator, ml_ds: Dataset):
+    builder = RecPipelineBuilder()
+    builder.scorer(BiasScorer(damping=50))
+    builder.predicts_ratings(fallback=BiasScorer(damping=0))
+    pipe = builder.build("double-bias")
+
+    _log.info("pipeline configuration: %s", pipe.get_config().model_dump_json(indent=2))
+
+    pipe.train(ml_ds)
+
+    for user in rng.choice(ml_ds.users.ids(), 100):
+        items = rng.choice(ml_ds.items.ids(), 500)
+        scores = score(pipe, user, items)
+        scores = scores.scores()
+        assert scores is not None
+        assert not np.any(np.isnan(scores))
+
+        preds = predict(pipe, user, items)
+
+        preds = preds.scores()
+        assert preds is not None
+        assert not np.any(np.isnan(preds))
+
+        assert scores == approx(preds)