Merge pull request #582 from mdekstrand/feature/cleanup-bulk
Add predict_pipeline, remove data.bulk module, and clean up batch functions
mdekstrand authored Dec 31, 2024
2 parents 4725505 + c0d4133 commit 48eb645
Showing 11 changed files with 183 additions and 267 deletions.
13 changes: 8 additions & 5 deletions lenskit-funksvd/tests/test_funksvd.py
@@ -13,10 +13,11 @@

 from pytest import approx, mark

-from lenskit.data import Dataset, ItemList, from_interactions_df
-from lenskit.data.bulk import dict_to_df, iter_item_lists
+from lenskit import batch
+from lenskit.data import Dataset, ItemList, ItemListCollection, UserIDKey, from_interactions_df
 from lenskit.funksvd import FunkSVDScorer
 from lenskit.metrics import call_metric, quick_measure_model
+from lenskit.pipeline.common import predict_pipeline
 from lenskit.testing import BasicComponentTests, ScorerTests, wantjit

 _log = logging.getLogger(__name__)
@@ -169,15 +170,17 @@ def test_fsvd_save_load(ml_ds: Dataset):
 def test_fsvd_known_preds(ml_ds: Dataset):
     algo = FunkSVDScorer(15, iterations=125, lrate=0.001)
     _log.info("training %s on ml data", algo)
-    algo.train(ml_ds)
+    pipe = predict_pipeline(algo, fallback=False)
+    pipe.train(ml_ds)

     dir = Path(__file__).parent
     pred_file = dir / "funksvd-preds.csv"
     _log.info("reading known predictions from %s", pred_file)
     known_preds = pd.read_csv(str(pred_file))
+    known = ItemListCollection.from_df(known_preds, UserIDKey)

-    preds = {u: algo(u, il) for (u, il) in iter_item_lists(known_preds)}
-    preds = dict_to_df(preds)
+    preds = batch.predict(pipe, known, n_jobs=1)
+    preds = preds.to_df().drop(columns=["prediction"], errors="ignore")

     known_preds.rename(columns={"prediction": "expected"}, inplace=True)
     merged = pd.merge(known_preds, preds)
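
The updated test drives the scorer through a full prediction pipeline instead of calling it directly. A rough sketch of that pattern, mirroring the diff above (the training dataset `ml_ds` and the CSV path are stand-ins from the test context, not part of the public API):

    # Sketch of the new prediction-test flow; `ml_ds` is an assumed lenskit.data.Dataset.
    import pandas as pd

    from lenskit import batch
    from lenskit.data import ItemListCollection, UserIDKey
    from lenskit.funksvd import FunkSVDScorer
    from lenskit.pipeline.common import predict_pipeline

    scorer = FunkSVDScorer(15, iterations=125, lrate=0.001)
    pipe = predict_pipeline(scorer, fallback=False)  # no fallback predictor, as in the test
    pipe.train(ml_ds)

    # Known user/item pairs become an ItemListCollection keyed by user ID ...
    known_preds = pd.read_csv("funksvd-preds.csv")
    known = ItemListCollection.from_df(known_preds, UserIDKey)

    # ... and batch.predict scores every list, returning another collection.
    preds = batch.predict(pipe, known, n_jobs=1)
    preds_df = preds.to_df()
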
76 changes: 74 additions & 2 deletions lenskit/lenskit/batch/__init__.py
@@ -10,9 +10,81 @@

 from __future__ import annotations

-from ._predict import predict
-from ._recommend import recommend
+from typing import Mapping, Sequence
+
+from lenskit.data import ID, GenericKey, ItemList, ItemListCollection, UserIDKey
+from lenskit.pipeline import Pipeline
+
 from ._results import BatchResults
 from ._runner import BatchPipelineRunner, InvocationSpec

 __all__ = ["BatchPipelineRunner", "BatchResults", "InvocationSpec", "predict", "recommend"]
+
+
+def predict(
+    pipeline: Pipeline,
+    test: ItemListCollection[GenericKey] | Mapping[ID, ItemList],
+    *,
+    n_jobs: int | None = None,
+    **kwargs,
+) -> ItemListCollection[GenericKey]:
+    """
+    Convenience function to batch-generate rating predictions (or other per-item
+    scores) from a pipeline. This is a batch version of :func:`lenskit.predict`.
+
+    Stability:
+        Caller
+    """
+
+    runner = BatchPipelineRunner(n_jobs=n_jobs)
+    runner.predict()
+    outs = runner.run(pipeline, test)
+    return outs.output("predictions")  # type: ignore
+
+
+def score(
+    pipeline: Pipeline,
+    test: ItemListCollection[GenericKey] | Mapping[ID, ItemList],
+    *,
+    n_jobs: int | None = None,
+    **kwargs,
+) -> ItemListCollection[GenericKey]:
+    """
+    Convenience function to batch-generate personalized scores from a pipeline.
+    This is a batch version of :func:`lenskit.predict`.
+
+    Stability:
+        Caller
+    """
+
+    runner = BatchPipelineRunner(n_jobs=n_jobs)
+    runner.score()
+    outs = runner.run(pipeline, test)
+    return outs.output("scores")  # type: ignore
+
+
+def recommend(
+    pipeline: Pipeline,
+    users: Sequence[ID | UserIDKey],
+    n: int | None = None,
+    candidates=None,
+    *,
+    n_jobs: int | None = None,
+    **kwargs,
+) -> ItemListCollection[UserIDKey]:
+    """
+    Convenience function to batch-generate recommendations from a pipeline. This
+    is a batch version of :func:`lenskit.recommend`.
+
+    .. todo::
+        Support more inputs than just user IDs.
+
+    Stability:
+        Caller
+    """
+
+    runner = BatchPipelineRunner(n_jobs=n_jobs)
+    runner.recommend(n=n)
+    outs = runner.run(pipeline, users)
+    return outs.output("recommendations")  # type: ignore
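
The three wrappers share one shape: construct a BatchPipelineRunner, request a single invocation, run it, and pull the matching output. A minimal usage sketch, assuming a trained pipeline `pipe`, a test ItemListCollection `test_data`, and a sequence of user IDs `user_ids` (only the function signatures come from the code above):

    # Hypothetical inputs: `pipe`, `test_data`, and `user_ids` are placeholders.
    from lenskit import batch

    preds = batch.predict(pipe, test_data, n_jobs=4)        # rating predictions
    scores = batch.score(pipe, test_data, n_jobs=4)         # raw per-item scores
    recs = batch.recommend(pipe, user_ids, n=10, n_jobs=4)  # top-10 list per user

    # Each call returns an ItemListCollection; convert to a frame for analysis.
    preds_df = preds.to_df()
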
37 changes: 0 additions & 37 deletions lenskit/lenskit/batch/_predict.py

This file was deleted.

38 changes: 0 additions & 38 deletions lenskit/lenskit/batch/_recommend.py

This file was deleted.

17 changes: 15 additions & 2 deletions lenskit/lenskit/batch/_runner.py
@@ -68,17 +68,30 @@ def __init__(self, *, n_jobs: int | None = None):
     def add_invocation(self, inv: InvocationSpec):
         self.invocations.append(inv)

+    def score(self, component: str = "scorer", *, output: str = "scores"):
+        """
+        Request the batch run to generate test item scores.
+
+        Args:
+            component:
+                The name of the rating predictor component to run.
+            output:
+                The name of the results in the output dictionary.
+        """
+        self.add_invocation(InvocationSpec("score", {component: output}, "test-items"))
+
     def predict(self, component: str = "rating-predictor", *, output: str = "predictions"):
         """
-        Request the batch run to generate test item scores or rating predictins.
+        Request the batch run to generate test item rating predictions. It is identical
+        to :meth:`score` but with different defaults.

         Args:
             component:
                 The name of the rating predictor component to run.
             output:
                 The name of the results in the output dictionary.
         """
-        self.add_invocation(InvocationSpec("predict-ratings", {component: output}, "test-items"))
+        return self.score(component, output=output)

     def recommend(
         self, component: str = "recommender", *, output: str = "recommendations", **extra: Any
125 changes: 0 additions & 125 deletions lenskit/lenskit/data/bulk.py

This file was deleted.
