lenskit · mdekstrand · May 28, 2024 · May 28, 2024 · May 25, 2024 · May 28, 2024
diff --git a/lenskit/algorithms/knn/item.py b/lenskit/algorithms/knn/item.py
@@ -8,6 +8,7 @@
 Item-based k-NN collaborative filtering.
 """
 
+# pyright: basic
 from __future__ import annotations
 
 import logging
@@ -20,6 +21,7 @@
 import torch
 
 from lenskit import ConfigWarning, DataWarning, util
+from lenskit.data import FeedbackType
 from lenskit.data.matrix import normalize_sparse_rows, sparse_ratings
 from lenskit.parallel import ensure_parallel_init
 from lenskit.util.logging import pbh_update, progress_handle
@@ -50,15 +52,18 @@ class ItemItem(Predictor):
         items, regardless of the rating the user gave the items.
 
     Args:
-        nnbrs(int):
+        nnbrs:
             the maximum number of neighbors for scoring each item (``None`` for
             unlimited)
-        min_nbrs(int): the minimum number of neighbors for scoring each item
-        min_sim(float): minimum similarity threshold for considering a neighbor
-        save_nbrs(float):
+        min_nbrs: the minimum number of neighbors for scoring each item
+        min_sim:
+            Minimum similarity threshold for considering a neighbor.  Must be
+            positive; a value of exactly zero is replaced with the smallest
+            32-bit normal (:math:`1.175 \\times 10^{-38}`).
+        save_nbrs:
             the number of neighbors to save per item in the trained model
             (``None`` for unlimited)
-        feedback(str):
+        feedback:
             Control how feedback should be interpreted.  Specifies defaults for
             the other settings, which can be overridden individually; can be one
             of the following values:
@@ -72,15 +77,15 @@ class ItemItem(Predictor):
                 Configure for implicit-feedback mode: ignore rating values, do
                 not center ratings, and use the ``sum`` aggregate method for
                 prediction.
-        center(bool):
+        center:
             whether to normalize (mean-center) rating vectors prior to computing
             similarities and aggregating user rating values.  Defaults to
             ``True``; turn this off when working with unary data and other data
             types that don't respond well to centering.
-        aggregate(str):
+        aggregate:
             the type of aggregation to do. Can be ``weighted-average`` (the
             default) or ``sum``.
-        use_ratings(bool):
+        use_ratings:
             whether or not to use the rating values. If ``False``, it ignores
             rating values and considers an implicit feedback signal of 1 for
             every (user,item) pair present.
@@ -99,6 +104,9 @@ class ItemItem(Predictor):
     save_nbrs: int | None
     feedback: Literal["explicit", "implicit"]
     block_size: int
+    center: bool
+    aggregate: str
+    use_ratings: bool
 
     item_index_: pd.Index
     "The index of item IDs."
@@ -119,7 +127,7 @@ def __init__(
         min_nbrs: int = 1,
         min_sim: float = 1.0e-6,
         save_nbrs: int | None = None,
-        feedback: Literal["explicit", "implicit"] = "explicit",
+        feedback: FeedbackType = "explicit",
         block_size: int = 250,
         **kwargs,
     ):
@@ -175,16 +183,22 @@ def _check_setup(self):
         if self.min_sim < 0:
             _log.warning("item-item does not currently support negative similarities")
             warnings.warn("item-item does not currently support negative similarities")
+        elif self.min_sim == 0:
+            f4i = np.finfo("f4")
+            _log.warn(
+                "minimum similarity %e is too low, using %e", self.min_sim, f4i.smallest_normal
+            )
+            self.min_sim = float(f4i.smallest_normal)
 
-    def fit(self, ratings, **kwargs):
+    def fit(self, ratings: pd.DataFrame, **kwargs):
         """
         Train a model.
 
         The model-training process depends on ``save_nbrs`` and ``min_sim``, but *not* on other
         algorithm parameters.
 
         Args:
-            ratings(pandas.DataFrame):
+            ratings:
                 (user,item,rating) data for computing item similarities.
         """
         ensure_parallel_init()

diff --git a/lenskit/data/__init__.py b/lenskit/data/__init__.py
@@ -4,4 +4,8 @@
 # Licensed under the MIT license, see LICENSE.md for details.
 # SPDX-License-Identifier: MIT
 
+from typing import Literal, TypeAlias
+
 from .matrix import RatingMatrix, sparse_ratings  # noqa: F401
+
+FeedbackType: TypeAlias = Literal["explicit", "implicit"]