Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions sklearn/cluster/k_means_.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++',
if n_init <= 0:
raise ValueError("Invalid number of initializations."
" n_init=%d must be bigger than zero." % n_init)
random_state = check_random_state(random_state)
rng = check_random_state(random_state)

if max_iter <= 0:
raise ValueError('Number of iterations should be a positive number,'
Expand Down Expand Up @@ -346,6 +346,8 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++',
# precompute squared norms of data points
x_squared_norms = row_norms(X, squared=True)

seeds = rng.randint(0, np.iinfo(np.int32).max, size=n_init)

best_labels, best_inertia, best_centers = None, None, None
if n_clusters == 1:
# elkan doesn't make sense for a single cluster, full will produce
Expand All @@ -363,13 +365,13 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++',
if effective_n_jobs(n_jobs) == 1:
# For a single thread, less memory is needed if we just store one set
# of the best results (as opposed to one set per run per thread).
for it in range(n_init):
for seed in seeds:
# run a k-means once
labels, inertia, centers, n_iter_ = kmeans_single(
X, sample_weight, n_clusters, max_iter=max_iter, init=init,
verbose=verbose, precompute_distances=precompute_distances,
tol=tol, x_squared_norms=x_squared_norms,
random_state=random_state)
random_state=seed)
# determine if these results are the best so far
if best_inertia is None or inertia < best_inertia:
best_labels = labels.copy()
Expand All @@ -378,7 +380,6 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++',
best_n_iter = n_iter_
else:
# parallelisation of k-means runs
seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
results = Parallel(n_jobs=n_jobs, verbose=0)(
delayed(kmeans_single)(X, sample_weight, n_clusters,
max_iter=max_iter, init=init,
Expand Down
28 changes: 28 additions & 0 deletions sklearn/cluster/tests/test_k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,34 @@ def test_k_means_plus_plus_init_2_jobs():
_check_fitted_model(km)


@if_safe_multiprocessing_with_blas
@pytest.mark.parametrize('algorithm', ['full', 'elkan'])
def test_kmeans_consistent_inertia_across_n_jobs(algorithm):
    # KMeans draws one seed per initialisation up front, so the sequential
    # (n_jobs=1) and parallel code paths should run identical inits and
    # the final inertia must not depend on the degree of parallelism.
    data, _ = make_blobs(n_samples=200, n_features=5, centers=4,
                         random_state=0)

    def fitted_inertia(jobs):
        # Fit a fresh estimator with identical hyper-parameters, varying
        # only the number of worker processes.
        est = KMeans(n_clusters=4, n_init=8, random_state=42,
                     algorithm=algorithm, n_jobs=jobs)
        return est.fit(data).inertia_

    observed = [fitted_inertia(jobs) for jobs in (1, 2, 3)]
    assert_allclose(observed, observed[0], rtol=1e-12)


@if_safe_multiprocessing_with_blas
@pytest.mark.parametrize('algorithm', ['full', 'elkan'])
def test_k_means_function_consistent_inertia_across_n_jobs(algorithm):
    # The functional `k_means` interface must also yield the same inertia
    # regardless of how many workers execute the n_init restarts, since
    # each restart receives a pre-drawn seed.
    data, _ = make_blobs(n_samples=200, n_features=5, centers=4,
                         random_state=0)
    observed = []
    for jobs in (1, 2, 3):
        _, _, run_inertia = k_means(data, n_clusters=4, n_init=8,
                                    random_state=42, algorithm=algorithm,
                                    n_jobs=jobs)
        observed.append(run_inertia)
    assert_allclose(observed, observed[0], rtol=1e-12)


def test_k_means_precompute_distances_flag():
# check that a warning is raised if the precompute_distances flag is not
# supported
Expand Down