From 4399b1c87d7a571e09a43fc0052e951ea6a1cc02 Mon Sep 17 00:00:00 2001
From: Tim Millar <tim.millar@plantandfood.co.nz>
Date: Fri, 26 Sep 2025 20:28:03 +1200
Subject: [PATCH 1/2] Fix high memory usage bug with chunked HK relationship
 #1333

---
 sgkit/stats/pedigree.py      |  9 +++++++--
 sgkit/tests/test_pedigree.py | 23 +++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/sgkit/stats/pedigree.py b/sgkit/stats/pedigree.py
index bff889407..3420ae13a 100644
--- a/sgkit/stats/pedigree.py
+++ b/sgkit/stats/pedigree.py
@@ -1198,10 +1198,15 @@ def pedigree_kinship(
     # new dataset
     dims = ["samples_0", "samples_1"]
     if return_relationship:
-        relationship = kinship * 2
         if method == EST_HAMILTON_KERR:
             ploidy = tau.sum(axis=-1)
-            relationship *= np.sqrt(ploidy[None, :] / 2 * ploidy[:, None] / 2)
+            ploidy_0 = ploidy.rechunk((kinship.chunks[0],))
+            ploidy_1 = ploidy.rechunk((kinship.chunks[1],))
+            scale = da.sqrt(ploidy_0[:, None] * ploidy_1[None, :])
+            assert kinship.chunks == scale.chunks
+            relationship = kinship * scale
+        else:
+            relationship = kinship * 2
         arrays = {
             variables.stat_pedigree_kinship: xr.DataArray(kinship, dims=dims),
             variables.stat_pedigree_relationship: xr.DataArray(relationship, dims=dims),
diff --git a/sgkit/tests/test_pedigree.py b/sgkit/tests/test_pedigree.py
index 943db916d..42856b523 100644
--- a/sgkit/tests/test_pedigree.py
+++ b/sgkit/tests/test_pedigree.py
@@ -560,6 +560,29 @@ def test_pedigree_kinship__Hamilton_Kerr_compress_parent_dimension(
     np.testing.assert_array_almost_equal(actual, expect)
 
 
+def test_pedigree_kinship__Hamilton_Kerr_chunked_mem():
+    # tests fix for https://github.com/sgkit-dev/sgkit/issues/1333
+    # if chunking is not applied to the ploidy matrix correctly
+    # this will try to allocate > 7TB of RAM (and presumably fail)
+    ds = xr.Dataset()
+    ds["parent"] = ["samples", "parents"], np.full((1_000_000, 2), -1, int)
+    ds["stat_Hamilton_Kerr_tau"] = ["samples", "parents"], np.full(
+        (1_000_000, 2), 2, np.uint64
+    )
+    ds["stat_Hamilton_Kerr_lambda"] = ["samples", "parents"], np.full(
+        (1_000_000, 2), 0.0
+    )
+    matrix = sg.pedigree_kinship(
+        ds,
+        method="Hamilton-Kerr",
+        chunks=((10, 999990), (20, 999980)),
+        return_relationship=True,
+    ).stat_pedigree_relationship
+    expect = np.eye(10, 20)
+    actual = matrix[0:10, 0:20].compute()
+    np.testing.assert_array_almost_equal(actual, expect)
+
+
 @pytest.mark.parametrize("method", ["diploid", "Hamilton-Kerr"])
 def test_pedigree_kinship__half_founder(method):
     ds0 = sg.simulate_genotype_call_dataset(n_variant=1, n_sample=6)

From b95b1bc042352e85d19ee96f6caa8afa900277ac Mon Sep 17 00:00:00 2001
From: Tim Millar <tim.millar@plantandfood.co.nz>
Date: Fri, 26 Sep 2025 20:37:38 +1200
Subject: [PATCH 2/2] Update changelog

---
 docs/changelog.rst | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index f4da563ee..91b574ec8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -28,8 +28,12 @@ Deprecations
 .. Improvements
 .. ~~~~~~~~~~~~
 
-.. Bug fixes
-.. ~~~~~~~~~
+Bug fixes
+~~~~~~~~~
+
+- Fixed bug causing high memory usage in :func:`pedigree_kinship` when
+  evaluating a chunked relationship matrix with the Hamilton-Kerr method.
+  (:user:`timothymillar`, :pr:`1335`, :issue:`1333`)
 
 .. Documentation
 .. ~~~~~~~~~~~~~