From 3a0c8e277146f6886c6a107c7b4051702a0bd2b5 Mon Sep 17 00:00:00 2001
From: Carl Kadie <carlk@msn.com>
Date: Tue, 2 Jul 2024 09:31:57 -0700
Subject: [PATCH] add python 3.9 numpy 2 workaround

---
 Cargo.lock              |  2 +-
 Cargo.toml              |  2 +-
 bed_reader/_open_bed.py | 14 ++++++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ad0f0af..df759cc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -142,7 +142,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
 [[package]]
 name = "bed-reader"
-version = "1.0.5-beta.1"
+version = "1.0.5-beta.2"
 dependencies = [
  "anyhow",
  "anyinput",
diff --git a/Cargo.toml b/Cargo.toml
index 84c0556..51deffc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-version = "1.0.5-beta.1"
+version = "1.0.5-beta.2"
 name = "bed-reader"
 description = "Read and write the PLINK BED format, simply and efficiently."
 repository = "https://github.com/fastlmm/bed-reader"
diff --git a/bed_reader/_open_bed.py b/bed_reader/_open_bed.py
index fef08e9..85147f8 100644
--- a/bed_reader/_open_bed.py
+++ b/bed_reader/_open_bed.py
@@ -1,6 +1,8 @@
 import logging
 import multiprocessing
 import os
+import re
+import sys
 from dataclasses import dataclass
 from io import BytesIO
 from itertools import repeat, takewhile
@@ -1792,6 +1794,8 @@ def sparsify(self, val, order, minor_index, batch_slice, data, indices, indptr):
 
 
 def _read_csv(filepath, delimiter=None, dtype=None, usecols=None):
+    pattern = re.compile(r"^np\.\w+\((.+?)\)$")
+
     # Prepare the usecols by ensuring it is a list of indices
     usecols_indices = list(usecols)
     transposed = np.loadtxt(
@@ -1809,6 +1813,16 @@ def _read_csv(filepath, delimiter=None, dtype=None, usecols=None):
     columns = []
     for output_index, input_index in enumerate(usecols_indices):
         col = transposed[output_index]
+
+        # NumPy >= 2 on Python <= 3.9 workaround: unwrap "np.<type>(...)" strings
+        if (
+            (sys.version_info.major, sys.version_info.minor) <= (3, 9)
+            and int(np.__version__.split(".")[0]) >= 2
+            and len(col) > 0
+            and pattern.fullmatch(col[0])
+        ):
+            col = np.array([pattern.fullmatch(x).group(1) for x in col])
+
         # Find the dtype for this column
         col_dtype = dtype.get(input_index, np.str_)
         # Convert the column list to a numpy array with the specified dtype