From 3a0c8e277146f6886c6a107c7b4051702a0bd2b5 Mon Sep 17 00:00:00 2001 From: Carl Kadie Date: Tue, 2 Jul 2024 09:31:57 -0700 Subject: [PATCH] add python 3.9 numpy 2 workaround --- Cargo.lock | 2 +- Cargo.toml | 2 +- bed_reader/_open_bed.py | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad0f0af..df759cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -142,7 +142,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bed-reader" -version = "1.0.5-beta.1" +version = "1.0.5-beta.2" dependencies = [ "anyhow", "anyinput", diff --git a/Cargo.toml b/Cargo.toml index 84c0556..51deffc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -version = "1.0.5-beta.1" +version = "1.0.5-beta.2" name = "bed-reader" description = "Read and write the PLINK BED format, simply and efficiently." repository = "https://github.com/fastlmm/bed-reader" diff --git a/bed_reader/_open_bed.py b/bed_reader/_open_bed.py index fef08e9..85147f8 100644 --- a/bed_reader/_open_bed.py +++ b/bed_reader/_open_bed.py @@ -1,6 +1,8 @@ import logging import multiprocessing import os +import re +import sys from dataclasses import dataclass from io import BytesIO from itertools import repeat, takewhile @@ -1792,6 +1794,8 @@ def sparsify(self, val, order, minor_index, batch_slice, data, indices, indptr): def _read_csv(filepath, delimiter=None, dtype=None, usecols=None): + pattern = re.compile(r"^np\.\w+\((.+?)\)$") + # Prepare the usecols by ensuring it is a list of indices usecols_indices = list(usecols) transposed = np.loadtxt( @@ -1809,6 +1813,16 @@ def _read_csv(filepath, delimiter=None, dtype=None, usecols=None): columns = [] for output_index, input_index in enumerate(usecols_indices): col = transposed[output_index] + + # work around numpy/python bug + if ( + (sys.version_info.major, sys.version_info.minor) <= (3, 9) + and int(np.__version__.split(".")[0]) >= 2 + and len(col) > 0 + and pattern.fullmatch(col[0]) + ): + col = np.array([pattern.fullmatch(x).group(1) for x in col]) + # Find the dtype for this column col_dtype = dtype.get(input_index, np.str_) # Convert the column list to a numpy array with the specified dtype