From 9a99a8a68118a6ccd83228da16fe849a680ff66c Mon Sep 17 00:00:00 2001 From: Omer Weissbrod Date: Fri, 22 Nov 2024 16:12:17 +0200 Subject: [PATCH] explicitly convert chromosome and basepairs to int for df indices --- ldsc_polyfun/parse.py | 4 +++- polyfun_utils.py | 4 +++- test_polyfun.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ldsc_polyfun/parse.py b/ldsc_polyfun/parse.py index 7859a52..2577e71 100644 --- a/ldsc_polyfun/parse.py +++ b/ldsc_polyfun/parse.py @@ -37,7 +37,9 @@ def set_snpid_index(df): df.loc[df['A1_first'], 'A1s'] = df.loc[df['A1_first'], 'A1'].copy() df['A2s'] = df['A1'].copy() df.loc[df['A1_first'], 'A2s'] = df.loc[df['A1_first'], 'A2'].copy() - df.index = df['CHR'].astype(str) + '.' + df['BP'].astype(str) + '.' + df['A1s'] + '.' + df['A2s'] + s_chr = df['CHR'].map(lambda c: int(c) if str(c)[0] in ['0','1','2','3','4','5','6','7','8','9'] else c).astype(str) + s_bp = df['BP'].astype(int).astype(str) + df.index = s_chr + '.' + s_bp + '.' + df['A1s'] + '.' + df['A2s'] df.index.name = 'snpid' df.drop(columns=['A1_first', 'A1s', 'A2s'], inplace=True) return df diff --git a/polyfun_utils.py b/polyfun_utils.py index 023e838..1f2f522 100644 --- a/polyfun_utils.py +++ b/polyfun_utils.py @@ -67,7 +67,9 @@ def set_snpid_index(df, copy=False, allow_duplicates=False, allow_swapped_indel_ df.loc[df['A1_first'], 'A1s'] = df.loc[df['A1_first'], 'A1'].copy() df['A2s'] = df['A1'].copy() df.loc[df['A1_first'], 'A2s'] = df.loc[df['A1_first'], 'A2'].copy() - df.index = df['CHR'].astype(int).astype(str) + '.' + df['BP'].astype(str) + '.' + df['A1s'] + '.' + df['A2s'] + s_chr = df['CHR'].map(lambda c: int(c) if str(c)[0] in ['0','1','2','3','4','5','6','7','8','9'] else c).astype(str) + s_bp = df['BP'].astype(int).astype(str) + df.index = s_chr + '.' + s_bp + '.' + df['A1s'] + '.' 
+ df['A2s'] df.index.name = 'snpid' df.drop(columns=['A1_first', 'A1s', 'A2s'], inplace=True) diff --git a/test_polyfun.py b/test_polyfun.py index d1ceaeb..515a8a8 100644 --- a/test_polyfun.py +++ b/test_polyfun.py @@ -249,7 +249,7 @@ def test_finemapper_susie(tmpdir, python3_exe): #print(finemapper_cmd) retval = os.system(finemapper_cmd) if retval != 0: - raise ValueError('finemapper command failed when running the following command:\n%s'%(cmd)) + raise ValueError('finemapper command failed when running the following command:\n%s'%(finemapper_cmd)) compare_dfs(tmpdir, gold_dir, outfile, sort_column='SNP')