From 26c54f0572792419b5515f67c1c60e56b1a9874b Mon Sep 17 00:00:00 2001 From: ajw2329 Date: Tue, 20 May 2025 16:37:01 -0700 Subject: [PATCH] Coerce ID type to str on VCF ingestion --- minda/decompose.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/minda/decompose.py b/minda/decompose.py index ea952e7..4a70df4 100644 --- a/minda/decompose.py +++ b/minda/decompose.py @@ -69,6 +69,7 @@ def get_df(vcf): else: df = pd.read_csv(vcf, comment='#', sep='\t', usecols=[0,1,2,4,6,7], header=None, compression='gzip', dtype={'#CHROM': 'str', 'POS':'Int64'}) df.columns = ['#CHROM', 'POS', 'ID', 'ALT', 'FILTER', 'INFO'] + df['ID'] = df['ID'].astype('str') return df @@ -83,6 +84,7 @@ def get_intersected_df(vcf, bed): intersect_obj = vcf_to_bt.intersect(bed_to_bt, u=True) df = BedTool.to_dataframe(intersect_obj, header=None, usecols=[0,1,2,4,6,7], dtype={'#CHROM': 'str', 'POS':'int'}) df.columns = ['#CHROM', 'POS', 'ID', 'ALT', 'FILTER', 'INFO'] + df['ID'] = df['ID'].astype('str') return df @@ -457,4 +459,4 @@ def get_decomposed_dfs(caller_name, df, filter, min_size, prefixed, vaf, sample_ decomposed_df_1.ID = prefix + "_" + decomposed_df_1['ID'].astype(str) decomposed_df_2.ID = prefix + "_" + decomposed_df_2['ID'].astype(str) - return decomposed_df_1, decomposed_df_2, max_svlen \ No newline at end of file + return decomposed_df_1, decomposed_df_2, max_svlen