From 68c2543acab72a0d86611b58623c748a1eb67235 Mon Sep 17 00:00:00 2001 From: rnmitchell Date: Tue, 11 Jul 2023 13:12:52 -0400 Subject: [PATCH] filtering of untyped alleles and below AT --- lusSTR/data/snp_config.yaml | 6 +++--- lusSTR/wrappers/snps_convert.py | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lusSTR/data/snp_config.yaml b/lusSTR/data/snp_config.yaml index 331ae3b8..ee31151c 100644 --- a/lusSTR/data/snp_config.yaml +++ b/lusSTR/data/snp_config.yaml @@ -5,13 +5,13 @@ uas: True ## True/False; if ran through UAS samp_input: "/path/to/input/directory/or/samples" ## input directory or sample; if not provided, will be cwd output: "lusstr_output" ## output file/directory name; Example: "test_030923" - -## convert settings kit: "sigprep" ## sigprep/kintelligence + +## format settings types: "i" ## choices are "all", "i" (identity SNPs only), "p" (phenotype only), "a" (ancestry only) or any combination nofilter: False ## True/False if no filtering is desired; if False, will remove any allele designated as Not Typed -## format settings +## convert settings strand: "forward" ## forward/uas; indicates which oritentation to report the alleles for the ForenSeq SNPs; uas indicates the orientation as reported by the UAS or the forward strand references: "" ## list IDs of the samples to be run as references in EFM separate: false ## True/False; if want to separate samples into individual files for use in EFM diff --git a/lusSTR/wrappers/snps_convert.py b/lusSTR/wrappers/snps_convert.py index 8239de83..2461181c 100644 --- a/lusSTR/wrappers/snps_convert.py +++ b/lusSTR/wrappers/snps_convert.py @@ -25,7 +25,9 @@ def create_output_table(sample_df, orientation, separate, output_type, uas): allele_col = "Forward_Strand_Allele" all_samples_df = pd.DataFrame() for sample in sample_df["SampleID"].unique(): - indiv_df = sample_df[sample_df["SampleID"] == sample] + indiv_df = sample_df[ + (sample_df["SampleID"] == sample) & (sample_df["Issues"] != "Contains untyped allele") + ] compiled_table = create_sample_df(indiv_df, output_type, allele_col) if not uas: compiled_table = check_allele_calls(compiled_table, output_type) @@ -47,14 +49,15 @@ def create_sample_df(indiv_df, output_type, all_col): .unstack(0) .reset_index() ) - print(compiled_table) + compiled_table.to_csv("test.csv", index=False) try: compiled_table.columns = ["Marker", "Allele 1", "Allele 2", "Height 1", "Height 2"] except ValueError: print("Too many alleles!") if output_type == "reference": + print(compiled_table) for i, row in compiled_table.iterrows(): - if compiled_table.loc[i, "Height 2"] == 0: + if pd.isnull(compiled_table.loc[i, "Height 2"]): compiled_table.loc[i, "Allele 2"] = compiled_table.loc[i, "Allele 1"] compiled_table = compiled_table[["Marker", "Allele 1", "Allele 2"]] return compiled_table