From 68c2543acab72a0d86611b58623c748a1eb67235 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 11 Jul 2023 13:12:52 -0400
Subject: [PATCH] filtering of untyped alleles and below AT

---
 lusSTR/data/snp_config.yaml     | 6 +++---
 lusSTR/wrappers/snps_convert.py | 9 ++++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/lusSTR/data/snp_config.yaml b/lusSTR/data/snp_config.yaml
index 331ae3b8..ee31151c 100644
--- a/lusSTR/data/snp_config.yaml
+++ b/lusSTR/data/snp_config.yaml
@@ -5,13 +5,13 @@
 uas: True  ## True/False; if ran through UAS
 samp_input: "/path/to/input/directory/or/samples" ## input directory or sample; if not provided, will be cwd
 output: "lusstr_output" ## output file/directory name; Example: "test_030923"
-
-## convert settings
 kit: "sigprep" ## sigprep/kintelligence 
+
+## format settings
 types: "i" ## choices are "all", "i" (identity SNPs only), "p" (phenotype only), "a" (ancestry only) or any combination
 nofilter: False ## True/False if no filtering is desired; if False, will remove any allele designated as Not Typed
 
-## format settings
+## convert settings
 strand: "forward" ## forward/uas; indicates which oritentation to report the alleles for the ForenSeq SNPs; uas indicates the orientation as reported by the UAS or the forward strand
 references: "" ## list IDs of the samples to be run as references in EFM
 separate: false ## True/False; if want to separate samples into individual files for use in EFM
diff --git a/lusSTR/wrappers/snps_convert.py b/lusSTR/wrappers/snps_convert.py
index 8239de83..2461181c 100644
--- a/lusSTR/wrappers/snps_convert.py
+++ b/lusSTR/wrappers/snps_convert.py
@@ -25,7 +25,9 @@ def create_output_table(sample_df, orientation, separate, output_type, uas):
         allele_col = "Forward_Strand_Allele"
     all_samples_df = pd.DataFrame()
     for sample in sample_df["SampleID"].unique():
-        indiv_df = sample_df[sample_df["SampleID"] == sample]
+        indiv_df = sample_df[
+            (sample_df["SampleID"] == sample) & (sample_df["Issues"] != "Contains untyped allele")
+        ]
         compiled_table = create_sample_df(indiv_df, output_type, allele_col)
         if not uas:
             compiled_table = check_allele_calls(compiled_table, output_type)
@@ -47,14 +49,15 @@ def create_sample_df(indiv_df, output_type, all_col):
         .unstack(0)
         .reset_index()
     )
-    print(compiled_table)
+    compiled_table.to_csv("test.csv", index=False)
     try:
         compiled_table.columns = ["Marker", "Allele 1", "Allele 2", "Height 1", "Height 2"]
     except ValueError:
         print("Too many alleles!")
     if output_type == "reference":
+        print(compiled_table)
         for i, row in compiled_table.iterrows():
-            if compiled_table.loc[i, "Height 2"] == 0:
+            if pd.isnull(compiled_table.loc[i, "Height 2"]):
                 compiled_table.loc[i, "Allele 2"] = compiled_table.loc[i, "Allele 1"]
         compiled_table = compiled_table[["Marker", "Allele 1", "Allele 2"]]
     return compiled_table