From 03b89790f91d7f9fd4a2d9a01c0ac694acff483a Mon Sep 17 00:00:00 2001
From: rnmitchell <57150382+rnmitchell@users.noreply.github.com>
Date: Wed, 22 Nov 2023 08:09:41 -0500
Subject: [PATCH] Add LUS+ as possible STR data type (#57)

---
 lusSTR/cli/config.py                          |  10 +-
 lusSTR/scripts/filter_settings.py             | 118 ++++++++++++----
 .../EFM_test_reference_lusplus.csv            |  28 ++++
 .../LUSPlus_sequence_info.csv                 |  24 ++++
 .../test_filtering_EFMoutput_lusplus.csv      |  28 ++++
 ...test_filtering_EFMoutput_sequence_info.csv |  26 ++++
 ...eference.csv => EFM_test_reference_ce.csv} |   0
 ...ut.csv => test_filtering_EFMoutput_ce.csv} |   0
 lusSTR/tests/data/test_stutter_lusp.txt       |  25 ++++
 lusSTR/tests/test_filters.py                  | 132 ++++++++++++++----
 lusSTR/tests/test_suite.py                    |   2 +-
 lusSTR/wrappers/filter.py                     |  72 ++++++----
 12 files changed, 378 insertions(+), 87 deletions(-)
 create mode 100644 lusSTR/tests/data/LUSPlus_stutter_test/EFM_test_reference_lusplus.csv
 create mode 100644 lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
 create mode 100644 lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_lusplus.csv
 create mode 100644 lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv
 rename lusSTR/tests/data/RU_stutter_test/{EFM_test_reference.csv => EFM_test_reference_ce.csv} (100%)
 rename lusSTR/tests/data/RU_stutter_test/{test_filtering_EFMoutput.csv => test_filtering_EFMoutput_ce.csv} (100%)
 create mode 100644 lusSTR/tests/data/test_stutter_lusp.txt

diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py
index a6c3f23d..a972c847 100644
--- a/lusSTR/cli/config.py
+++ b/lusSTR/cli/config.py
@@ -87,8 +87,8 @@ def edit_str_config(config, args):
         data["info"] = False
     if args.reference:
         data["profile_type"] = "reference"
-    if args.ce:
-        data["data_type"] = "ce"
+    if args.datatype:
+        data["data_type"] = args.datatype
     if args.efm:
         data["output_type"] = "efm"
     if args.strand:
@@ -127,7 +127,11 @@ def subparser(subparsers):
         help="Use for creating Reference profiles for STR workflow"
     )
     p.add_argument("--efm", action="store_true",help="Use to create EuroForMix profiles")
-    p.add_argument("--ce", action="store_true", help="Use for CE data")
+    p.add_argument(
+        "--str-type", choices=["ce", "ngs", "lusplus"], default="ngs",
+        dest="datatype", help="Data type for STRs. Options are: CE allele ('ce'), sequence "
+        "('ngs'), or LUS+ allele ('lusplus'). Default is 'ngs'.",
+    )
     p.add_argument(
         "--noinfo", action="store_true", 
         help="Use to not create the Sequence Information File in the 'filter' step"
diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py
index 9edb028b..b6fc13f7 100644
--- a/lusSTR/scripts/filter_settings.py
+++ b/lusSTR/scripts/filter_settings.py
@@ -37,8 +37,8 @@ def filters(locus_allele_info, locus, locus_reads, datatype, brack_col):
         locus_allele_info = ce_filtering(
             locus_allele_info, locus_reads, metadata, datatype, brack_col
         )
-        if datatype == "ngs":
-            locus_allele_info = same_size_filter(locus_allele_info, metadata)
+        if datatype != "ce":
+            locus_allele_info = same_size_filter(locus_allele_info, metadata, datatype)
     return locus_allele_info
 
 
@@ -146,14 +146,19 @@ def allele_ident(
     locus_allele_info, init_type_all, metadata, ref_allele_reads, i, j, datatype, brack_col
 ):
     quest_al_reads = locus_allele_info.loc[j, "Reads"]
-    ref_allele = float(locus_allele_info.loc[i, "CE_Allele"])
-    question_allele = float(locus_allele_info.loc[j, "CE_Allele"])
-    if datatype == "ngs":
-        ref_bracket = locus_allele_info.loc[i, brack_col]
-        question_bracket = locus_allele_info.loc[j, brack_col]
+    if datatype == "lusplus":
+        ref_allele = float(locus_allele_info.loc[i, "LUS_Plus"].split("_")[0])
+        question_allele = float(locus_allele_info.loc[j, "LUS_Plus"].split("_")[0])
+        ref_altformat = locus_allele_info.loc[i, "LUS_Plus"]
+        question_altformat = locus_allele_info.loc[j, "LUS_Plus"]
     else:
-        ref_bracket = None
-        question_bracket = None
+        ref_allele = float(locus_allele_info.loc[i, "CE_Allele"])
+        question_allele = float(locus_allele_info.loc[j, "CE_Allele"])
+        ref_altformat = None
+        question_altformat = None
+        if datatype == "ngs":
+            ref_altformat = locus_allele_info.loc[i, brack_col]
+            question_altformat = locus_allele_info.loc[j, brack_col]
     locus_allele_info.loc[j, ["allele_type", "perc_stutter"]] = allele_type(
         ref_allele,
         question_allele,
@@ -163,8 +168,8 @@ def allele_ident(
         ref_allele_reads,
         locus_allele_info.loc[j, "allele1_ref_reads"],
         locus_allele_info,
-        ref_bracket,
-        question_bracket,
+        ref_altformat,
+        question_altformat,
         datatype,
         brack_col,
     )
@@ -172,13 +177,13 @@ def allele_ident(
         if "/" in locus_allele_info.loc[j, "allele_type"] and pd.isnull(
             locus_allele_info.loc[j, "parent_allele2"]
         ):
-            stut_allele2 = ref_allele if datatype == "ce" else ref_bracket
+            stut_allele2 = ref_allele if datatype == "ce" else ref_altformat
             locus_allele_info.loc[j, ["parent_allele2", "allele2_ref_reads"]] = [
                 stut_allele2,
                 ref_allele_reads,
             ]
         elif pd.isnull(locus_allele_info.loc[j, "parent_allele1"]):
-            stut_allele1 = ref_allele if datatype == "ce" else ref_bracket
+            stut_allele1 = ref_allele if datatype == "ce" else ref_altformat
             locus_allele_info.loc[j, ["parent_allele1", "allele1_ref_reads"]] = [
                 stut_allele1,
                 ref_allele_reads,
@@ -263,8 +268,8 @@ def allele_type(
     ref_reads,
     al1_ref_reads,
     all_type_df,
-    ref_bracket,
-    question_bracket,
+    ref_altformat,
+    question_altformat,
     datatype,
     brack_col,
 ):
@@ -275,8 +280,16 @@ def allele_type(
     allele_diff = round(ref - ce, 1)
     if allele_diff == 1 and ref_reads > quest_al_reads:  # -1 stutter
         if (
-            datatype == "ngs" and bracketed_stutter_id(ref_bracket, question_bracket, -1) == -1
-        ) or datatype == "ce":
+            (
+                datatype == "ngs"
+                and bracketed_stutter_id(ref_altformat, question_altformat, -1) == -1
+            )
+            or datatype == "ce"
+            or (
+                datatype == "lusplus"
+                and lusplus_stutter_id(ref_altformat, question_altformat, -1) == -1
+            )
+        ):
             all_type, stut_perc = minus1_stutter(
                 all_type,
                 stutter_thresh,
@@ -287,11 +300,19 @@ def allele_type(
                 quest_al_reads,
             )
     elif allele_diff == 2 and ref_reads > quest_al_reads:  # -2 stutter
-        allele = ce if datatype == "ce" else question_bracket
+        allele = ce if datatype == "ce" else question_altformat
         if check_2stutter(all_type_df, datatype, allele, brack_col)[0] is True:
             if (
-                datatype == "ngs" and bracketed_stutter_id(ref_bracket, question_bracket, -2) == -2
-            ) or datatype == "ce":
+                (
+                    datatype == "ngs"
+                    and bracketed_stutter_id(ref_altformat, question_altformat, -2) == -2
+                )
+                or datatype == "ce"
+                or (
+                    datatype == "lusplus"
+                    and lusplus_stutter_id(ref_altformat, question_altformat, -2) == -2
+                )
+            ):
                 ref_reads = check_2stutter(all_type_df, datatype, allele, brack_col)[1]
                 stutter_thresh_reads = stutter_thresh * ref_reads
                 all_type, stut_perc = minus2_stutter(
@@ -305,8 +326,13 @@ def allele_type(
                 )
     elif allele_diff == -1 and ref_reads > quest_al_reads:  # +1 stutter
         if (
-            datatype == "ngs" and bracketed_stutter_id(ref_bracket, question_bracket, 1) == 1
-        ) or datatype == "ce":
+            (datatype == "ngs" and bracketed_stutter_id(ref_altformat, question_altformat, 1) == 1)
+            or datatype == "ce"
+            or (
+                datatype == "lusplus"
+                and lusplus_stutter_id(ref_altformat, question_altformat, 1) == 1
+            )
+        ):
             all_type, stut_perc = plus1_stutter(
                 all_type, stutter_thresh, forward_thresh, ref_reads, al1_ref_reads, quest_al_reads
             )
@@ -342,9 +368,14 @@ def check_2stutter(stutter_df, allele_des, allele, brack_col):
                     break
         else:
             for k, row in stutter_df.iterrows():
-                bracket_test = stutter_df.loc[k, brack_col]
-                if bracketed_stutter_id(bracket_test, allele, -1) == -1:
-                    is_true, reads = True, stutter_df.loc[k, "Reads"]
+                if allele_des == "ngs":
+                    bracket_test = stutter_df.loc[k, brack_col]
+                    if bracketed_stutter_id(bracket_test, allele, -1) == -1:
+                        is_true, reads = True, stutter_df.loc[k, "Reads"]
+                else:
+                    lusp_test = stutter_df.loc[k, "LUS_Plus"]
+                    if lusplus_stutter_id(lusp_test, allele, -1) == -1:
+                        is_true, reads = True, stutter_df.loc[k, "Reads"]
     return is_true, reads
 
 
@@ -409,6 +440,26 @@ def bracketed_stutter_id(ref_bracket, quest_bracket, stutter_id):
     return stutter
 
 
+def lusplus_stutter_id(ref_lusp, question_lusp, stutter_id):
+    diffcount = 0
+    stutter = None
+    for j in range(1, len(ref_lusp.split("_"))):
+        diff = float(question_lusp.split("_")[j]) - round(float(ref_lusp.split("_")[j]))
+        if diff == 0:
+            continue
+        elif diff == stutter_id:
+            if diffcount == 0:
+                diffcount = diff
+                stutter = stutter_id
+            else:
+                stutter = None
+                break
+        else:
+            stutter = None
+            break
+    return stutter
+
+
 def allele_imbalance_check(allele_df):
     imbalance_df = pd.DataFrame(columns=["SampleID", "Locus", "Flags"])
     try:
@@ -430,10 +481,13 @@ def allele_imbalance_check(allele_df):
     return imbalance_df
 
 
-def check_D7(allele_df):
+def check_D7(allele_df, datatype):
     D7_df = pd.DataFrame(columns=["SampleID", "Locus", "Flags"])
     for i in range(len(allele_df)):
-        al = allele_df.loc[i, "CE_Allele"]
+        if datatype == "lusplus":
+            al = allele_df.loc[i, "LUS_Plus"].split("_")[0]
+        else:
+            al = allele_df.loc[i, "CE_Allele"]
         try:
             if str(al).split(".")[1] == "1" and allele_df.loc[i, "allele_type"] != "BelowAT":
                 print("D7 microvariants detected! Check flagged file for details.")
@@ -447,16 +501,18 @@ def check_D7(allele_df):
     return D7_df
 
 
-def flags(allele_df):
+def flags(allele_df, datatype):
     flags_df = pd.DataFrame(columns=["SampleID", "Locus", "Flags"])
     flags_df = flags_df.append(allele_counts(allele_df))
     flags_df = flags_df.append(allele_imbalance_check(allele_df))
-    flags_df = flags_df.append(check_D7(allele_df))
+    flags_df = flags_df.append(check_D7(allele_df, datatype))
     return flags_df
 
 
-def same_size_filter(df, metadata):
+def same_size_filter(df, metadata, datatype):
     final_df = pd.DataFrame()
+    if datatype == "lusplus":
+        df["CE_Allele"] = df["LUS_Plus"].apply(lambda x: x.split("_")[0])
     al_list = df["CE_Allele"].unique()
     for ce_allele in al_list:
         df_filt = (
@@ -475,4 +531,6 @@ def same_size_filter(df, metadata):
         else:
             final_df = final_df.append(df_filt)
     final_df = final_df.reset_index(drop=True)
+    if datatype == "lusplus":
+        final_df = final_df.drop("CE_Allele", axis=1)
     return final_df
diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/EFM_test_reference_lusplus.csv b/lusSTR/tests/data/LUSPlus_stutter_test/EFM_test_reference_lusplus.csv
new file mode 100644
index 00000000..dd94b709
--- /dev/null
+++ b/lusSTR/tests/data/LUSPlus_stutter_test/EFM_test_reference_lusplus.csv
@@ -0,0 +1,28 @@
+SampleName,Marker,Allele1,Allele2
+Positive_Control,CSF1PO,12_12_0,12_12_0
+Positive_Control,D10S1248,13_13,15_15
+Positive_Control,D12S391,18_11_6_0,23_14_9_0
+Positive_Control,D13S317,11_12_3_1,9_9_3_1
+Positive_Control,D16S539,13_13_0,9_9_0
+Positive_Control,D17S1301,11_11,12_12
+Positive_Control,D18S51,16_16_1,18_18_1
+Positive_Control,D19S433,13_11_1_0,14_12_1_0
+Positive_Control,D1S1656,12_11_1_0,13_13_0_0
+Positive_Control,D20S482,14_14,15_15
+Positive_Control,D21S11,29_11_4_6,31.2_11_5_6
+Positive_Control,D22S1045,16_13,16_13
+Positive_Control,D2S1338,22_12_1_7,25_15_1_7
+Positive_Control,D2S441,10_10_0,14_11_0
+Positive_Control,D3S1358,17_13_3,18_14_3
+Positive_Control,D4S2408,9_9_0,9_9_0
+Positive_Control,D5S818,12_12,12_12
+Positive_Control,D6S1043,12_12_0,20_14_1
+Positive_Control,D7S820,11_11_1_0,8_8_1_0
+Positive_Control,D8S1179,14_12_1_0,15_12_1_0
+Positive_Control,D9S1122,12_10,12_12
+Positive_Control,FGA,20_12_3_0,23_15_3_0
+Positive_Control,PENTA D,12_12,13_13
+Positive_Control,PENTA E,14_14,7_7
+Positive_Control,TH01,6_6,9.3_6
+Positive_Control,TPOX,11_11,11_11
+Positive_Control,VWA,16_12_3_1,19_14_4_1
diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
new file mode 100644
index 00000000..a093c1fd
--- /dev/null
+++ b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
@@ -0,0 +1,24 @@
+SampleID,Locus,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter,CE_Allele
+Sample1,D4S2408,10_10_0,1022,real_allele,,,,,,,
+Sample1,D4S2408,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,,,
+Sample1,D4S2408,8_8_0,1050,real_allele,,,,,,,
+Sample1,D8S1179,14_12_1_0,869,real_allele,,,,,,,
+Sample1,D8S1179,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212,
+Sample1,D8S1179,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201,
+Sample1,D9S1122,13_11,948,real_allele,,,,,,,
+Sample1,D9S1122,12_10,108,-1_stutter,13_11,,948.0,,,0.114,
+Sample1,D9S1122,11_11,991,real_allele,,,,,,,
+Sample1,D9S1122,10_10,87,-1_stutter,11_11,,991.0,,,0.088,
+Sample1,FGA,23_15_3_0,1436,real_allele,,,,,,,
+Sample1,FGA,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182,
+Sample1,FGA,21_13_3_0,48,BelowAT,,,,,0.013,,
+Sample1,FGA,20_12_3_0,1750,real_allele,,,,,,,
+Sample1,FGA,18_10_3_0,181,real_allele,,,,,,,
+Sample1,FGA,17_9_3_0,15,BelowAT,,,,,0.004,,
+Sample1,PENTA D,15_15,50,real_allele,,,,,,,
+Sample1,PENTA D,13_13,1000,real_allele,,,,,,,
+Sample1,PENTA E,7_7,505,real_allele,,,,,,,7.0
+Sample1,TH01,7_7,2197,real_allele,,,,,,,
+Sample1,TH01,6_6,1632,real_allele,,,,,,,
+Sample1,TH01,5_5,66,BelowAT,,,,,0.017,,
+Sample1,TPOX,11_11,15,BelowAT,,,,,1.0,,11.0
diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_lusplus.csv b/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_lusplus.csv
new file mode 100644
index 00000000..5ebcafee
--- /dev/null
+++ b/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_lusplus.csv
@@ -0,0 +1,28 @@
+SampleName,Marker,Allele1,Allele2,Allele3,Allele4,Height1,Height2,Height3,Height4
+Sample1,CSF1PO,,,,,,,,
+Sample1,D10S1248,,,,,,,,
+Sample1,D12S391,,,,,,,,
+Sample1,D13S317,,,,,,,,
+Sample1,D16S539,,,,,,,,
+Sample1,D17S1301,,,,,,,,
+Sample1,D18S51,,,,,,,,
+Sample1,D19S433,,,,,,,,
+Sample1,D1S1656,,,,,,,,
+Sample1,D20S482,,,,,,,,
+Sample1,D21S11,,,,,,,,
+Sample1,D22S1045,,,,,,,,
+Sample1,D2S1338,,,,,,,,
+Sample1,D2S441,,,,,,,,
+Sample1,D3S1358,,,,,,,,
+Sample1,D4S2408,10_10_0,8_8_0,9_9_0,,900,1000,1357,
+Sample1,D5S818,,,,,,,,
+Sample1,D6S1043,,,,,,,,
+Sample1,D7S820,,,,,,,,
+Sample1,D8S1179,12_9_1_0,13_10_1_0,13_11_1_0,14_11_1_0,26,89,95,739
+Sample1,D9S1122,10_10,11_11,12_10,13_11,87,991,108,948
+Sample1,FGA,18_10_3_0,20_12_3_0,22_14_3_0,23_15_3_0,181,1750,262,1436
+Sample1,PENTA D,13_13,15_15,,,1000,50,,
+Sample1,PENTA E,7_7,,,,505,,,
+Sample1,TH01,6_6,7_7,,,1632,2197,,
+Sample1,TPOX,,,,,,,,
+Sample1,VWA,,,,,,,,
diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv
new file mode 100644
index 00000000..9016d7a3
--- /dev/null
+++ b/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv
@@ -0,0 +1,26 @@
+SampleID,Locus,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter,CE_Allele
+Sample1,D4S2408,10_10_0,900.0,real_allele,,,,,,,
+Sample1,D4S2408,9_9_0,1357.0,real_allele,,,,,,,
+Sample1,D4S2408,8_8_0,1000.0,real_allele,,,,,,,
+Sample1,D8S1179,14_11_1_0,739.0,real_allele,,,,,,,
+Sample1,D8S1179,13_11_1_0,95.0,real_allele,,,,,,,
+Sample1,D8S1179,13_10_1_0,89.0,-1_stutter,14_11_1_0,,739.0,,,0.12,
+Sample1,D8S1179,12_9_1_0,26.0,real_allele,,,,,,,
+Sample1,D8S1179,12_10_1_0,11.0,BelowAT,,,,,0.01,,
+Sample1,D9S1122,13_11,948.0,real_allele,,,,,,,
+Sample1,D9S1122,12_10,108.0,-1_stutter,13_11,,948.0,,,0.114,
+Sample1,D9S1122,11_11,991.0,real_allele,,,,,,,
+Sample1,D9S1122,10_10,87.0,-1_stutter,11_11,,991.0,,,0.088,
+Sample1,FGA,23_15_3_0,1436.0,real_allele,,,,,,,
+Sample1,FGA,22_14_3_0,262.0,-1_stutter,23_15_3_0,,1436.0,,,0.182,
+Sample1,FGA,21_13_3_0,48.0,BelowAT,,,,,0.013,,
+Sample1,FGA,20_12_3_0,1750.0,real_allele,,,,,,,
+Sample1,FGA,18_10_3_0,181.0,real_allele,,,,,,,
+Sample1,FGA,17_9_3_0,15.0,BelowAT,,,,,0.004,,
+Sample1,PENTA D,15_15,50.0,real_allele,,,,,,,
+Sample1,PENTA D,13_13,1000.0,real_allele,,,,,,,
+Sample1,PENTA E,7_7,505.0,real_allele,,,,,,,7.0
+Sample1,TH01,7_7,2197.0,real_allele,,,,,,,
+Sample1,TH01,6_6,1632.0,real_allele,,,,,,,
+Sample1,TH01,5_5,66.0,BelowAT,,,,,0.017,,
+Sample1,TPOX,11_11,15.0,BelowAT,,,,,1.0,,11.0
diff --git a/lusSTR/tests/data/RU_stutter_test/EFM_test_reference.csv b/lusSTR/tests/data/RU_stutter_test/EFM_test_reference_ce.csv
similarity index 100%
rename from lusSTR/tests/data/RU_stutter_test/EFM_test_reference.csv
rename to lusSTR/tests/data/RU_stutter_test/EFM_test_reference_ce.csv
diff --git a/lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput.csv b/lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput_ce.csv
similarity index 100%
rename from lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput.csv
rename to lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput_ce.csv
diff --git a/lusSTR/tests/data/test_stutter_lusp.txt b/lusSTR/tests/data/test_stutter_lusp.txt
new file mode 100644
index 00000000..239b9c5c
--- /dev/null
+++ b/lusSTR/tests/data/test_stutter_lusp.txt
@@ -0,0 +1,25 @@
+SampleID	Project	Analysis	Locus	UAS_Output_Sequence	Forward_Strand_Sequence	UAS_Output_Bracketed_Notation	Forward_Strand_Bracketed_Notation	CE_Allele	LUS	LUS_Plus	Reads
+Sample1	Experiment_1.1	Experiment1	D8S1179	TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA	TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA	TCTA TCTG [TCTA]12	TCTA TCTG [TCTA]12	14	14_12	14_12_1_0	869
+Sample1	Experiment_1.1	Experiment1	D8S1179	TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA	TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA	[TCTA]2 TCTG [TCTA]10	[TCTA]2 TCTG [TCTA]10	13	13_11	13_11_1_0	184
+Sample1	Experiment_1.1	Experiment1	D8S1179	TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA	TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA	[TCTA]2 TCTG [TCTA]9	[TCTA]2 TCTG [TCTA]9	12	12_10	12_10_1_0	37
+Sample1	Experiment_1.1	Experiment1	FGA	TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC	GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA	[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2	[GGAA]2 GGAG [AAAG]12 AGAA AAAA [GAAA]3	20	20_12	20_12_3_0	1750
+Sample1	Experiment_1.1	Experiment1	FGA	TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC	GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA	[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2	[GGAA]2 GGAG [AAAG]15 AGAA AAAA [GAAA]3	23	23_15	23_15_3_0	1436
+Sample1	Experiment_1.1	Experiment1	FGA	TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC	GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA	[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2	[GGAA]2 GGAG [AAAG]14 AGAA AAAA [GAAA]3	22	22_14	22_14_3_0	262
+Sample1	Experiment_1.1	Experiment1	FGA	TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC	GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA	[TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2	[GGAA]2 GGAG [AAAG]13 AGAA AAAA [GAAA]3	21	21_13	21_13_3_0	48
+Sample1	Experiment_1.1	Experiment1	FGA	TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC	GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA	[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2	[GGAA]2 GGAG [AAAG]10 AGAA AAAA [GAAA]3	18	18_10	18_10_3_0	181
+Sample1	Experiment_1.1	Experiment1	FGA	TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC	GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA	[TTTC]3 TTTT TTCT [CTTT]9 CTCC [TTCC]2	[GGAA]2 GGAG [AAAG]9 AGAA AAAA [GAAA]3	17	17_9	17_9_3_0	15
+Sample1	Experiment_1.1	Experiment1	TH01	AATGAATGAATGAATGAATGAATGAATG	AATGAATGAATGAATGAATGAATGAATG	[AATG]7	[AATG]7	7	7_7	7_7	2197
+Sample1	Experiment_1.1	Experiment1	TH01	AATGAATGAATGAATGAATGAATG	AATGAATGAATGAATGAATGAATG	[AATG]6	[AATG]6	6	6_6	6_6	1632
+Sample1	Experiment_1.1	Experiment1	TH01	AATGAATGAATGAATGAATG	AATGAATGAATGAATGAATG	[AATG]5	[AATG]5	5	5_5	5_5	66
+Sample1	Experiment_1.1	Experiment1	D9S1122	TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	[TAGA]11	[TAGA]11	11	11_11	11_11	991
+Sample1	Experiment_1.1	Experiment1	D9S1122	TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	TAGA TCGA [TAGA]11	TAGA TCGA [TAGA]11	13	13_11	13_11	948
+Sample1	Experiment_1.1	Experiment1	D9S1122	TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	TAGA TCGA [TAGA]10	TAGA TCGA [TAGA]10	12	12_10	12_10	108
+Sample1	Experiment_1.1	Experiment1	D9S1122	TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA	[TAGA]10	[TAGA]10	10	10_10	10_10	87
+Sample1	Experiment_1.1	Experiment1	VWA	TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA	TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA	TCTA [TCTG]3 [TCTA]12 TCCA TCTA	TAGA TGGA [TAGA]12 [CAGA]3 TAGA	16	16_12	16_12_3_1	6
+Sample1	Experiment_1.1	Experiment1	TPOX	AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG	AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG	[AATG]11	[AATG]11	11	11_11	11_11	15
+Sample1	Experiment_1.1	Experiment1	PENTA E	AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA	TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT	[AAAGA]7	[TCTTT]7	7	7_7	7_7	505
+Sample1	Experiment_1.1	Experiment1	PENTA D	AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA	AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA	AAAAG [AAAGA]13	AAAAG [AAAGA]13	13	13_13	13_13	1000
+Sample1	Experiment_1.1	Experiment1	PENTA D	AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA	AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA	AAAAG [AAAGA]13	AAAAG [AAAGA]13	15	15_15	15_15	50
+Sample1	Experiment_1.1	Experiment1	D4S2408	ATCTATCTATCTATCTATCTATCTATCTATCTATCT	ATCTATCTATCTATCTATCTATCTATCTATCTATCT	[ATCT]9	[ATCT]9	9	9_9	9_9_0	116
+Sample1	Experiment_1.1	Experiment1	D4S2408	ATCTATCTATCTATCTATCTATCTATCTATCT	ATCTATCTATCTATCTATCTATCTATCTATCT	[ATCT]8	[ATCT]8	8	8_8	8_8_0	1050
+Sample1	Experiment_1.1	Experiment1	D4S2408	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	[ATCT]10	[ATCT]10	10	10_10	10_10_0	1022
\ No newline at end of file
diff --git a/lusSTR/tests/test_filters.py b/lusSTR/tests/test_filters.py
index 96b9cf8b..d5a85681 100644
--- a/lusSTR/tests/test_filters.py
+++ b/lusSTR/tests/test_filters.py
@@ -157,14 +157,28 @@ def test_plus1stutter(
     assert test_stut_perc == stut_perc
 
 
-def test_EFMoutput_format(tmp_path):
+@pytest.mark.parametrize(
+    "outputdir, datatype", [("RU_stutter_test/", "ce"), ("LUSPlus_stutter_test/", "lusplus")]
+)
+def test_EFMoutput_format(outputdir, datatype, tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_stutter.txt")
-    exp_out = data_file("RU_stutter_test/test_filtering_EFMoutput.csv")
-    exp_info_out = data_file("RU_stutter_test/test_filtering_EFMoutput_sequence_info.csv")
-    obs_out = str(tmp_path / "WD/test_output/test_output_evidence_ce.csv")
+    exp_out = data_file(f"{outputdir}test_filtering_EFMoutput_{datatype}.csv")
+    exp_info_out = data_file(f"{outputdir}test_filtering_EFMoutput_sequence_info.csv")
+    obs_out = str(tmp_path / f"WD/test_output/test_output_evidence_{datatype}.csv")
     obs_info_out = str(tmp_path / "WD/test_output/test_output_sequence_info.csv")
-    arglist = ["config", "-w", str_path, "-o", "test_output", "--efm", "--ce", "--input", "WD"]
+    arglist = [
+        "config",
+        "-w",
+        str_path,
+        "-o",
+        "test_output",
+        "--efm",
+        "--str-type",
+        datatype,
+        "--input",
+        "WD",
+    ]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     shutil.copyfile(inputfile, os.path.join(str_path, "test_output.csv"))
     shutil.copyfile(inputfile, os.path.join(str_path, "test_output.txt"))
@@ -183,10 +197,17 @@ def test_STRmixoutput_format(outputdir, datatype, tmp_path):
     exp_info_out = data_file(f"{outputdir}STRmix_Files_sequence_info.csv")
     obs_out = str(tmp_path / f"WD/STRmix_Files/Sample1_evidence_{datatype}.csv")
     obs_info_out = str(tmp_path / f"WD/STRmix_Files/STRmix_Files_sequence_info.csv")
-    if datatype == "ngs":
-        arglist = ["config", "-w", str_path, "--input", "WD", "-o", "STRmix_Files"]
-    else:
-        arglist = ["config", "-w", str_path, "--input", "WD", "-o", "STRmix_Files", "--ce"]
+    arglist = [
+        "config",
+        "-w",
+        str_path,
+        "--input",
+        "WD",
+        "-o",
+        "STRmix_Files",
+        "--str-type",
+        datatype,
+    ]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.csv"))
     shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.txt"))
@@ -224,19 +245,30 @@ def test_flags(tmp_path):
     assert filecmp.cmp(exp_out, obs_out) is True
 
 
-def test_efm_reference(tmp_path):
+@pytest.mark.parametrize(
+    "outputdir, datatype", [("RU_stutter_test/", "ce"), ("LUSPlus_stutter_test/", "lusplus")]
+)
+def test_efm_reference(outputdir, datatype, tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_references.txt")
-    exp_out = data_file("RU_stutter_test/EFM_test_reference.csv")
-    obs_efm_out = str(tmp_path / "WD/lusstr_output/lusstr_output_reference_ce.csv")
-    arglist = ["config", "-w", str_path, "--input", "WD", "--efm", "--reference", "--ce"]
+    exp_out = data_file(f"{outputdir}EFM_test_reference_{datatype}.csv")
+    obs_efm_out = str(tmp_path / f"WD/lusstr_output/lusstr_output_reference_{datatype}.csv")
+    arglist = [
+        "config",
+        "-w",
+        str_path,
+        "--input",
+        "WD",
+        "--efm",
+        "--reference",
+        "--str-type",
+        datatype,
+    ]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.csv"))
     shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.txt"))
-    print(os.listdir(str_path))
     all_arglist = ["strs", "all", "-w", str_path]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist))
-    print(os.listdir(f"{str_path}/lusstr_output"))
     assert filecmp.cmp(exp_out, obs_efm_out) is True
 
 
@@ -248,20 +280,18 @@ def test_strmix_reference(outputdir, datatype, tmp_path):
     inputfile = data_file("test_references.txt")
     exp_out = data_file(f"{outputdir}Positive_Control_reference_{datatype}.csv")
     obs_out = str(tmp_path / f"WD/STRmix_Files/Positive_Control_reference_{datatype}.csv")
-    if datatype == "ngs":
-        arglist = ["config", "-w", str_path, "--input", "WD", "-o", "STRmix_Files", "--reference"]
-    else:
-        arglist = [
-            "config",
-            "-w",
-            str_path,
-            "--input",
-            "WD",
-            "-o",
-            "STRmix_Files",
-            "--ce",
-            "--reference",
-        ]
+    arglist = [
+        "config",
+        "-w",
+        str_path,
+        "--input",
+        "WD",
+        "-o",
+        "STRmix_Files",
+        "--reference",
+        "--str-type",
+        datatype,
+    ]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.csv"))
     shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.txt"))
@@ -304,6 +334,23 @@ def test_ngs_stutter(ref_bracket, quest_bracket, stutter, actual_call):
     assert test_stut == actual_call
 
 
+@pytest.mark.parametrize(
+    "ref_lusp, question_lusp, stutter, actual_call",
+    [
+        ("10_10_1_0", "9_9_1_0", -1, -1),
+        ("10_10_1_0", "9_10_0_0", -1, -1),
+        ("12_11_1_0", "11_9_2_0", -1, None),
+        ("19_13_1", "20_14_1", 1, 1),
+        ("19_13_1", "20_15_0", 1, None),
+        ("19_13_1", "17_11_1", -2, -2),
+        ("19_13_1", "17_12_0", -2, None),
+    ],
+)
+def test_lusplus_stutter(ref_lusp, question_lusp, stutter, actual_call):
+    test_stut = lusSTR.scripts.filter_settings.lusplus_stutter_id(ref_lusp, question_lusp, stutter)
+    assert test_stut == actual_call
+
+
 def test_forward_strand_orientation(tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_stutter.txt")
@@ -326,3 +373,30 @@ def test_forward_strand_orientation(tmp_path):
     all_arglist = ["strs", "all", "-w", str_path]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist))
     assert filecmp.cmp(exp_out, obs_out) is True
+
+
+def test_lusplus_sequence_info(tmp_path):
+    str_path = str(tmp_path / "WD")
+    inputfile = data_file("test_stutter_lusp.txt")
+    exp_out = data_file("LUSPlus_stutter_test/LUSPlus_sequence_info.csv")
+    obs_out = str(tmp_path / f"WD/LUSPlus/LUSPlus_sequence_info.csv")
+    arglist = [
+        "config",
+        "-w",
+        str_path,
+        "--input",
+        "WD",
+        "-o",
+        "LUSPlus",
+        "--strand",
+        "forward",
+        "--str-type",
+        "lusplus",
+        "--efm",
+    ]
+    lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
+    shutil.copyfile(inputfile, os.path.join(str_path, "LUSPlus.csv"))
+    shutil.copyfile(inputfile, os.path.join(str_path, "LUSPlus.txt"))
+    all_arglist = ["strs", "all", "-w", str_path]
+    lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist))
+    assert filecmp.cmp(exp_out, obs_out) is True
diff --git a/lusSTR/tests/test_suite.py b/lusSTR/tests/test_suite.py
index 8cfcef4c..7ce41c1e 100644
--- a/lusSTR/tests/test_suite.py
+++ b/lusSTR/tests/test_suite.py
@@ -254,7 +254,7 @@ def test_config(tmp_path):
 
 def test_config_settings(tmp_path):
     obs_config = str(tmp_path / "config.yaml")
-    arglist = ["config", "-w", str(tmp_path), "--straitrazor", "--reference", "--ce"]
+    arglist = ["config", "-w", str(tmp_path), "--straitrazor", "--reference", "--str-type", "ce"]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     with open(obs_config, "r") as file:
         data = yaml.safe_load(file)
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index c0e828e3..790b58f3 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -77,9 +77,10 @@ def process_strs(dict_loc, datatype, seq_col):
             data_combine = data.groupby(["SampleID", "Locus", "CE_Allele"], as_index=False)[
                 "Reads"
             ].sum()
-            data_order = data_combine.sort_values(by=["CE_Allele"], ascending=False).reset_index(
-                drop=True
-            )
+        elif datatype == "lusplus":
+            data_combine = data.groupby(
+                ["SampleID", "Locus", "CE_Allele", "LUS_Plus"], as_index=False
+            )["Reads"].sum()
         else:
             data_combine = data[
                 [
@@ -91,9 +92,9 @@ def process_strs(dict_loc, datatype, seq_col):
                     "Reads",
                 ]
             ]
-            data_order = data_combine.sort_values(by=["CE_Allele"], ascending=False).reset_index(
-                drop=True
-            )
+        data_order = data_combine.sort_values(by=["CE_Allele"], ascending=False).reset_index(
+            drop=True
+        )
         total_reads = data_order["Reads"].sum()
         locus = key[1]
         data_order = data_order.reindex(
@@ -111,28 +112,43 @@ def process_strs(dict_loc, datatype, seq_col):
         )
         filtered_df = filters(data_order, locus, total_reads, datatype, brack_col)
         final_df = final_df.append(filtered_df)
-        flags_df = flags_df.append(flags(filtered_df))
-    final_df = final_df.astype({"CE_Allele": "float64", "Reads": "int"})
+        flags_df = flags_df.append(flags(filtered_df, datatype))
+    if datatype == "ce" or datatype == "ngs":
+        final_df = final_df.astype({"CE_Allele": "float64", "Reads": "int"})
     return final_df, flags_df
 
 
-def EFM_output(profile, outfile, profile_type, separate=False):
+def EFM_output(profile, outfile, profile_type, data_type, separate=False):
     if profile_type == "reference":
         profile = profile[profile.allele_type == "real_allele"]
     else:
         profile = profile[profile.allele_type != "BelowAT"]
-    efm_profile = populate_efm_profile(profile)
+    efm_profile = populate_efm_profile(profile, data_type)
     if separate:
-        write_sample_specific_efm_profiles(efm_profile, profile_type, outfile)
+        write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outfile)
     else:
-        write_aggregate_efm_profile(efm_profile, profile_type, outfile)
+        write_aggregate_efm_profile(efm_profile, profile_type, data_type, outfile)
 
 
-def populate_efm_profile(profile):
-    profile = profile.sort_values(by=["SampleID", "Locus", "CE_Allele"])
+def populate_efm_profile(profile, data_type):
+    if data_type == "ce":
+        prof_col = "CE_Allele"
+    elif data_type == "lusplus":
+        prof_col = "LUS_Plus"
+    else:
+        message = (
+            f"Incorrect data type {data_type} specified for EFM. Please choose either "
+            "'ce' or 'lusplus'."
+        )
+        raise ValueError(message)
+    profile = profile.sort_values(by=["SampleID", "Locus", prof_col])
+    profile = profile.rename(columns={prof_col: "Allele"})
     allele_heights = defaultdict(lambda: defaultdict(dict))
     for i, row in profile.iterrows():
-        allele_heights[row.SampleID][row.Locus][float(row.CE_Allele)] = int(row.Reads)
+        if data_type == "ce":
+            allele_heights[row.SampleID][row.Locus][float(row.Allele)] = int(row.Reads)
+        else:
+            allele_heights[row.SampleID][row.Locus][row.Allele] = int(row.Reads)
     max_num_alleles = determine_max_num_alleles(allele_heights)
     reformatted_profile = list()
     for sampleid, loci in allele_heights.items():
@@ -161,13 +177,13 @@ def populate_efm_profile(profile):
     return efm_profile
 
 
-def write_sample_specific_efm_profiles(efm_profile, profile_type, outdir):
+def write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outdir):
     Path(outdir).mkdir(parents=True, exist_ok=True)
     for sample in efm_profile.SampleName:
         sample_profile = efm_profile[efm_profile.SampleName == sample].reset_index(drop=True)
         sample_profile.dropna(axis=1, how="all", inplace=True)
         if profile_type == "evidence":
-            sample_profile.to_csv(f"{outdir}/{sample}_evidence_ce.csv", index=False)
+            sample_profile.to_csv(f"{outdir}/{sample}_evidence_{data_type}.csv", index=False)
         else:
             num_alleles = (len(sample_profile.columns) - 2) / 2
             if num_alleles > 2:
@@ -180,19 +196,21 @@ def write_sample_specific_efm_profiles(efm_profile, profile_type, outdir):
             for i in range(len(sample_profile)):
                 if pd.isna(sample_profile.loc[i, "Allele2"]):
                     sample_profile.loc[i, "Allele2"] = sample_profile.loc[i, "Allele1"]
-            sample_profile.iloc[:, :4].to_csv(f"{outdir}/{sample}_reference_ce.csv", index=False)
+            sample_profile.iloc[:, :4].to_csv(
+                f"{outdir}/{sample}_reference_{data_type}.csv", index=False
+            )
 
 
-def write_aggregate_efm_profile(efm_profile, profile_type, outfile):
+def write_aggregate_efm_profile(efm_profile, profile_type, data_type, outfile):
     Path(outfile).mkdir(parents=True, exist_ok=True)
     name = os.path.basename(outfile)
     if profile_type == "evidence":
-        efm_profile.to_csv(f"{outfile}/{name}_evidence_ce.csv", index=False)
+        efm_profile.to_csv(f"{outfile}/{name}_evidence_{data_type}.csv", index=False)
     else:
         for i in range(len(efm_profile)):
             if pd.isna(efm_profile.loc[i, "Allele2"]):
                 efm_profile.loc[i, "Allele2"] = efm_profile.loc[i, "Allele1"]
-        efm_profile.iloc[:, :4].to_csv(f"{outfile}/{name}_reference_ce.csv", index=False)
+        efm_profile.iloc[:, :4].to_csv(f"{outfile}/{name}_reference_{data_type}.csv", index=False)
 
 
 def determine_max_num_alleles(allele_heights):
@@ -212,6 +230,12 @@ def STRmix_output(profile, outdir, profile_type, data_type, seq_col):
         filtered_df = profile[profile.allele_type != "BelowAT"]
     if data_type == "ce":
         strmix_profile = strmix_ce_processing(filtered_df)
+    elif data_type == "lusplus":
+        error_message = (
+            "LUSPlus specified as the data type. STRmix does not accept LUSPlus! Please rerun with "
+            "'ce' or 'ngs' specified."
+        )
+        raise ValueError(error_message)
     else:
         strmix_profile = filtered_df.loc[:, ["SampleID", "Locus", "CE_Allele", seq_col, "Reads"]]
         strmix_profile.rename(
@@ -302,7 +326,7 @@ def main(
     input = str(input)
     if profile_type not in ("evidence", "reference"):
         raise ValueError(f"unknown profile type '{profile_type}'")
-    if data_type not in ("ce", "ngs"):
+    if data_type not in ("ce", "ngs", "lusplus"):
         raise ValueError(f"unknown data type '{data_type}'")
     if output_type not in ("efm", "strmix"):
         raise ValueError(f"unknown output type '{output_type}'")
@@ -315,14 +339,14 @@ def main(
     if nofilters:
         full_df["allele_type"] = "real_allele"
         if output_type == "efm":
-            EFM_output(full_df, outpath, profile_type, separate)
+            EFM_output(full_df, outpath, profile_type, data_type, separate)
         else:
             STRmix_output(full_df, outpath, profile_type, data_type, seq_col)
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col)
         if output_type == "efm":
-            EFM_output(final_df, outpath, profile_type, separate)
+            EFM_output(final_df, outpath, profile_type, data_type, separate)
         else:
             STRmix_output(final_df, outpath, profile_type, data_type, seq_col)
         if info: