bioforensics · standage · Dec 11, 2023 · Dec 1, 2023 · Dec 6, 2023 · Dec 6, 2023
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -12,3 +12,4 @@ include lusSTR/tests/data/NGS_stutter_test/*
 include lusSTR/tests/data/kinsnps/*
 include lusSTR/tests/data/lusstr_output/*
 include lusSTR/tests/data/LUSPlus_stutter_test/*
+include lusSTR/tests/data/MPSproto_test/*
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 lusSTR is a tool written in Python to convert NGS sequence data of forensic STR loci to different sequence representations (sequence bracketed form) and allele designations (CE allele, LUS/LUS+ alleles) for ease in downstream analyses. See the below section ```Converting STR sequences to other sequence representations and allele designations``` for more information. 
 
-Further, lusSTR can perform filtering and stutter identification using the CE allele, the LUS+ allele, or the bracketed sequence form for autosomal loci and create files for direct input into two probabilistic genotyping software packages, EuroForMix (CE and LUS+) and STRmix (CE and NGS). 
+Further, lusSTR can perform filtering and stutter identification using the CE allele, the LUS+ allele, or the bracketed sequence form for autosomal loci and create files for direct input into three probabilistic genotyping software packages, EuroForMix (CE and LUS+), MPSproto (NGS), and STRmix (CE and NGS). 
 
 lusSTR also processes SNP data from the Verogen ForenSeq and Kintelligence panels and create evidence and/or reference files for use in EFM. See the below section ```SNP Data Processing``` for more information.
 
@@ -65,12 +65,12 @@ kit: ```forenseq``` (forenseq/powerseq) (invoke the ```--powerseq``` flag if usi
 nocombine: ```False``` (True/False); do not combine identical sequences during the ```convert``` step, if using STRait Razor data. (invoke the ```--nocombine``` flag)  
 
 ### filter settings  
-output_type: ```strmix``` (strmix/efm) (invoke ```--efm``` flag if creating output for EuroForMix)  
+output_type: ```strmix``` (strmix/efm/mpsproto) (indicate using the ```--software``` flag)  
 profile_type: ```evidence``` (evidence/reference) (invoke ```--reference``` flag if creating a reference output file)  
 data_type: ```ngs``` (ce/ngs/lusplus) (indicate using the ```--str-type```)  
 info: ```True``` (True/False); create allele information file (invoke ```--noinfo``` flag to not create the allele information file)  
-separate: ```False``` (True/False); for EFM only, if True will create individual files for samples; if False, will create one file with all samples (invoke ```--separate``` flag to separate EFM output files)  
-nofilters: ```False``` (True/False); skip all filtering steps but still creates EFM/STRmix output files (invoke ```--nofilters``` flag)  
+separate: ```False``` (True/False); for EFM/MPSproto only: if True, creates an individual output file for each sample; if False, creates one file containing all samples (invoke the ```--separate``` flag to separate EFM/MPSproto output files)  
+nofilters: ```False``` (True/False); skip all filtering steps but still create EFM/MPSproto/STRmix output files (invoke ```--nofilters``` flag)  
 strand: ```uas``` (uas/forward); indicates the strand orientation in which to report the sequence in the final output table for STRmix NGS only (indicate using ```--strand```)
 
 One additional argument can be provided with ```lusstr config```:  
@@ -189,7 +189,7 @@ Each locus is checked for containing greater than 2 alleles (indicating a potent
 
 When using STRmix data, the data type can be specified using the ```data-type``` setting as either ```ce```, ```ngs``` or ```lusplus``` (default is ```ngs```). If ```ngs``` or ```lusplus``` is specified, the same size filter is applied following the stutter filter. Further, the columns and column names in the output file differ based on the data type.
 
-Finally, output files are created for direct use in EuroForMix (EFM) or STRmix. If EFM is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```separate``` setting specified in the config file). If STRmix is specified, a directory containing files for each individual sample is created. The ```profile-type``` setting allows for the creation of either a ```reference``` or ```evidence``` profile. Both EuroForMix and STRmix require different formatting depending on the type of sample. 
+Finally, output files are created for direct use in EuroForMix (EFM), MPSproto, or STRmix. If EFM or MPSproto is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```separate``` setting specified in the config file). If STRmix is specified, a directory containing files for each individual sample is created. The ```profile-type``` setting allows for the creation of either a ```reference``` or ```evidence``` profile. EuroForMix/MPSproto and STRmix each require different formatting depending on the type of sample. 
 
 ___
 

diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py
@@ -89,8 +89,8 @@ def edit_str_config(config, args):
         data["profile_type"] = "reference"
     if args.datatype:
         data["data_type"] = args.datatype
-    if args.efm:
-        data["output_type"] = "efm"
+    if args.software:
+        data["output_type"] = args.software
     if args.strand:
         data["strand"] = args.strand
     return data
@@ -126,11 +126,15 @@ def subparser(subparsers):
         "--reference", action="store_true", 
         help="Use for creating Reference profiles for STR workflow"
     )
-    p.add_argument("--efm", action="store_true",help="Use to create EuroForMix profiles")
+    p.add_argument(
+        "--software", choices=["efm", "mpsproto", "strmix"], default="strmix",
+        help="Specify the probabilistic genotyping software package of choice. The final output"
+        " files will be in the correct format for direct use. Default is strmix."
+    )
     p.add_argument(
         "--str-type", choices=["ce", "ngs", "lusplus"], default="ngs",
         dest="datatype", help="Data type for STRs. Options are: CE allele ('ce'), sequence "
-        "('ngs'), or LUS+ allele ('lusplus'). Default is 'ngs'.",
+        "or bracketed sequence form('ngs'), or LUS+ allele ('lusplus'). Default is 'ngs'.",
     )
     p.add_argument(
         "--noinfo", action="store_true", 

diff --git a/lusSTR/tests/data/MPSproto_test/EFM_test_reference_ngs.csv b/lusSTR/tests/data/MPSproto_test/EFM_test_reference_ngs.csv
@@ -0,0 +1,28 @@
+SampleName,Marker,Allele1,Allele2
+Positive_Control,CSF1PO,[AGAT]12,[AGAT]12
+Positive_Control,D10S1248,[GGAA]13,[GGAA]15
+Positive_Control,D12S391,[AGAT]11 [AGAC]6 AGAT,[AGAT]14 [AGAC]9
+Positive_Control,D13S317,[TATC]12 AATC [ATCT]3 TTCT GTCT GTC,[TATC]9 [AATC]2 [ATCT]3 TTCT GTCT GTC
+Positive_Control,D16S539,[GATA]13,[GATA]9
+Positive_Control,D17S1301,[AGAT]11,[AGAT]12
+Positive_Control,D18S51,[AGAA]16 AAAG AGAG AG,[AGAA]18 AAAG AGAG AG
+Positive_Control,D19S433,AAGG AAAG AAGG TAGG [AAGG]11 AGAG AGGA AGAA AGAG AG,AAGG AAAG AAGG TAGG [AAGG]12 AGAG AGGA AGAA AGAG AG
+Positive_Control,D1S1656,[TAGA]11 TAGG [TGTG]2 TG,[TAGA]13 [TGTG]2 TG
+Positive_Control,D20S482,[AGAT]14,[AGAT]15
+Positive_Control,D21S11,[TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCATA [TCTA]11,[TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCATA [TCTA]11 TA TCTA
+Positive_Control,D22S1045,[ATT]13 ACT [ATT]2,[ATT]13 ACT [ATT]2
+Positive_Control,D2S1338,[TGCC]7 [TTCC]12 GTCC [TTCC]2,[TGCC]7 [TTCC]15 GTCC [TTCC]2
+Positive_Control,D2S441,[TCTA]10,[TCTA]11 TTTA [TCTA]2
+Positive_Control,D3S1358,TCTA [TCTG]3 [TCTA]13,TCTA [TCTG]3 [TCTA]14
+Positive_Control,D4S2408,[ATCT]9,[ATCT]9
+Positive_Control,D5S818,[AGAT]12 AGAG,[AGAT]12 AGAG
+Positive_Control,D6S1043,[AGAT]12,[AGAT]14 ACAT [AGAT]5
+Positive_Control,D7S820,[GATA]11 GACA GATT GATA GTTT,[GATA]8 GACA GATT GATA GTTT
+Positive_Control,D8S1179,TCTA TCTG [TCTA]12,[TCTA]2 TCTG [TCTA]12
+Positive_Control,D9S1122,TAGA TCGA [TAGA]10,[TAGA]12
+Positive_Control,FGA,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2
+Positive_Control,PENTA D,AAAAG [AAAGA]12,AAAAG [AAAGA]13
+Positive_Control,PENTA E,[AAAGA]14,[AAAGA]7
+Positive_Control,TH01,[AATG]6,[AATG]6 ATG [AATG]3
+Positive_Control,TPOX,[AATG]11,[AATG]11
+Positive_Control,VWA,TCTA [TCTG]3 [TCTA]12 TCCA TCTA,TCTA [TCTG]4 [TCTA]14 TCCA TCTA
diff --git a/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_ngs.csv b/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_ngs.csv
@@ -0,0 +1,28 @@
+SampleName,Marker,Allele1,Allele2,Allele3,Allele4,Height1,Height2,Height3,Height4
+Sample1,CSF1PO,,,,,,,,
+Sample1,D10S1248,,,,,,,,
+Sample1,D12S391,,,,,,,,
+Sample1,D13S317,,,,,,,,
+Sample1,D16S539,,,,,,,,
+Sample1,D17S1301,,,,,,,,
+Sample1,D18S51,,,,,,,,
+Sample1,D19S433,,,,,,,,
+Sample1,D1S1656,,,,,,,,
+Sample1,D20S482,,,,,,,,
+Sample1,D21S11,,,,,,,,
+Sample1,D22S1045,,,,,,,,
+Sample1,D2S1338,,,,,,,,
+Sample1,D2S441,,,,,,,,
+Sample1,D3S1358,,,,,,,,
+Sample1,D4S2408,[ATCT]10,[ATCT]8,[ATCT]9,,900,1000,1357,
+Sample1,D5S818,,,,,,,,
+Sample1,D6S1043,,,,,,,,
+Sample1,D7S820,,,,,,,,
+Sample1,D8S1179,TCTA TCTG [TCTA]11,[TCTA]2 TCTG [TCTA]10,[TCTA]2 TCTG [TCTA]11,[TCTA]2 TCTG [TCTA]9,95,89,739,26
+Sample1,D9S1122,TAGA TCGA [TAGA]10,TAGA TCGA [TAGA]11,[TAGA]10,[TAGA]11,108,948,87,991
+Sample1,FGA,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,181,1750,262,1436
+Sample1,PENTA D,AAAAG [AAAGA]13,,,,1000,,,
+Sample1,PENTA E,[AAAGA]7,,,,505,,,
+Sample1,TH01,[AATG]6,[AATG]7,,,1632,2197,,
+Sample1,TPOX,,,,,,,,
+Sample1,VWA,,,,,,,,
diff --git a/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv
@@ -0,0 +1,26 @@
+SampleID,Locus,UAS_Output_Sequence,CE_Allele,UAS_Output_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter
+Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,10.0,[ATCT]10,900,real_allele,,,,,,
+Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,9.0,[ATCT]9,1357,real_allele,,,,,,
+Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCT,8.0,[ATCT]8,1000,real_allele,,,,,,
+Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,real_allele,,,,,,
+Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,TCTA TCTG [TCTA]11,95,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.129
+Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,[TCTA]2 TCTG [TCTA]10,89,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.12
+Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,[TCTA]2 TCTG [TCTA]9,26,-2_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.035
+Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,TCTA TCTG [TCTA]10,11,BelowAT,,,,,0.01,
+Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13.0,TAGA TCGA [TAGA]11,948,real_allele,,,,,,
+Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,12.0,TAGA TCGA [TAGA]10,108,-1_stutter,TAGA TCGA [TAGA]11,,948.0,,,0.114
+Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,[TAGA]11,991,real_allele,,,,,,
+Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,10.0,[TAGA]10,87,-1_stutter,[TAGA]11,,991.0,,,0.088
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,23.0,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,1436,real_allele,,,,,,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,22.0,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,262,-1_stutter,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,,1436.0,,,0.182
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,21.0,[TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2,48,BelowAT,,,,,0.013,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,20.0,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,1750,real_allele,,,,,,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,18.0,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,181,real_allele,,,,,,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,17.0,[TTTC]3 TTTT TTCT [CTTT]9 CTCC [TTCC]2,15,BelowAT,,,,,0.004,
+Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,15.0,AAAAG [AAAGA]13,50,real_allele,,,,,,
+Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,AAAAG [AAAGA]13,1000,real_allele,,,,,,
+Sample1,PENTA E,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,7.0,[AAAGA]7,505,real_allele,,,,,,
+Sample1,TH01,AATGAATGAATGAATGAATGAATGAATG,7.0,[AATG]7,2197,real_allele,,,,,,
+Sample1,TH01,AATGAATGAATGAATGAATGAATG,6.0,[AATG]6,1632,real_allele,,,,,,
+Sample1,TH01,AATGAATGAATGAATGAATG,5.0,[AATG]5,66,BelowAT,,,,,0.017,
+Sample1,TPOX,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,11.0,[AATG]11,15,BelowAT,,,,,1.0,
diff --git a/lusSTR/tests/test_filters.py b/lusSTR/tests/test_filters.py
@@ -158,9 +158,14 @@ def test_plus1stutter(
 
 
 @pytest.mark.parametrize(
-    "outputdir, datatype", [("RU_stutter_test/", "ce"), ("LUSPlus_stutter_test/", "lusplus")]
+    "outputdir, datatype, software",
+    [
+        ("RU_stutter_test/", "ce", "efm"),
+        ("LUSPlus_stutter_test/", "lusplus", "efm"),
+        ("MPSproto_test/", "ngs", "mpsproto"),
+    ],
 )
-def test_EFMoutput_format(outputdir, datatype, tmp_path):
+def test_EFMoutput_format(outputdir, datatype, software, tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_stutter.txt")
     exp_out = data_file(f"{outputdir}test_filtering_EFMoutput_{datatype}.csv")
@@ -173,7 +178,8 @@ def test_EFMoutput_format(outputdir, datatype, tmp_path):
         str_path,
         "-o",
         "test_output",
-        "--efm",
+        "--software",
+        software,
         "--str-type",
         datatype,
         "--input",
@@ -246,9 +252,14 @@ def test_flags(tmp_path):
 
 
 @pytest.mark.parametrize(
-    "outputdir, datatype", [("RU_stutter_test/", "ce"), ("LUSPlus_stutter_test/", "lusplus")]
+    "outputdir, datatype, software",
+    [
+        ("RU_stutter_test/", "ce", "efm"),
+        ("LUSPlus_stutter_test/", "lusplus", "efm"),
+        ("MPSproto_test/", "ngs", "mpsproto"),
+    ],
 )
-def test_efm_reference(outputdir, datatype, tmp_path):
+def test_efm_reference(outputdir, datatype, software, tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_references.txt")
     exp_out = data_file(f"{outputdir}EFM_test_reference_{datatype}.csv")
@@ -259,7 +270,8 @@ def test_efm_reference(outputdir, datatype, tmp_path):
         str_path,
         "--input",
         "WD",
-        "--efm",
+        "--software",
+        software,
         "--reference",
         "--str-type",
         datatype,
@@ -392,7 +404,8 @@ def test_lusplus_sequence_info(tmp_path):
         "forward",
         "--str-type",
         "lusplus",
-        "--efm",
+        "--software",
+        "efm",
     ]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     shutil.copyfile(inputfile, os.path.join(str_path, "LUSPlus.csv"))

diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
@@ -19,7 +19,7 @@ separate = config["separate"]
 def get_sample_IDs(input, uas, output, software, separate):
     convert_out = f"{output}.txt"
     format_out = f"{output}.csv"
-    if software == "efm" and separate is False:
+    if (software == "efm" or software == "mpsproto") and separate is False:
         ID_list = os.path.basename(output)
     elif os.path.exists(convert_out):
         ID_list = get_existing_IDs(convert_out, "\t")

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
@@ -118,23 +118,25 @@ def process_strs(dict_loc, datatype, seq_col):
     return final_df, flags_df
 
 
-def EFM_output(profile, outfile, profile_type, data_type, separate=False):
+def EFM_output(profile, outfile, profile_type, data_type, col, separate=False):
     if profile_type == "reference":
         profile = profile[profile.allele_type == "real_allele"]
     else:
         profile = profile[profile.allele_type != "BelowAT"]
-    efm_profile = populate_efm_profile(profile, data_type)
+    efm_profile = populate_efm_profile(profile, data_type, col)
     if separate:
         write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outfile)
     else:
         write_aggregate_efm_profile(efm_profile, profile_type, data_type, outfile)
 
 
-def populate_efm_profile(profile, data_type):
+def populate_efm_profile(profile, data_type, colname):
     if data_type == "ce":
         prof_col = "CE_Allele"
     elif data_type == "lusplus":
         prof_col = "LUS_Plus"
+    elif data_type == "ngs":
+        prof_col = colname
     else:
         message = (
             f"Incorrect data type {data_type} specified for EFM. Please choose either "
@@ -328,25 +330,28 @@ def main(
         raise ValueError(f"unknown profile type '{profile_type}'")
     if data_type not in ("ce", "ngs", "lusplus"):
         raise ValueError(f"unknown data type '{data_type}'")
-    if output_type not in ("efm", "strmix"):
+    if output_type not in ("efm", "strmix", "mpsproto"):
         raise ValueError(f"unknown output type '{output_type}'")
     full_df = pd.read_csv(input, sep="\t")
     if output_dir is None:
         raise ValueError("No output specified using --out.")
     else:
         outpath = output_dir
     seq_col = "UAS_Output_Sequence" if strand == "uas" else "Forward_Strand_Sequence"
+    brack_col = (
+        "UAS_Output_Bracketed_Notation" if strand == "uas" else "Forward_Strand_Bracketed_Form"
+    )
     if nofilters:
         full_df["allele_type"] = "real_allele"
-        if output_type == "efm":
-            EFM_output(full_df, outpath, profile_type, data_type, separate)
+        if output_type == "efm" or output_type == "mpsproto":
+            EFM_output(full_df, outpath, profile_type, data_type, brack_col, separate)
         else:
             STRmix_output(full_df, outpath, profile_type, data_type, seq_col)
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col)
-        if output_type == "efm":
-            EFM_output(final_df, outpath, profile_type, data_type, separate)
+        if output_type == "efm" or output_type == "mpsproto":
+            EFM_output(final_df, outpath, profile_type, data_type, brack_col, separate)
         else:
             STRmix_output(final_df, outpath, profile_type, data_type, seq_col)
         if info:

diff --git a/setup.py b/setup.py
@@ -32,7 +32,9 @@
             "lusSTR/tests/data/NGS_stutter_test/*",
             "lusSTR/tests/data/kinsnps/*",
             "lusSTR/tests/data/lusstr_output/*",
-            "lusSTR/tests/data/LUSPlus_stutter_test/*" "lusSTR/workflows/*",
+            "lusSTR/tests/data/LUSPlus_stutter_test/*",
+            "lusSTR/tests/data/MPSproto_test/*",
+            "lusSTR/workflows/*",
             "lusSTR/wrappers/*",
         ]
     },