Skip to content

Commit

Permalink
filtering of untyped alleles and below AT
Browse files Browse the repository at this point in the history
  • Loading branch information
rnmitchell committed Jul 11, 2023
1 parent 9dfe0a9 commit 68c2543
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
6 changes: 3 additions & 3 deletions lusSTR/data/snp_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
uas: True ## True/False; True if the data were run through the UAS
samp_input: "/path/to/input/directory/or/samples" ## input directory or sample; if not provided, will be cwd
output: "lusstr_output" ## output file/directory name; Example: "test_030923"

## convert settings
kit: "sigprep" ## sigprep/kintelligence

## format settings
types: "i" ## choices are "all", "i" (identity SNPs only), "p" (phenotype only), "a" (ancestry only) or any combination
nofilter: False ## True/False; True if no filtering is desired; if False, will remove any allele designated as Not Typed

## format settings
## convert settings
strand: "forward" ## forward/uas; indicates which orientation to report the alleles in for the ForenSeq SNPs: either the orientation as reported by the UAS ("uas") or the forward strand ("forward")
references: "" ## list IDs of the samples to be run as references in EFM
separate: false ## True/False; True if you want to separate samples into individual files for use in EFM
Expand Down
9 changes: 6 additions & 3 deletions lusSTR/wrappers/snps_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def create_output_table(sample_df, orientation, separate, output_type, uas):
allele_col = "Forward_Strand_Allele"
all_samples_df = pd.DataFrame()
for sample in sample_df["SampleID"].unique():
indiv_df = sample_df[sample_df["SampleID"] == sample]
indiv_df = sample_df[
(sample_df["SampleID"] == sample) & (sample_df["Issues"] != "Contains untyped allele")
]
compiled_table = create_sample_df(indiv_df, output_type, allele_col)
if not uas:
compiled_table = check_allele_calls(compiled_table, output_type)
Expand All @@ -47,14 +49,15 @@ def create_sample_df(indiv_df, output_type, all_col):
.unstack(0)
.reset_index()
)
print(compiled_table)
compiled_table.to_csv("test.csv", index=False)
try:
compiled_table.columns = ["Marker", "Allele 1", "Allele 2", "Height 1", "Height 2"]
except ValueError:
print("Too many alleles!")
if output_type == "reference":
print(compiled_table)
for i, row in compiled_table.iterrows():
if compiled_table.loc[i, "Height 2"] == 0:
if pd.isnull(compiled_table.loc[i, "Height 2"]):
compiled_table.loc[i, "Allele 2"] = compiled_table.loc[i, "Allele 1"]
compiled_table = compiled_table[["Marker", "Allele 1", "Allele 2"]]
return compiled_table
Expand Down

0 comments on commit 68c2543

Please sign in to comment.