-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwheatAHP.py
45 lines (39 loc) · 2.85 KB
/
wheatAHP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import ahpy
import numpy as np
import pandas as pd
from pandas import read_csv
import argparse
from Source.epitope import Epitope
from Source.AHPmethod import criteria1, criteria2epitope, criteria2gene, criteria3epitope, criteria3gene
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", help = "/path/to/input file with usearch frequencies results.")
parser.add_argument("-o", "--path_output", help = "/path/to/ output files directory.", default="./")
parser.add_argument("-e", "--epitopes", help = "/path/to/epitope fasta file.")
parser.add_argument("-d", "--remove_epitope_duplication", help = "Remove epitope with duplicated sequences. Optios = yes | no.", default="yes")
parser.add_argument("-ol", "--oligopeptides", help = "/path/to/oligopeptides tsv file.")
args = parser.parse_args()
def main():
rawdata = read_csv(args.input, sep = ";")
Epsilon = 1 # Value to add to frequencies in NGS data to perform Log2(x + Epsilon) for avoid errors on cases when x = 0.
ahpy_dict = {} # Dict with results for aphy method changing the intensity value between epitopes and genes in criteria1.
for intensity in range(1, 9 + 1):
# Criteria 1:
epitopes_genes_comparisons = {('Epitopes', 'Genes'): intensity}
ahpy_criteria1 = criteria1(epitopes_genes_comparisons)
#Criteria 2:
epitopes = Epitope(args.epitopes, args.remove_epitope_duplication) # Epitope sequences library.
oligoEpitopes = epitopes.findEpoligos(args.oligopeptides) # Find epitopes in 20-amino acid oligopeptides and keep the max score for each one.
ahpy_criteria2_epitopes, epitopes_with_score = criteria2epitope(oligoEpitopes, Epsilon) # Intensity values for each epitope comparison based on score from oligopeptides.
ahpy_criteria2_genes, list_genes = criteria2gene(rawdata, Epsilon) # Intensity values for each amplicon comparison based on number of epitopes in them.
# Criteria 3:
ahpy_criteria3_epitopes = criteria3epitope(rawdata, Epsilon, epitopes_with_score) # Intensity values between lines for each epitope based on the epitope frequency in each one.
ahpy_criteria2_epitopes.add_children(ahpy_criteria3_epitopes)
ahpy_criteria3_genes = criteria3gene(rawdata, Epsilon, list_genes) # Intensity values between lines for each amplicon type based on the amplicon frequency in each one.
ahpy_criteria2_genes.add_children(ahpy_criteria3_genes)
ahpy_criteria1.add_children([ahpy_criteria2_epitopes, ahpy_criteria2_genes])
ahpy_dict[intensity] = ahpy_criteria1.target_weights # Scores for each line.
scores = pd.DataFrame(data=ahpy_dict)
scoresdata = pd.concat([scores, scores.mean(axis=1).rename('mean'), scores.std(axis=1).rename('sd')], axis=1)
scoresdata.to_csv(args.path_output + 'table_scores.txt', header=True, sep='\t', mode='a')
if __name__ == "__main__":
main()