-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunoxlipidome_estimation.py
141 lines (117 loc) · 5.44 KB
/
unoxlipidome_estimation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018-2019 SysMedOs_team @ AG Bioanalytik, University of Leipzig:
# SysMedOs_team: Zhixu Ni, Maria Fedorova
# [The GNU General Public License version 2] (https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html)
#
# For more info please contact:
# Developer Zhixu Ni: zhixu.ni@uni-leipzig.de
from itertools import product
import pandas as pd
from scipy.special import comb
class TheoLipidome(object):
    """
    Estimate the number of theoretical unoxidized lipid species from a list of fatty acids.

    Two counting modes are offered by ``get_estimation``:

    * combinations only (default): FA residues of one lipid are treated as an unordered
      multiset, i.e. combinations with repetition.
    * site specific (``site_specific=True``): every FA position is distinguished, while mirrored
      arrangements of TG and cardiolipin are counted once. LysoPL, MG and DG receive extra
      positional terms.
    """

    # Class names that get special treatment in the site specific mode.
    _LYSO_PL_NAMES = ('LPA', 'LPC', 'LPE', 'LPG', 'LPI', 'LPS')
    _MG_NAMES = ('Monoacylglycerol', 'MG')
    _DG_NAMES = ('Diacylglycerol', 'DG')
    _TG_NAMES = ('Triacylglycerol', 'TG')
    _CL_NAMES = ('Cardiolipin', 'CL')

    def __init__(self, fa_lst_path, x_dct):
        """
        Load the fatty acid table and store the lipid class settings.

        :param fa_lst_path: path to an Excel file readable by ``pandas.read_excel``; the first row is
            used as header and a ``DB`` column is required
        :type fa_lst_path: str
        :param x_dct: lipid class names grouped under the keys ``'x1'``, ``'x2'``, ``'x3'`` and ``'x4'``
            (classes with 1, 2, 3 and 4 FA residues)
        :type x_dct: dict
        """
        fa_df = pd.read_excel(fa_lst_path, header=0)
        print('Load FA list:')
        print(fa_df)

        self.f = fa_df.shape[0]  # total number of fatty acids F (one per table row)

        # Count how many fatty acids share each positive DB value
        # (presumably the number of double bonds -- confirm against the FA table).
        db_lst = fa_df['DB'].values.tolist()
        self.fa_dct = {}
        self.n_lst = []
        for n_db in sorted(set(db_lst)):  # sorted -> deterministic order of n_lst
            if n_db > 0:
                self.fa_dct[n_db] = db_lst.count(n_db)
                self.n_lst.append(n_db)

        self.x_dct = x_dct
        print('All settings loaded...')

    def get_estimation(self, site_specific=False):
        """
        Calculate the total number of lipid species that can be built from the FA list.

        :param site_specific: False (default) counts FA combinations only; True counts
            position (sn site) specific species
        :type site_specific: bool
        :return: total number of lipid species over all classes in ``x_dct``
        :rtype: int
        """
        if site_specific:
            return int(self._estimate_site_specific())
        return int(self._estimate_combinations())

    def _estimate_combinations(self):
        """Count species as combinations with repetition of F fatty acids over n residues."""
        f = self.f
        # C(F + n - 1, n) is the number of multisets of size n drawn from F fatty acids.
        # exact=True makes scipy return an exact Python int instead of a float approximation.
        tot_lipid = len(self.x_dct['x1']) * f
        tot_lipid += len(self.x_dct['x2']) * comb(f + 1, 2, exact=True)
        tot_lipid += len(self.x_dct['x3']) * comb(f + 2, 3, exact=True)
        tot_lipid += len(self.x_dct['x4']) * comb(f + 3, 4, exact=True)
        return tot_lipid

    def _estimate_site_specific(self):
        """Count species with every FA position distinguished."""
        f = self.f
        x1, x2 = self.x_dct['x1'], self.x_dct['x2']
        x3, x4 = self.x_dct['x3'], self.x_dct['x4']

        # Single FA classes: each LysoPL name is counted one extra time and each MG name two
        # extra times (presumably one count per position the FA can occupy).
        n_x1 = len(x1)
        for name in x1:
            if name in self._LYSO_PL_NAMES:
                n_x1 += 1
            if name in self._MG_NAMES:
                n_x1 += 2
        tot_lipid = n_x1 * f

        # Two FA classes: ordered pairs.
        tot_lipid += len(x2) * (f ** 2)
        # DG with -OH at sn1/sn3 is like PLs and calculated above; the extra term adds the
        # unordered pairs (with repetition) for the remaining DG arrangement.
        if any(name in x2 for name in self._DG_NAMES):
            tot_lipid += comb(f, 2, exact=True) + f

        # Three and four FA classes: TG / cardiolipin are counted without mirrored duplicates.
        tot_lipid += self._count_with_mirror_class(x3, self._TG_NAMES, 3)
        tot_lipid += self._count_with_mirror_class(x4, self._CL_NAMES, 4)
        return tot_lipid

    def _count_with_mirror_class(self, classes, mirror_names, n_sn):
        """Count species of all ``classes`` with ``n_sn`` FA; one class may be mirror symmetric."""
        n_ordered = self.f ** n_sn
        if any(name in classes for name in mirror_names):
            # one class without mirrored duplicates, every other class fully ordered
            return self.get_product_no_mirror(n_sn) + (len(classes) - 1) * n_ordered
        return len(classes) * n_ordered

    def get_product_no_mirror(self, n_sn):
        """
        Count the arrangements of F fatty acids on ``n_sn`` positions, mirrored pairs counted once.

        An arrangement and its reversed order are regarded as the same species. Of the
        ``F ** n_sn`` ordered arrangements, ``F ** ceil(n_sn / 2)`` read the same in both
        directions; all others form pairs, hence the closed form used below. It gives the same
        number as enumerating ``itertools.product`` and dropping reversed duplicates, without
        building the full list.

        :param n_sn: number of FA positions
        :type n_sn: int
        :return: number of arrangements that are unique up to reversal
        :rtype: int
        :raises ValueError: if ``n_sn`` is negative
        """
        if n_sn < 0:
            raise ValueError('n_sn must be non-negative')
        n_ordered = self.f ** n_sn
        n_symmetric = self.f ** ((n_sn + 1) // 2)
        return (n_ordered + n_symmetric) // 2
if __name__ == '__main__':
    # Location of the default fatty acid table.
    usr_fa_lst = r'data/FA_list.xlsx'

    # Lipid class names grouped by the number of FA residues per molecule (x1 .. x4).
    usr_lipid_classes = dict(
        x1=[
            'FA', 'CholesterolEster',
            'LPA', 'LPC', 'LPE', 'LPG', 'LPI', 'LPS',
            'Monoacylglycerol', 'Ceramide', 'Sphingolipid',
        ],
        x2=['PA', 'PC', 'PE', 'PG', 'PI', 'PS', 'Diacylglycerol'],
        x3=['Triacylglycerol'],
        x4=['Cardiolipin'],
    )

    unoxlipidome = TheoLipidome(usr_fa_lst, usr_lipid_classes)
    total_label = 'Total number of unoxLipidome: '

    # Estimate without distinguishing FA positions.
    print('\nResults for predictions with FA residues combinations only:')
    combination_total = unoxlipidome.get_estimation()
    print(total_label, combination_total)

    # Estimate with every sn position distinguished.
    print('\nResults for predictions with sn site specific:')
    site_specific_total = unoxlipidome.get_estimation(site_specific=True)
    print(total_label, site_specific_total)

    print('\nFinished!')