-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPymaceuticals.py
366 lines (240 loc) · 7.77 KB
/
Pymaceuticals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/env python
# coding: utf-8
# ## Conclusions:
#
# - Ramicane had the greatest effect in reducing tumor growth of all the drugs tested. Ramicane is one of only two drugs that achieved an overall tumor reduction.
#
# - Mice given Propriva were the least likely to survive the 45 days; these mice had a survival rate of 26%, which is 22% below the median.
#
# - Ramicane maintained the fewest metastatic sites across the 45 days, ending with approximately 200% less than mice treated with Ketapril.
#
# In[1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import sem
# In[2]:
# Silence every warning category for the remainder of the run.
warnings.filterwarnings("ignore")
# In[3]:
# Input CSV locations, relative to the current working directory.
drug_data_path = "./data/mouse_drug_data.csv"
trial_data_path = "./data/clinicaltrial_data.csv"
drug_df = pd.read_csv(drug_data_path)
trials_df = pd.read_csv(trial_data_path)
# Join the two tables on their shared "Mouse ID" column (pandas' default inner join).
mice_trials_df = trials_df.merge(drug_df, on="Mouse ID")
# Drug names iterated, in this order, by every plotting loop below.
drugs = [
    "Capomulin",
    "Ceftamin",
    "Infubinol",
    "Ketapril",
    "Naftisol",
    "Placebo",
    "Propriva",
    "Ramicane",
    "Stelasyn",
    "Zoniferol",
]
# Line styling shared by the line and error-bar plots.
linewidth = 0.3
marker = "^"
# In[4]:
# Notebook preview of the merged table (no effect when run as a script).
mice_trials_df.head()
# In[5]:
# Mean tumor volume for every (Drug, Timepoint) pair, as a flat three-column
# DataFrame: "Drug", "Timepoint", "Average Tumor Volume (mm3)".
# The column is selected *before* aggregating: averaging the whole frame would
# also try to average the non-numeric "Mouse ID" column, which raises a
# TypeError on pandas >= 2.0 (numeric_only no longer defaults to True).
avg_tumor_volume = (
    mice_trials_df
    .groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
    .mean()
    .rename("Average Tumor Volume (mm3)")
    .reset_index()
)
# ## Tumor Response to Treatment
# In[6]:
# Notebook preview (no effect when run as a script).
avg_tumor_volume.head()
# In[7]:
# Standard error of the tumor volumes for every (Drug, Timepoint) group.
mice_trials_by_drug_and_timepoint = mice_trials_df.groupby(["Drug", "Timepoint"])
# Each entry is ((drug, timepoint), Series of tumor volumes for that group).
tumor_volume_samples = list(mice_trials_by_drug_and_timepoint["Tumor Volume (mm3)"])
drug_samples = [drug for (drug, _), _ in tumor_volume_samples]
timepoints = [timepoint for (_, timepoint), _ in tumor_volume_samples]
means = [volumes.mean() for _, volumes in tumor_volume_samples]
sems = [volumes.sem() for _, volumes in tumor_volume_samples]
# Collect the per-group standard errors into a DataFrame.
standard_error = pd.DataFrame(
    {
        "Drug": drug_samples,
        "Timepoint": timepoints,
        "Tumor Volume (Standard Error)": sems,
    }
)
# In[8]:
# Notebook preview (no effect when run as a script).
standard_error.head()
# In[9]:
# Reshape the averages: one row per timepoint, one column per drug.
avg_tumor_volume_pivot = avg_tumor_volume.pivot_table(
    values="Average Tumor Volume (mm3)",
    index="Timepoint",
    columns="Drug",
)
# In[10]:
# Notebook preview of the reshaped table (no effect when run as a script).
avg_tumor_volume_pivot
# In[11]:
# Attach the tumor-volume standard errors to the averages so that each plotted
# point carries its own error bar. The column is added only when missing; an
# explicit check replaces the former bare `except:`, so a genuine failure
# (e.g. `sems` having the wrong length) surfaces instead of being reported as
# "already has a column".
tumor_sem_column = "Tumor Volume (Standard Error)"
if tumor_sem_column in avg_tumor_volume.columns:
    print("The 'avg_tumor_volume' DataFrame already has a 'Standard Error' column.")
else:
    avg_tumor_volume.insert(len(avg_tumor_volume.columns), tumor_sem_column, sems)
# Generate the Plot (with Error Bars)
plt.figure(figsize=(10, 8))
for drug in drugs:
    avg_tumor_volume_while_on_drug = avg_tumor_volume[avg_tumor_volume.Drug == drug]
    plt.errorbar(
        avg_tumor_volume_while_on_drug["Timepoint"],
        avg_tumor_volume_while_on_drug["Average Tumor Volume (mm3)"],
        yerr=avg_tumor_volume_while_on_drug[tumor_sem_column],
        linewidth=linewidth,
        marker=marker,
        # Label each series directly so the legend cannot drift out of step
        # with the order in which the drugs are drawn.
        label=drug,
    )
plt.grid(True)
plt.xlabel("Time (Days)")
plt.ylabel("Average Tumor Volume (mm3)")
plt.legend(loc="upper left")
# Save the Figure
plt.savefig("./presentation/1.png")
# In[12]:
# Mean and standard error of metastatic-site counts per (Drug, Timepoint) group.
# Each entry is ((drug, timepoint), Series of "Metastatic Sites" values).
met_samples = list(mice_trials_by_drug_and_timepoint["Metastatic Sites"])
drug_samples = [drug for (drug, _), _ in met_samples]
timepoints = [timepoint for (_, timepoint), _ in met_samples]
means = [sites.mean() for _, sites in met_samples]
sems = [sites.sem() for _, sites in met_samples]
# Collect the per-group means into a DataFrame.
avg_met = pd.DataFrame(
    {
        "Drug": drug_samples,
        "Timepoint": timepoints,
        "Metastatic Sites (Average)": means,
    }
)
# In[13]:
# Per-group standard errors of the metastatic-site counts, keyed by the same
# Drug / Timepoint labels as the averages.
standard_error_met = pd.DataFrame(
    {
        "Drug": drug_samples,
        "Timepoint": timepoints,
        "Metastatic Sites (Standard Error)": sems,
    }
)
# ## Metastatic Response to Treatment
# In[14]:
# Notebook previews (no effect when run as a script).
avg_met.head()
# In[15]:
standard_error_met.head()
# In[16]:
# Reshape the standard errors: one row per timepoint, one column per drug.
standard_error_met_pivot = standard_error_met.pivot_table(
    values="Metastatic Sites (Standard Error)",
    index="Timepoint",
    columns="Drug",
)
# In[17]:
# Notebook preview of the reshaped table (no effect when run as a script).
standard_error_met_pivot
# In[18]:
# Attach the metastatic-site standard errors to the averages so that each
# plotted point carries its own error bar. The column is added only when
# missing; an explicit check replaces the former bare `except:`, so a genuine
# failure (e.g. `sems` having the wrong length) surfaces instead of being
# reported as "already has a column".
met_sem_column = "Metastatic Sites (Standard Error)"
if met_sem_column in avg_met.columns:
    print("The 'avg_met' DataFrame already has a 'Standard Error' column.")
else:
    avg_met.insert(len(avg_met.columns), met_sem_column, sems)
# Generate the Plot (with Error Bars)
plt.figure(figsize=(10, 8))
for drug in drugs:
    avg_met_while_on_drug = avg_met[avg_met.Drug == drug]
    plt.errorbar(
        avg_met_while_on_drug["Timepoint"],
        avg_met_while_on_drug["Metastatic Sites (Average)"],
        yerr=avg_met_while_on_drug[met_sem_column],
        linewidth=linewidth,
        marker=marker,
        # Label each series directly so the legend cannot drift out of step
        # with the order in which the drugs are drawn.
        label=drug,
    )
plt.grid(True)
plt.xlabel("Treatment Duration (Days)")
plt.ylabel("Metastatic Sites")
plt.legend(loc="upper left")
# Save the Figure
plt.savefig("./presentation/2.png")
# In[19]:
# Count the "Mouse ID" entries in every (Drug, Timepoint) group; the plots
# below treat this as the number of mice still in the study at that timepoint.
# Only the one needed column is counted, and the Drug / Timepoint labels come
# from the count's own group keys rather than from lists left over from an
# earlier cell, so rows and labels cannot fall out of alignment.
mice_counts = (
    mice_trials_by_drug_and_timepoint["Mouse ID"]
    .count()
    .reset_index()
)
mice = mice_counts["Mouse ID"]
# Final frame has the columns "Drug", "Timepoint", "Mice".
mice_survival_over_time_df = mice_counts.rename(columns={"Mouse ID": "Mice"})
# In[20]:
# Reshape the counts: one row per timepoint, one column per drug.
mice_survival_over_time_df_pivot = mice_survival_over_time_df.pivot_table(
    values="Mice",
    index="Timepoint",
    columns="Drug",
)
# ## Survival Rates
# In[21]:
# Notebook preview of the long-format counts (no effect when run as a script).
mice_survival_over_time_df.head()
# In[22]:
# Notebook preview of the pivoted counts (no effect when run as a script).
mice_survival_over_time_df_pivot
# In[23]:
# Plot, for each drug, the mouse count at every timepoint as a percentage of
# that drug's count at timepoint 0.
plt.figure(figsize=(10, 8))
for drug in drugs:
    mice_over_time = mice_survival_over_time_df_pivot[drug]
    # Explicit label lookup of the Timepoint == 0 row (the pivot is indexed by
    # "Timepoint"), instead of the ambiguous chained `[drug][0]`.
    starting_number_of_mice = mice_over_time.loc[0]
    plt.plot(
        100 * mice_over_time / starting_number_of_mice,
        linewidth=linewidth,
        marker=marker,
        # Label each line directly; the legend previously borrowed its names
        # from an unrelated DataFrame and matched them to lines by position.
        label=drug,
    )
plt.grid(True)
plt.xlabel("Time (Days)")
plt.ylabel("Survival Rate (%)")
plt.legend(loc="lower left")
# Save the Figure
plt.savefig("./presentation/3.png")
# Show the Figure
plt.show()
# In[24]:
# Percent change in average tumor volume between each drug's first and last
# recorded rows in `avg_tumor_volume`, printed as the loop runs.
percentage_change_list = []
print("Drug")
for drug in drugs:
    rows_for_drug = avg_tumor_volume.Drug == drug
    averages_list = avg_tumor_volume.loc[rows_for_drug, "Average Tumor Volume (mm3)"].tolist()
    first_average, last_average = averages_list[0], averages_list[-1]
    percentage_change = 100 * (last_average - first_average) / first_average
    print(drug, " ", percentage_change)
    percentage_change_list.append(percentage_change)
# Freeze the results, in `drugs` order, as a tuple.
tumor_volume_change_tup = tuple(percentage_change_list)
# ## Summary Bar Graph
# In[25]:
# One bar per drug showing its percent tumor-volume change.
plt.figure(figsize=(12, 8))
# Colour follows the data: green where the average tumor volume shrank, red
# where it grew or stayed flat. This replaces a hard-coded colour list that
# silently assumed which drugs reduced tumor volume.
bar_colors = ["g" if change < 0 else "r" for change in tumor_volume_change_tup]
plt.bar(
    # `tumor_volume_change_tup` was computed by iterating `drugs`, so the same
    # list supplies the bar labels and keeps names and values aligned.
    drugs,
    tumor_volume_change_tup,
    color=bar_colors,
)
plt.grid(True, color="black", linestyle="dashed")
plt.title("Tumor Change Over 45 Day Treatment")
plt.ylabel("% Tumor Volume Change")
def plot_tumor_volume_change_text(tup):
    """Write each value of *tup*, rounded, as a text label on the current plot.

    The label for the value at position ``i`` is drawn at x = ``i``. Values
    greater than zero are anchored at y = 3; all other values (including
    zero) at y = -3. Labels are white, horizontally centred, and prefixed
    with "% ".
    """
    for position, change in enumerate(tup):
        anchor_y = 3 if change > 0 else -3
        plt.text(
            position,
            anchor_y,
            f"% {round(change)}",
            ha="center",
            va="top",
            color="white",
            fontsize="xx-large",
        )
# Label every bar with its rounded percent change.
plot_tumor_volume_change_text(tumor_volume_change_tup)
# Save the Figure (written to ./presentation/4.png, relative to the working directory)
plt.savefig("./presentation/4.png")