import csv
import numpy as np

def entropy(probabilities):
    """
    Computes H(X).

    X is given as a numpy array; all elements of the array are assumed to
    represent the distribution (the shape of the array is not meaningful).
    """
    # Drop zero probabilities so log2 can be computed; by convention
    # 0 * log 0 == 0, so these terms contribute nothing to the sum.
    probabilities = probabilities[probabilities != 0]
    return -np.sum(probabilities * np.log2(probabilities))

def joint_entropy(X_and_Y):
    """
    Computes the joint entropy H(X,Y).

    X_and_Y : joint probabilities of X and Y, a 2D numpy array such that
    X_and_Y[i,j] == P[X_i ∩ Y_j].

    We don't assume X is independent of Y.
    """
    return entropy(X_and_Y)

def conditional_entropy(X_given_Y, Y):
    """
    Computes H(X|Y).

    X_given_Y : conditional probabilities P(X|Y), a 2D numpy array such that
                X_given_Y[i,j] == P(X_i | Y_j) (X_i indexes rows, Y_j columns).
    Y : marginal probabilities P(Y), a 1 x m numpy array.

    Slide 3, course 2 : H(X|Y) = - Σ_i Σ_j P(X_i ∩ Y_j) log P(X_i | Y_j)
    Recall that P(X ∩ Y) = P(X|Y) P(Y), so we can use the marginal:
    P(X_i ∩ Y_j) = P(X_i|Y_j) * P(Y_j)
    """
    # Compare the number of columns in X_given_Y (the number of Y values)
    # to the length of the Y vector.
    assert X_given_Y.shape[1] == Y.shape[1], f"Shapes don't match : {X_given_Y.shape[1]} != {Y.shape[1]}"
    # Tile Y so that it lines up with every (i, j) combination in X_given_Y.
    z = np.tile(Y, (X_given_Y.shape[0], 1))
    # Skip zero conditional probabilities (0 * log 0 == 0 by convention).
    mask = X_given_Y != 0
    return -np.sum(X_given_Y[mask] * z[mask] * np.log2(X_given_Y[mask]))
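
# Illustrative cross-check (a sketch, not required by the assignment): the same
# H(X|Y) can be obtained through the identity H(X|Y) = H(X,Y) - H(Y), building
# the joint table with P(X_i ∩ Y_j) = P(X_i|Y_j) * P(Y_j) as in the docstring
# above. The function name below is chosen here for illustration only.
def conditional_entropy_via_joint(X_given_Y, Y):
    """Computes H(X|Y) as H(X,Y) - H(Y); should match conditional_entropy()."""
    X_and_Y = X_given_Y * np.tile(Y, (X_given_Y.shape[0], 1))  # P(X,Y) = P(X|Y) P(Y)
    return joint_entropy(X_and_Y) - entropy(Y)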

def mutual_information(X, Y, X_and_Y):
    """
    Computes I(X;Y) = H(X) + H(Y) - H(X,Y) (see Wikipedia).

    X, Y : marginal probabilities, 1D numpy arrays.
    X_and_Y : joint probabilities, a 2D numpy array with X_and_Y[i,j] == P[X_i ∩ Y_j].
    """
    # Warning! The shape of a 1D vector is a single integer.
    assert X.shape[0] == X_and_Y.shape[0]
    assert Y.shape[0] == X_and_Y.shape[1]
    return entropy(X) + entropy(Y) - joint_entropy(X_and_Y)

def joint_entropy2(X_and_Y_and_Z):
    """
    Computes H(X,Y,Z).

    X_and_Y_and_Z : joint probabilities P(X,Y,Z), given as a numpy array
    (the shape is not meaningful, see entropy()). It corresponds to a full
    distribution table, one probability per combination of values, e.g. :

        DIS        X        Y      P(DIS,X,Y)
        ---------------------------------------
        steatosis  age>40   ...    0.01
        healthy    age>40   ...    0.02
        PBC        age<40   ...    0.10
        ...        ...      ...    ...

    No independence between X, Y and Z is assumed : we simply apply the
    entropy formula to the joint distribution,
        H(X,Y,Z) = - Σ P(X,Y,Z) log P(X,Y,Z).
    Marginals can be recovered from this table by summation : rows that share
    the same (DIS, X) values are summed to get P(DIS,X), and
        P(Z) = Σ_X Σ_Y P(X,Y,Z).
    """
    return entropy(X_and_Y_and_Z)
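
# Minimal sketch of the marginalization described above, assuming the joint
# distribution is stored as a multi-dimensional numpy array with one axis per
# variable (e.g. P[x, y, z]); this layout is an assumption, and the helper
# name is chosen here for illustration only.
def marginalize(joint, keep_axis):
    """
    Sums a joint distribution over every axis except `keep_axis`,
    e.g. marginalize(P_XYZ, 2) == P(Z) = Σ_X Σ_Y P(X,Y,Z).
    """
    other_axes = tuple(a for a in range(joint.ndim) if a != keep_axis)
    return joint.sum(axis=other_axes)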

def cond_mutual_information(X_and_Z, Y_and_Z, Z, X_Y_Z):
    """
    Computes I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z).

    H(X,Y,Z) = H(X,Y|Z) + H(Z), because (chain rule)
    H(X,Y,Z) = H(X|Y,Z) + H(Y|Z) + H(Z)
             = H(X,Y|Z) - H(Y|Z) + H(Y|Z) + H(Z)   (see the rule below)
             = H(X,Y|Z) + H(Z)

    The rule used above is the chain rule for conditional entropies
    (Slide 9, point 2) :
        H(X,Y|Z) = H(X|Z) + H(Y|X,Z)
    so, renaming X <-> Y,
        H(X|Y,Z) = H(X,Y|Z) - H(Y|Z)
    which we prove like this :
    H(X,Y|Z) = - Σ_X Σ_Y Σ_Z P(X,Y,Z) log P(X,Y|Z)
             = - Σ_X Σ_Y Σ_Z P(X,Y,Z) log( P(Y|X,Z) P(X|Z) )
               # because P(X,Y|Z) = P(X,Y,Z)/P(Z)
               #                  = (P(X,Y,Z)/P(X,Z)) * (P(X,Z)/P(Z))
               #                  = P(Y|X,Z) * P(X|Z)
             = - Σ_X Σ_Y Σ_Z P(X,Y,Z) log P(Y|X,Z)   # because log of a product
               - Σ_X Σ_Y Σ_Z P(X,Y,Z) log P(X|Z)
             = H(Y|X,Z)                              # by definition of H(.|.)
               - Σ_X Σ_Z P(X,Z) log P(X|Z)           # because Σ_Y P(X,Y,Z) = P(X,Z)
             = H(Y|X,Z) + H(X|Z)

    X_and_Z can be found by marginalizing Y out of X_Y_Z.
    BUT marginalization can only be done if we know the values of the random
    variables X, Y and Z : we can't just have a probability table, we must have
    a distribution table P(X=x_i ∩ Y=y_j ∩ Z=z_k) for all x_i, y_j, z_k.
    Then only X_Y_Z is really needed; the other P(...) can be deduced by
    appropriate marginalization.
    """
    return joint_entropy(X_and_Z) + joint_entropy(Y_and_Z) - entropy(Z) - joint_entropy2(X_Y_Z)

def cond_joint_entropy(X_Y_Z):
    """
    Computes H(X,Y|Z).

    X_Y_Z : joint probabilities P(X,Y,Z), assumed here to be a 3D numpy array
    indexed as X_Y_Z[i,j,k] == P(X_i ∩ Y_j ∩ Z_k).

    H(X,Y|Z) is obtained from H(X,Y,Z) = H(X,Y|Z) + H(Z), so
    H(X,Y|Z) = H(X,Y,Z) - H(Z) (see above).
    """
    # P(Z) = Σ_X Σ_Y P(X,Y,Z) : marginalize X (axis 0) and Y (axis 1) out.
    Z = X_Y_Z.sum(axis=(0, 1))
    return joint_entropy2(X_Y_Z) - entropy(Z)

def cond_joint_entropy2(X_Y_Z, Z):
    """
    Computes H(X,Y|Z) based on the distribution table for
    P(X=x_i ∩ Y=y_j ∩ Z=z_k) (X_Y_Z) and on P(Z=z_k) (Z).

    We have (chain rule, see cond_mutual_information above) :
    H(X,Y,Z) = H(X,Y|Z) + H(Z)
    So :
    H(X,Y|Z) = H(X,Y,Z) - H(Z)

    Note that P(Z) could be obtained from P(X,Y,Z) by marginalization, but
    that would require another structure for X_Y_Z (see cond_joint_entropy).
    """
    return entropy(X_Y_Z) - entropy(Z)
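
# Sketch of how empirical probabilities could be estimated from the CSV data,
# assuming P1_medicalDB.csv has a header row whose column names match the
# PARAMS keys used below (an assumption about the file layout; adjust if it
# differs). The function name is chosen here for illustration only.
def estimate_marginal(csv_path, column):
    """Estimates P(column = value) by counting occurrences in the CSV file."""
    from collections import Counter
    counts = Counter()
    with open(csv_path) as csvfile:
        for row in csv.DictReader(csvfile):
            counts[row[column]] += 1
    total = sum(counts.values())
    return {value: n / total for value, n in counts.items()}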

if __name__ == "__main__":
    p = np.array([0.5, 0.5])
    assert entropy(p) == 1, "2 equiprobable outcomes need 1 bit"
    p = np.array([0, 1])
    assert entropy(p) == 0, f"Only one possible outcome needs 0 bits, not {entropy(p)}"
    p = np.array([0.25, 0.25, 0.25, 0.25])
    assert entropy(p) == 2, "4 equiprobable outcomes need 2 bits"

    X_and_Y = np.array([[0.25, 0.25], [0.25, 0.25]])
    h = joint_entropy(X_and_Y.flatten())
    assert h == 2, f"{h}"

    X_given_Y = np.array([[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]])
    Y = np.array([[0.1, 0.2, 0.7]])
    print(conditional_entropy(X_given_Y, Y))
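
    # Extra illustrative check, with made-up numbers: for a proper conditional
    # table (each column of P(X|Y) sums to 1), H(X|Y) must equal H(X,Y) - H(Y).
    P_X_given_Y = np.array([[0.5, 0.8], [0.5, 0.2]])
    P_Y = np.array([[0.4, 0.6]])
    P_XY = P_X_given_Y * np.tile(P_Y, (2, 1))  # P(X ∩ Y) = P(X|Y) P(Y)
    lhs = conditional_entropy(P_X_given_Y, P_Y)
    rhs = joint_entropy(P_XY) - entropy(P_Y)
    assert np.isclose(lhs, rhs), f"{lhs} != {rhs}"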

    X = np.array([0.5, 0.5])
    Y = np.array([0.5, 0.5])
    X_and_Y = np.array([[0.25, 0.25], [0.25, 0.25]])
    print(mutual_information(X, Y, X_and_Y))
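
    # Illustrative checks for the three-variable functions, assuming P(X,Y,Z)
    # is stored as a 3D array indexed as XYZ[i, j, k] == P(X_i ∩ Y_j ∩ Z_k)
    # (this layout is an assumption; the table below is three fair coins).
    XYZ = np.full((2, 2, 2), 1 / 8)
    assert np.isclose(joint_entropy2(XYZ), 3), "8 equiprobable outcomes need 3 bits"
    Z = XYZ.sum(axis=(0, 1))        # P(Z) = Σ_X Σ_Y P(X,Y,Z)
    XZ = XYZ.sum(axis=1)            # P(X,Z), Y marginalized out
    YZ = XYZ.sum(axis=0)            # P(Y,Z), X marginalized out
    assert np.isclose(cond_joint_entropy(XYZ), 2), "H(X,Y|Z) = 3 - 1 = 2 bits"
    assert np.isclose(cond_joint_entropy2(XYZ, Z), 2)
    # The three coins are independent, so I(X;Y|Z) should be 0.
    assert np.isclose(cond_mutual_information(XZ, YZ, Z, XYZ), 0)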

    PARAMS = {
        "Age" : ["<40", ">40"],
        "Sex" : ["man", "woman"],
        "Obesity" : ["thin", "regular", "overweight"],
        "Alcoholic antecedents" : ["yes", "no"],
        "Iron" : ["low", "normal", "high", "very high"],
        "Disease" : ["healthy", "PBC", "steatosis"],
        "Fatigue" : ["yes", "no"],
        "Triglycerides" : ["abnormal", "normal"],
        "Alanine transaminase" : ["abnormal", "normal"],
        "Aspartate transaminase" : ["abnormal", "normal"],
        "Gamma-glutamyl transpeptidase" : ["abnormal", "normal"],
        "CholesterolCHL" : ["low", "normal", "high"],
        "Anti-mitochondrial antibody" : ["yes", "no"],
        "Muscular ache" : ["yes", "no"],
        "Bilirubin" : ["abnormal", "normal"],
        "Itching" : ["yes", "no"],
        "Jaundice" : ["yes", "no"],
    }

    with open('P1_medicalDB.csv') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            print(row)

    # Entropy of each variable, assuming a uniform distribution over its values.
    for k, v in PARAMS.items():
        ent = entropy(np.ones((1, len(v))) / len(v))
        print(f"{k} : {ent}")

    # Placeholder for the conditional entropies H(Disease | other variable):
    # uniform distributions are used until the real tables are estimated
    # from the CSV data.
    v = PARAMS["Disease"]
    X = np.ones((1, len(v))) / len(v)
    for k, v in PARAMS.items():
        if k != "Disease":
            # X = disease; Y = other variable
            Y = np.ones((1, len(v))) / len(v)
            # FIXME: conditional_entropy(X_given_Y, Y) needs the real
            # P(Disease | Y) table estimated from the CSV; the example
            # X_given_Y above has the wrong shape for most variables.
            ent = entropy(np.ones((1, len(v))) / len(v))
            print(f"{k} : {ent}")