-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
335 lines (293 loc) · 11.9 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
import pandas as pd
import numpy as np
import emoji
import tensorflow as tf
import pickle
import keras
import requests
from tensorflow.keras import datasets, layers, models,Input,Model
from shiny import App, render, ui,reactive, ui,run_app
import sklearn
import json
from tensorflow.keras.layers import TextVectorization
import lime
import lime.lime_text
import matplotlib.colors as mcolors
from pathlib import Path
import matplotlib.pyplot as plt
from icons import gear_fill,info_circle_fill
# vectorizer
# Load the vocabulary that was saved alongside the trained model.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so that the
# tokens are read the same way regardless of the platform's default locale.
with open('vocab.json', 'r', encoding='utf-8') as f:
    vocab = json.load(f)
# Rebuild the TextVectorization layer and give it the saved vocabulary so that
# it maps text to integer ids without having to be re-adapted on a corpus.
vectorize_layer = TextVectorization(max_tokens=50000, pad_to_max_tokens=True, output_mode='int')
vectorize_layer.set_vocabulary(vocab)
# K means
# Restore the pickled clustering model from disk.
# NOTE: unpickling runs arbitrary code, so kmea.pkl must be a trusted artifact.
with open("kmea.pkl", "rb") as file:
    kmea = pickle.load(file)

# Trained Keras classifier used for the main prediction.
model = keras.models.load_model('model.h5')

# feature miner
# Sub-model sharing the classifier's inputs but stopping at the layer named
# "Bidirectional2"; its output is what gets passed to `kmea.predict`.
feature_miner = Model(
    inputs=model.inputs,
    outputs=model.get_layer(name="Bidirectional2").output,
    name="feature_miner",
)
# replacement
# Character-replacement table: the last HTML table on the LVG "symbol table"
# page. The rest of the file reads its "Char" and "Mapped String" columns.
# A timeout and an explicit status check are used so that an unreachable or
# failing server produces an immediate, clear error at startup instead of an
# indefinite hang or an attempt to parse an error page.
_symbol_table_response = requests.get(
    'https://lhncbc.nlm.nih.gov/LSG/Projects/lvg/current/docs/designDoc/UDF/unicode/DefaultTables/symbolTable.html',
    timeout=30,
)
_symbol_table_response.raise_for_status()
strange = pd.read_html(_symbol_table_response.content)[-1]  # getting the webpage with the table

# Extra mapping appended by hand: the single-character ellipsis becomes "...".
# NOTE(review): the 'Unicode' / 'Unicode Name' values below do not look like the
# official code point and name of "…" — confirm before relying on those columns.
new_row = pd.DataFrame({
    'Unicode': ["U+302D"],
    'Mapped String': ["..."],
    'Char': ["…"],
    'Unicode Name': ["THREE DOTS"]
})
strange = pd.concat([strange, new_row], ignore_index=True)

# s_n is captured BEFORE the row below is dropped, so it is one larger than the
# number of rows that remain in `strange`.
s_n = len(strange)

# Drop the first row whose mapped string is an apostrophe. The positional
# index from list.index() equals the row label because the frame was
# re-indexed by the concat above (ignore_index=True).
strange = strange.drop(list(strange["Mapped String"]).index("'"))
# text cleaning
def my_clean(text):
    """Collapse every run of whitespace to a single space and lower-case *text*.

    Leading and trailing whitespace is removed as a side effect of splitting
    on whitespace and re-joining with single spaces.
    """
    collapsed = " ".join(text.split())
    return collapsed.lower()
# emoji removal
def remove_emoji(string):
    """Return *string* with every emoji removed.

    ``emoji.get_emoji_regexp()`` was deprecated and then removed in emoji 2.0,
    so the supported ``emoji.replace_emoji`` API is used when it is available.
    The regexp-based path is kept only as a fallback for older releases that
    do not provide ``replace_emoji`` yet.
    """
    if hasattr(emoji, "replace_emoji"):
        return emoji.replace_emoji(string, replace='')
    return emoji.get_emoji_regexp().sub(u'', string)
# load naive bayes
# Restore the pickled classifier that is later fed the K-means label and the
# rounded network prediction as a two-column row.
# NOTE: unpickling runs arbitrary code, so naivebayes.pkl must be trusted.
with open('naivebayes.pkl', 'rb') as f:
    nv = pickle.load(f)
def decode_text(indices):
    """Turn a sequence of vocabulary ids back into a space-separated string.

    Id 0 is skipped (the original code filters it out; it is presumably the
    padding id of the vectorizer — confirm against the layer configuration).

    The vocabulary list is fetched once up front: ``get_vocabulary()`` builds
    the full list on every call, so calling it per token made decoding cost
    grow with ``len(indices) * vocabulary size``.
    """
    vocabulary = vectorize_layer.get_vocabulary()
    return " ".join([vocabulary[index] for index in indices if index != 0])
def predict_prob(texts):
    """Vectorize *texts* and return the model's predictions for them.

    This is the classifier callback handed to the LIME text explainer: it
    receives a batch of strings and returns whatever ``model.predict`` yields
    for the vectorized batch.
    """
    return model.predict(vectorize_layer(tf.constant(texts)))
base_orange = (1, 0.65, 0)  # Lighter orange for positive contributions
base_blue = (0.27, 0.51, 0.71)  # Lighter blue for negative contributions
base_white = (1, 1, 1)  # White for zero contribution


def contribution_to_background_color(word, contribution, max_contribution=1):
    """Wrap *word* in a ``<span>`` whose background encodes *contribution*.

    The contribution is scaled by *max_contribution* and clamped to [-1, 1].
    Positive values blend white towards ``base_orange``, negative values blend
    white towards ``base_blue``, and zero stays white.

    Args:
        word: Text to display. It is HTML-escaped before being embedded, since
            it can originate from user input that is rendered as raw HTML.
        contribution: Signed weight of the word.
        max_contribution: Scale used for normalisation. A value of zero is
            treated as "no contribution" instead of raising a division error.

    Returns:
        An HTML ``<span>`` string with an inline background colour.
    """
    import html  # local import keeps this block self-contained

    # A zero scale means every weight is zero, so the word is neutral.
    norm_contribution = contribution / max_contribution if max_contribution else 0
    norm_contribution = max(min(norm_contribution, 1), -1)

    if norm_contribution > 0:
        target, strength = base_orange, norm_contribution
    elif norm_contribution < 0:
        target, strength = base_blue, -norm_contribution
    else:
        target, strength = base_white, 0

    # Linear blend between white and the target colour, channel by channel.
    blended = [
        white * (1 - strength) + tone * strength
        for white, tone in zip(base_white, target)
    ]
    return f'<span style="background-color:{mcolors.to_hex(blended)}; padding:2px;">{html.escape(str(word))}</span>'
# Alias for tf.data's AUTOTUNE constant; passed as the prefetch buffer size
# when the input dataset is built in the server logic.
AUTOTUNE = tf.data.AUTOTUNE
# Define the UI
def _build_app_ui():
    """Assemble the page: title bar, sidebar layout, footer and stylesheet."""
    title_bar = ui.tags.div("News Authentication app", class_='title-bar')

    # Sidebar: the two LIME parameters and the button that triggers a run.
    controls = ui.sidebar(
        ui.input_numeric(id="num_features", label="Number of feature", value=100, min=0, max=None),
        ui.input_numeric(id='num_samples', label="number of samples", value=5000, min=0, max=None),
        ui.input_action_button(id="Go", label="Check", width="200px", align="center"),
    )

    # Info popover shown in the header of the article card.
    help_popover = ui.popover(
        ui.span(
            info_circle_fill,
            style="position:absolute; top: 5px; right: 7px;",
        ),
        ui.div(
            ui.tags.p("This application predicts whether a news article is fake or real based on user input."),
            ui.tags.p("You can enter the article text and adjust the parameters to see how the predictions change."),
            ui.tags.p("Use the 'Check' button to get predictions."),
            ui.tags.p("The models used here were trained with articles longer than 200 characters, so this app works better for such cases."),
            class_="custom-popover",
        ),
        placement="right",
        id="card_popover",
    )

    article_card = ui.card(
        ui.card_header("Enter Article", help_popover),
        ui.input_text_area(
            id="article",
            label="",
            placeholder="🚀 NASA's #PsycheMission launched on October 13, 2023, to explore asteroid 16 Psyche, believed to be the exposed core of a planetesimal! 🪐 The mission aims to unlock secrets of planetary formation. Psyche is mostly metal and could offer clues about Earth’s core. Stay tuned as the spacecraft journeys over 2 billion miles to reach it by 2029! #SpaceExploration #NASA #Asteroid",
            height='200px',
            width='1200px',
        ),
    )

    # Text verdicts; the "txt1" output is not placed in the layout.
    verdicts_card = ui.card(
        ui.card_body(
            ui.output_ui("txt_length"),
            ui.output_ui("txt2"),
            ui.output_ui("txt3"),
            ui.output_ui("txt4"),
        ),
        fill=True,
    )
    chart_card = ui.card(
        ui.card_body(ui.output_plot("bar_fake")),
        fill=True,
    )
    results_card = ui.card(
        ui.card_header("Prediction Results"),
        ui.card_body(
            ui.layout_column_wrap(
                verdicts_card,
                chart_card,
                heights_equal="row",
                width=1,
            ),
            fill=True,
        ),
    )
    lime_card = ui.card(
        ui.card_header("LIME Explanation"),
        ui.card_body(ui.output_ui("lime_output")),
        fill=True,
    )

    main_layout = ui.layout_sidebar(
        controls,
        article_card,
        ui.layout_columns(
            results_card,
            lime_card,
            col_widths=[4, 8],
            fill=False,
        ),
    )

    # Footer: (icon URL, alt text, profile URL) for each social link, in order.
    social_specs = [
        (
            "https://raw.githubusercontent.com/statisticsguru1/Utility-functions/refs/heads/main/E-learn/images/facebook.svg",
            "Facebook",
            "https://www.facebook.com/FesnicResearchSolutions/",
        ),
        (
            "https://raw.githubusercontent.com/statisticsguru1/Utility-functions/refs/heads/main/E-learn/images/instagram.svg",
            "Instagram",
            "https://www.instagram.com/fesnicresearchsolutions/?hl=en",
        ),
        (
            "https://raw.githubusercontent.com/statisticsguru1/Utility-functions/refs/heads/main/E-learn/images/linkedin.svg",
            "LinkedIn",
            "https://www.linkedin.com/in/festus-nzuma-26580163",
        ),
        (
            "https://raw.githubusercontent.com/statisticsguru1/Utility-functions/refs/heads/main/E-learn/images/youtube.svg",
            "YouTube",
            "https://www.youtube.com/@FesnicResearchSolutions",
        ),
    ]
    social_links = [
        ui.tags.a(ui.tags.img(src=icon_url, alt=alt_text), href=profile_url, target="_blank")
        for icon_url, alt_text, profile_url in social_specs
    ]
    footer = ui.tags.div(
        ui.tags.p("© 2024 Fesnic Research Solutions. All rights reserved."),
        ui.tags.div(*social_links, class_="social-media"),
        class_='custom-foot',
    )

    return ui.page_fluid(
        title_bar,
        main_layout,
        footer,
        ui.include_css(Path(__file__).parent / "styles.css"),
    )


app_ui = _build_app_ui()
# Define the server logic
def server(input, output, session):
    """Register the reactive calculations and outputs of the app.

    Everything is driven by the "Go" button: the preprocessing and the LIME
    explanation are gated on it with ``reactive.event``, and the prediction
    calcs and outputs depend on the gated preprocessing step.
    """
    import html  # stdlib; imported locally so this block is self-contained

    def _verdict(flag):
        """Return the label shown in the UI: 0 is "Reliable", anything else "Fake"."""
        return "Reliable" if flag == 0 else "Fake"

    def _rounded_ann_label():
        """Return the first network output rounded to the nearest integer."""
        return round(Ann_pred()[0])

    def _token_key(raw_word):
        """Lower-case *raw_word* and drop everything except letters, digits and "_".

        Used to compare words of the raw article with the words LIME reports,
        which come from the cleaned, vectorizer-decoded text.
        """
        return "".join(ch for ch in raw_word.lower() if ch.isalnum() or ch == "_")

    @output
    @render.ui
    @reactive.event(input.Go)
    def txt_title():
        """Static heading for the results."""
        return ui.tags.h4("Results")

    @output
    @render.ui
    @reactive.event(input.Go)
    def txt_length():
        """Show the number of whitespace-separated words in the article."""
        word_count = len(str(input.article()).split())
        return ui.HTML(f"<strong>Article length: {word_count} words</strong>")

    @reactive.Calc
    @reactive.event(input.Go)
    def text_preps():
        """Clean the article and return it as a vectorized tf.data pipeline."""
        text = remove_emoji(input.article())
        # Apply every mapping that is actually present in the table. Iterating
        # the rows directly avoids relying on a hard-coded row index and on a
        # row count taken before a row was dropped from the table.
        for char, mapped in zip(strange["Char"], strange["Mapped String"]):
            text = text.replace(char, str(mapped))
        text = my_clean(text)
        textdata = tf.data.Dataset.from_tensor_slices([text]).batch(batch_size=64)
        return textdata.map(vectorize_layer).cache().prefetch(buffer_size=AUTOTUNE)

    @reactive.Calc
    def Ann_pred():
        """Return the network's prediction row for the single article."""
        return model.predict(text_preps())[0]

    @reactive.Calc
    def Kmean_pred():
        """Return ``1 - cluster label`` of the article's mined features."""
        features = feature_miner.predict(text_preps())
        return 1 - kmea.predict(features)

    @reactive.Calc
    def naive_pred():
        """Combine the K-means label and the rounded network output via ``nv``."""
        ann_label = np.array([_rounded_ann_label()], dtype="int32")
        # One row with two columns: (k-means label, rounded network label).
        final = np.stack((Kmean_pred(), ann_label), axis=1)
        return nv.predict(final)

    @output
    @render.ui
    def txt1():
        """Show the first network output as a percentage."""
        xs = round(100 * Ann_pred()[0], 4)
        return ui.HTML(f"<strong>Prob it is fake fake(based on ANN) :{round(xs,2)}%</strong>")

    # Bar chart of the two network outputs
    @output
    @render.plot
    def bar_fake():
        """Plot the two network outputs as labelled bars and return the figure."""
        # Read the prediction first: if the calc is not ready yet, no figure
        # has been created that would otherwise be left open.
        probabilities = Ann_pred()
        categories = ['Fake', 'Real']
        values = [round(probabilities[0], 4), round(probabilities[1], 4)]
        colors = ['#145da0', '#FF8C00']

        # Return an explicit Figure instead of relying on pyplot's global
        # state and plt.show(), which has no place in a server process.
        fig, ax = plt.subplots(figsize=(3, 3))
        ax.bar(categories, values, color=colors)
        for index, value in enumerate(values):
            ax.text(index, value, str(value), ha='center', va='bottom', fontsize=12)
        ax.set_title("Probabilities")
        fig.tight_layout()
        return fig

    @output
    @render.ui
    def txt2():
        """Show the verdict derived from the rounded network output."""
        return ui.HTML(f"<strong>ANN prediction: {_verdict(_rounded_ann_label())}</strong>")

    @output
    @render.ui
    def txt3():
        """Show the verdict derived from the K-means label."""
        return ui.HTML(f"<strong>Kmeans prediction: {_verdict(Kmean_pred())}</strong>")

    @output
    @render.ui
    def txt4():
        """Show the verdict of the combined classifier."""
        # Reuse the shared calc instead of rebuilding the same feature row here.
        return ui.HTML(f"<strong>weighted prediction: {_verdict(naive_pred())}</strong>")

    # LIME Explanation logic
    @reactive.Calc
    @reactive.event(input.Go)
    def lime_explanation():
        """Return the article as HTML with LIME-weighted words highlighted."""
        # Decode the vectorized article back to text so that LIME perturbs
        # exactly the tokens the model sees.
        x_sample = next(iter(text_preps()))
        decoded_text = decode_text(x_sample[0])

        explainer = lime.lime_text.LimeTextExplainer(class_names=['Fake', 'Real'])
        explanation = explainer.explain_instance(
            decoded_text,
            predict_prob,
            num_features=input.num_features(),
            num_samples=input.num_samples(),
        )
        text_explanation = explanation.as_list()

        # Look-up keyed by normalised word; the first entry wins, mirroring a
        # first-match scan over the explanation list.
        weights = {}
        for explained_word, contribution in text_explanation:
            weights.setdefault(_token_key(explained_word), contribution)

        # An empty explanation, or one with only zero weights, must not crash
        # the scaling: fall back to a neutral scale of 1.
        max_contribution = max((abs(c) for _, c in text_explanation), default=0) or 1

        colored_text = []
        for word in input.article().split():
            # Compare normalised whole words. A raw substring test would
            # colour short words that merely occur inside a longer explained
            # word and would miss capitalised or punctuated words.
            key = _token_key(word)
            if key and key in weights:
                colored_text.append(
                    contribution_to_background_color(word, weights[key], max_contribution)
                )
            else:
                # The article is user input that ends up in raw HTML: escape it.
                colored_text.append(f'<span style="white-space:nowrap">{html.escape(word)}</span>')
        return " ".join(colored_text)

    @output
    @render.ui
    def lime_output():
        """Wrap the highlighted article in its container element."""
        return ui.HTML(f'<div style="display: inline-block; white-space: normal;">{lime_explanation()}</div>')
# Application object combining the UI definition with the server function.
app = App(app_ui, server)