-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnlp_semantic_analysis.py
191 lines (150 loc) · 5.95 KB
/
nlp_semantic_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import os

import openai
import pandas as pd
import spacy
# NOTE(review): this top-level script performs the same quiz flow as main()
# defined further down, so the quiz runs whenever the module is imported and a
# second time through main() when the file is executed directly.

# Load the spaCy language model for semantic analysis
nlp = spacy.load("en_core_web_sm")

# Load the Excel file
excel_file = "Data_analyst_question.xlsx"  # Replace with your actual file path
df = pd.read_excel(excel_file)

# Ask the user to choose a category from column A (Category)
categories = df["Category"].unique()
if len(categories) == 0:
    raise ValueError("No categories available to choose from.")

print("Available Categories:")
for idx, category in enumerate(categories):
    print(f"{idx + 1}. {category}")

# Re-prompt until the reply is a number inside the listed range. Without this
# check, "0" would become index -1 and silently select the last category, and
# non-numeric or out-of-range replies would crash the script.
while True:
    raw_choice = input("Choose a category (enter the corresponding number): ")
    try:
        category_choice = int(raw_choice) - 1
    except ValueError:
        print("Please enter a whole number.")
        continue
    if 0 <= category_choice < len(categories):
        break
    print(f"Please enter a number between 1 and {len(categories)}.")
selected_category = categories[category_choice]

# Filter questions and correct answers for the selected category
category_data = df[df["Category"] == selected_category]
# Shuffle within the category; the index is reset so rows are labelled 0..n-1
category_data = category_data.sample(frac=1).reset_index(drop=True)

# Lists holding the asked questions, their reference answers and the replies
user_answers = []
questions = []
correct_answers = []

# Generate questions and collect user answers
num_questions = min(2, len(category_data))  # Ensure you don't exceed the available questions
for i in range(num_questions):
    question = category_data.loc[i, "Question"]
    questions.append(question)
    correct_answer = category_data.loc[i, "Correct Answer"]
    correct_answers.append(correct_answer)
    user_answer = input(f"Q: {question}\nA: ")
    user_answers.append(user_answer)

# Analyze user answers using semantic analysis and calculate similarity scores
scores = []
for u, q, c in zip(user_answers, questions, correct_answers):
    user_ans_doc = nlp(u)
    correct_ans_doc = nlp(c)
    similarity_score = user_ans_doc.similarity(correct_ans_doc)
    scores.append(similarity_score)

# Provide personalized feedback to the user based on semantic analysis
feedback = "".join(
    f"Question: {q}\n"
    f"Your Answer: {u}\n"
    f"Correct Answer: {c}\n"
    f"Your Score: {s:.2f}\n"
    "\n"
    for u, q, c, s in zip(user_answers, questions, correct_answers, scores)
)

# Display personalized feedback to the user
print("\nFeedback:")
print(feedback)
# Set up your OpenAI API key.
# SECURITY: the key is read from the OPENAI_API_KEY environment variable
# instead of being written into the source file. A key that has ever been
# committed to version control should be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Load the spaCy language model for semantic analysis
nlp = spacy.load("en_core_web_sm")
def load_excel_file(file_path):
    """
    Read the Excel workbook at ``file_path`` with pandas.

    Returns the DataFrame produced by ``pd.read_excel``.
    """
    return pd.read_excel(file_path)
def choose_category(df):
    """
    Ask the user to choose a category from the "Category" column.

    The distinct categories are printed as a 1-based numbered list and the
    user is prompted until they enter a number from that list.

    Args:
        df: DataFrame containing a "Category" column.

    Returns:
        The selected category value.

    Raises:
        ValueError: If the "Category" column contains no values, since there
            is nothing the user could pick.
    """
    categories = df["Category"].unique()
    if len(categories) == 0:
        raise ValueError("No categories available to choose from.")

    print("Available Categories:")
    for idx, category in enumerate(categories, start=1):
        print(f"{idx}. {category}")

    while True:
        raw_choice = input("Choose a category (enter the corresponding number): ")
        try:
            choice = int(raw_choice)
        except ValueError:
            print("Please enter a whole number.")
            continue
        # Check the range explicitly: without it, "0" would turn into index
        # -1 and silently return the last category.
        if 1 <= choice <= len(categories):
            return categories[choice - 1]
        print(f"Please enter a number between 1 and {len(categories)}.")
def filter_questions(df, selected_category):
    """
    Select the rows whose "Category" equals ``selected_category``.

    The matching rows are returned in random order with a fresh 0-based
    index.
    """
    matches = df["Category"] == selected_category
    shuffled = df.loc[matches].sample(frac=1)  # frac=1 keeps every row, reordered
    return shuffled.reset_index(drop=True)
def generate_questions(category_data, num_questions):
    """
    Ask the first ``num_questions`` questions and collect the user's answers.

    Rows are taken by position, so the DataFrame does not need a 0-based
    index, and asking for more questions than there are rows simply asks
    every available question.

    Args:
        category_data: DataFrame with "Question" and "Correct Answer" columns.
        num_questions: Maximum number of questions to ask; values below one
            ask nothing.

    Returns:
        A tuple ``(questions, correct_answers, user_answers)`` of three lists
        of equal length, in the order the questions were asked.
    """
    questions = []
    correct_answers = []
    user_answers = []
    # Clamp at zero: a negative slice bound would mean "all but the last k".
    rows = category_data.iloc[:max(num_questions, 0)]
    for question, correct_answer in zip(rows["Question"], rows["Correct Answer"]):
        questions.append(question)
        correct_answers.append(correct_answer)
        user_answers.append(input(f"Q: {question}\nA: "))
    return questions, correct_answers, user_answers
def analyze_similarity(user_answers, questions, correct_answers):
    """
    Score each user answer against its correct answer with spaCy.

    Both texts are run through the module-level ``nlp`` pipeline and compared
    with ``Doc.similarity``. ``questions`` is not read, but it takes part in
    the ``zip`` and so limits how many answers are scored.

    Returns one similarity score per scored answer, in input order.
    """
    # NOTE(review): the pipeline loaded in this file is "en_core_web_sm";
    # confirm it provides vectors suitable for similarity scoring.
    return [
        nlp(given).similarity(nlp(expected))
        for given, _question, expected in zip(user_answers, questions, correct_answers)
    ]
def provide_feedback(user_answers, questions, correct_answers, scores):
    """
    Build the feedback text for a finished quiz.

    Each question contributes four labelled lines (question, user answer,
    correct answer, score to two decimals) followed by a blank line.
    Returns the concatenated text, or an empty string when there is nothing
    to report.
    """
    entries = zip(user_answers, questions, correct_answers, scores)
    return "".join(
        f"Question: {asked}\n"
        f"Your Answer: {given}\n"
        f"Correct Answer: {expected}\n"
        f"Your Score: {score:.2f}\n"
        "\n"
        for given, asked, expected, score in entries
    )
def main(excel_file="Data_analyst_question.xlsx", max_questions=2):
    """
    Run one interactive quiz session and print the feedback.

    Args:
        excel_file: Path of the workbook holding the question bank.
        max_questions: Upper bound on the number of questions asked; fewer
            are asked when the chosen category has fewer rows.
    """
    # Load the Excel file
    df = load_excel_file(excel_file)

    # Ask the user to choose a category
    selected_category = choose_category(df)

    # Filter questions for the selected category
    category_data = filter_questions(df, selected_category)

    # Generate questions and collect user answers, never asking for more
    # questions than the category contains
    num_questions = min(max_questions, len(category_data))
    questions, correct_answers, user_answers = generate_questions(category_data, num_questions)

    # Analyze user answers using semantic analysis
    scores = analyze_similarity(user_answers, questions, correct_answers)

    # Provide personalized feedback to the user
    feedback = provide_feedback(user_answers, questions, correct_answers, scores)

    # Display personalized feedback to the user
    print("\nFeedback:")
    print(feedback)


if __name__ == "__main__":
    main()