This repository has been archived by the owner on Feb 28, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathNLP_cosine_similarity.py
45 lines (39 loc) · 1.99 KB
/
NLP_cosine_similarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Illustrative example: how cosine similarity is used in Replika's retrieval
# model to select a suitable response.

# Known conversation contexts that an incoming phrase is compared against.
context = [
    "Thank you for always caring about me. I really appreciate it. I think I just need some time to process",
    "I wish you would stay out of my business, you're so irritating. Just back off, will you?",
]

# Candidate replies; responses[i] is the reply paired with context[i].
responses = [
    "You're welcome. You know that I care about you and am here for you when you want to talk",
    "Well, you're on your own. I've had enough of your rudeness!",
]
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
# Learn the TF-IDF vocabulary/weights from the known contexts and vectorize
# them in the same step; rows of `array` line up with entries of `context`.
array = vectorizer.fit_transform(context).toarray()
# `X` is the fitted vectorizer itself (what `fit()` would have returned); it
# is reused later to transform new phrases into the same vector space.
X = vectorizer
import numpy as np
def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    Derived from the definition of the dot product:
    ``cos(theta) = (a . b) / (|a| * |b|)``.

    Args:
        a: First vector (any sequence accepted by ``np.dot``).
        b: Second vector, the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b``, or ``0.0`` when
        either vector has zero length (the angle is undefined there).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero-length vector would make the division produce nan (plus a
        # RuntimeWarning); treat it as "no similarity" instead.
        return 0.0
    return np.dot(a, b) / denominator
def get_matching_response(test_context):
    """Print the stored response whose context best matches a phrase.

    The phrase is vectorized with the fitted vectorizer ``X`` and compared,
    via ``cosine_similarity``, against every row of ``array``. The entry of
    ``responses`` at the index of the best-scoring row is printed together
    with its score.

    Args:
        test_context: The sample phrase to find a response for.

    Returns:
        None. The result is printed as ``"<response> (<score>)"``. When no
        context scores above zero, an empty response and a score of 0 are
        printed.
    """
    test_vector = X.transform([test_context]).toarray()[0]
    response = ""
    most_sim = 0
    for index, context_vector in enumerate(array):
        # Score each context exactly once per iteration.
        similarity = cosine_similarity(context_vector, test_vector)
        # Strict ">" keeps the earliest context on ties; a nan score never
        # compares greater, so it cannot be selected.
        if similarity > most_sim:
            most_sim = similarity
            response = responses[index]
    # `response` and `most_sim` are always bound, so this print cannot fail
    # even when nothing matched.
    print(f"{response} ({most_sim})")
# Demo: run a few sample phrases through the matcher, in order, printing the
# selected response and its similarity score for each.
for _phrase in (
    "Thanks for supporting me. I think I just need a little time",
    "I do appreciate you. Thanks for being there for me",
    "Seriously? This is what you're coming back with?",
    "You're an idiot. I need a break",
):
    get_matching_response(_phrase)