-
Notifications
You must be signed in to change notification settings - Fork 0
/
content_similarity.py
22 lines (16 loc) · 1.08 KB
/
content_similarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Import the SentenceTransformer class and util module from the sentence_transformers library
from sentence_transformers import SentenceTransformer, util
# Load the pre-trained "paraphrase-multilingual-MiniLM-L12-v2" model once, at module import time
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# Define a function named sentence_similarity that takes two input sentences as strings and returns their similarity score as a percentage
def sentence_similarity(input1, input2):
    """Score how similar two sentences are, expressed as a percentage.

    Both sentences are lower-cased, embedded with the module-level ``model``,
    and compared using cosine similarity.

    Args:
        input1: First sentence, as a string.
        input2: Second sentence, as a string.

    Returns:
        The cosine similarity rounded to two decimals and scaled by 100
        (for example, 0.87 becomes 87.0). Pairs whose rounded similarity
        is below 0.3 are reported as 0.
    """
    # Embed each sentence on its own, lower-casing it just before encoding.
    first_vec, second_vec = [
        model.encode(text.lower(), convert_to_tensor=True)
        for text in (input1, input2)
    ]

    # cos_sim yields a 1x1 matrix for a single pair; pull out its only entry.
    similarity_matrix = util.cos_sim(first_vec, second_vec).tolist()
    score = round(similarity_matrix[0][0], 2)

    # Weak matches are collapsed to zero rather than reported as a low score.
    if score < 0.3:
        return 0

    # The second rounding removes float noise introduced by the scaling.
    return round(score * 100, 2)