-
Notifications
You must be signed in to change notification settings - Fork 0
/
Scoring.py
43 lines (35 loc) · 1.07 KB
/
Scoring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
import docx2txt
import pdfplumber
import fitz
import streamlit as st
#@st.cache
def load_model():
    """Construct and return the sentence-embedding model.

    Returns:
        A ``SentenceTransformer`` built from the model name
        ``'all-MiniLM-L12-v2'``.
    """
    # Other candidates the original author left disabled:
    #   SentenceTransformer('sentence-transformers/all-roberta-large-v1')
    #   a local weights file at 'model\pytorch_model.bin'
    return SentenceTransformer('all-MiniLM-L12-v2')
#@st.cache
def embedding(text, model):
    """Encode ``text`` with ``model`` and return the result.

    Args:
        text: Input forwarded unchanged to ``model.encode``.
        model: Any object exposing an ``encode`` method; presumably the
            model returned by ``load_model`` (confirm against callers).

    Returns:
        Whatever ``model.encode(text)`` returns.
    """
    return model.encode(text)
def similar(emb1, emb2):
    """Return the cosine similarity of two embeddings scaled by 100.

    Args:
        emb1: First embedding; must provide ``reshape`` (it is flattened
            into a single row before comparison).
        emb2: Second embedding, treated the same way.

    Returns:
        The first row of the ``cosine_similarity`` result multiplied by
        100. Each input is reshaped to one row, so that row holds a single
        value; it is returned as a one-element array, not a scalar.
    """
    row_a = emb1.reshape(1, -1)
    row_b = emb2.reshape(1, -1)
    scores = cosine_similarity(row_a, row_b)
    return scores[0] * 100
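# Alternative PDF reader based on fitz (PyMuPDF); currently disabled and
# kept only for reference.
# def pdfread(uploaded_doc):
#     with fitz.open(uploaded_doc) as doc:
#         cv = ""
#         for page in doc:
#             cv = ''.join([cv, page.get_text()])
#     return cv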
def pdfread(uploaded_doc):
    """Extract the text of every page of a PDF as a single string.

    Args:
        uploaded_doc: The document handed to ``pdfplumber.open``
            (presumably a path or an uploaded file object; confirm
            against callers).

    Returns:
        The text of all pages concatenated in page order, with no
        separator inserted between pages. Pages that yield no text
        contribute an empty string.
    """
    with pdfplumber.open(uploaded_doc) as pdf:
        # extract_text() can return None for a page without extractable
        # text (e.g. a scanned image); fall back to '' so the join cannot
        # raise TypeError. Pages are read while the document is still open.
        return ''.join(page.extract_text() or '' for page in pdf.pages)
def docxread(uploaded_doc):
    """Return the text that ``docx2txt.process`` extracts from a document.

    Args:
        uploaded_doc: The document passed straight to ``docx2txt.process``.

    Returns:
        The value returned by ``docx2txt.process(uploaded_doc)``.
    """
    return docx2txt.process(uploaded_doc)