Skip to content

Commit

Permalink
add fastSenSimiCal function
Browse files Browse the repository at this point in the history
  • Loading branch information
jsksxs360 committed Dec 10, 2017
1 parent 1fc5881 commit 55228a5
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/me/xiaosheng/word2vec/Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public static void main(String[] args) throws Exception {
List<String> wordList1 = Segment.getWords(s1);
List<String> wordList2 = Segment.getWords(s2);
List<String> wordList3 = Segment.getWords(s3);
//快速句子相似度
System.out.println("快速句子相似度:");
System.out.println("s1|s1: " + vec.fastSentenceSimilarity(wordList1, wordList1));
System.out.println("s1|s2: " + vec.fastSentenceSimilarity(wordList1, wordList2));
System.out.println("s1|s3: " + vec.fastSentenceSimilarity(wordList1, wordList3));
//句子相似度(所有词语权值设为1)
System.out.println("句子相似度:");
System.out.println("s1|s1: " + vec.sentenceSimilarity(wordList1, wordList1));
Expand Down
41 changes: 41 additions & 0 deletions src/me/xiaosheng/word2vec/Word2Vec.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ private float calDist(float[] vec1, float[] vec2) {
}
return dist;
}
/**
 * Accumulates a vector into a running sum, in place.
 * <p>
 * Every component of {@code sum} is increased by the component of
 * {@code vec} at the same index. Only the first {@code sum.length}
 * components of {@code vec} are read.
 *
 * @param sum the accumulator vector; modified in place
 * @param vec the vector to add; left untouched
 */
private void calSum(float[] sum, float[] vec) {
    final int dimension = sum.length;
    int idx = 0;
    while (idx < dimension) {
        sum[idx] = sum[idx] + vec[idx];
        idx++;
    }
}
/**
* 计算词相似度
* @param word1
Expand Down Expand Up @@ -149,6 +159,37 @@ private float calMaxSimilarity(String centerWord, List<String> wordList) {
if (max == -1) return 0;
return max;
}
/**
 * Quickly estimates the similarity of two sentences.
 * <p>
 * The word vectors of each sentence are summed into a single sentence
 * vector; the result is {@code calDist(sum1, sum2)} divided by the product
 * of the Euclidean norms of the two sums. NOTE(review): this is the cosine
 * similarity provided {@code calDist} returns the dot product - its body is
 * not visible here, confirm.
 * <p>
 * Words for which {@code getWordVector} returns {@code null} are skipped.
 * The method returns 0 when the model is not loaded, when either word list
 * is empty, or when either sentence vector has zero norm (for example when
 * none of its words has a vector).
 *
 * @param sentence1Words words of the first sentence
 * @param sentence2Words words of the second sentence
 * @return the similarity of the two sentences, or 0 in the degenerate
 *         cases listed above
 */
public float fastSentenceSimilarity(List<String> sentence1Words, List<String> sentence2Words) {
    if (!loadModel) {
        return 0;
    }
    if (sentence1Words.isEmpty() || sentence2Words.isEmpty()) {
        return 0;
    }
    final int dimension = vec.getSize();
    float[] sen1vector = new float[dimension];
    float[] sen2vector = new float[dimension];
    for (String word : sentence1Words) {
        float[] wordVector = getWordVector(word);
        if (wordVector != null) {
            calSum(sen1vector, wordVector);
        }
    }
    for (String word : sentence2Words) {
        float[] wordVector = getWordVector(word);
        if (wordVector != null) {
            calSum(sen2vector, wordVector);
        }
    }
    // Squared Euclidean norms of the two sentence vectors.
    double len1 = 0;
    double len2 = 0;
    for (int i = 0; i < dimension; i++) {
        len1 += sen1vector[i] * sen1vector[i];
        len2 += sen2vector[i] * sen2vector[i];
    }
    // A zero norm means no word contributed a vector; dividing by
    // sqrt(0) would yield NaN, so report "no similarity" instead.
    if (len1 == 0 || len2 == 0) {
        return 0;
    }
    return (float) (calDist(sen1vector, sen2vector) / Math.sqrt(len1 * len2));
}
/**
* 计算句子相似度
* 所有词语权值设为1
Expand Down

0 comments on commit 55228a5

Please sign in to comment.