forked from KotlinSpringBoot/saber
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.sql
40 lines (35 loc) · 56.6 KB
/
app.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-- Create the application database if it does not exist yet, so the script can
-- also be run against a server where `saber` was pre-created.
-- utf8mb4 is used instead of utf8 (MySQL's 3-byte utf8mb3 alias) so that 4-byte
-- characters in free-form HTML content can be stored.
CREATE SCHEMA IF NOT EXISTS `saber`
  DEFAULT CHARACTER SET utf8mb4;

-- Make `saber` the default schema: every statement that follows references
-- `knowledge` without a schema qualifier and would otherwise fail with
-- "No database selected" or write into whatever schema the session had open.
USE `saber`;
-- ---------------------------------------------------------------------------
-- Table: knowledge
-- One row per knowledge-base entry: a unique title plus its answer text.
-- The COMMENT strings are stored as column/table metadata and are kept in the
-- original Chinese; an English gloss is given beside each.
-- ---------------------------------------------------------------------------
CREATE TABLE `knowledge` (
  `id`           BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键',     -- primary key
  `gmt_create`   DATETIME            NOT NULL                COMMENT '创建时间', -- creation time
  `gmt_modified` DATETIME            NOT NULL                COMMENT '修改时间', -- modification time
  `title`        VARCHAR(100)        NOT NULL                COMMENT '标题',     -- title
  `answer`       LONGTEXT                                    COMMENT '答案',     -- answer (nullable)
  PRIMARY KEY (`id`),
  -- Titles must be unique across the table.
  UNIQUE KEY `uk_title` (`title`)
) ENGINE = InnoDB
  COMMENT = '知识库'; -- table comment: "knowledge base"
/*
-- Query: SELECT * FROM saber.knowledge
LIMIT 0, 1000
-- Date: 2018-05-22 13:47
*/
-- Seed row 1: an HTML-formatted FreeMarker snippet titled
-- 'FreeMarker 遍历 List' (iterating over a List in FreeMarker).
-- Columns are listed in table-definition order, so the long HTML body comes last.
INSERT INTO `knowledge`
  (`id`, `gmt_create`, `gmt_modified`, `title`, `answer`)
VALUES
  (1,
   '2018-05-22 13:16:12',
   '2018-05-22 13:16:12',
   'FreeMarker 遍历 List',
   '<pre><span><#if </span><span>feeds</span><span>?</span><span>exists</span><span>><br></span><span> <#list </span><span>feeds </span><span>as </span><span>e</span><span>><br></span><span> </span><span><</span><span>li </span><span>class=</span><span>\"layui-timeline-item\"</span><span>></span><br> <span><</span><span>i </span><span>class=</span><span>\"layui-icon layui-timeline-axis\"</span><span>></span><span></span><span></</span><span>i</span><span>></span><br> <span><</span><span>div </span><span>class=</span><span>\"layui-timeline-content layui-text\"</span><span>></span><br> <span><</span><span>h3 </span><span>class=</span><span>\"layui-timeline-title\"</span><span>></span><br> <span><</span><span>a </span><span>href=</span><span>\"/knowledge/$e.id\" </span><span>target=</span><span>\"_blank\" </span><span>style=</span><span>\"</span><span>color</span>: <span>#469689</span><span>\"</span><span>></span><span>${</span><span>e</span>.<span>title</span><span>}</span><span></</span><span>a</span><span>></span><br> <span></</span><span>h3</span><span>></span><br> <span><</span><span>h5 </span><span>class=</span><span>\"layui-timeline-title\"</span><span>></span><br> <span>${</span><span>e</span>.<span>gmtCreate</span><span>?</span><span>string</span>(<span>\"yyyy-MM-dd HH:mm:ss\"</span>)<span>}<br></span><span> </span><span></</span><span>h5</span><span>></span><br> <span><</span><span>textarea </span><span>class=</span><span>\"content\" </span><span>id=</span><span>\"answer-</span><span>${</span><span>e</span>.<span>id</span><span>}</span><span>\"</span><span>></span><br> <span>${</span><span>e</span>.<span>answer</span><span>}<br></span><span> </span><span></</span><span>textarea</span><span>></span><br> <span></</span><span>div</span><span>></span><br> <span></</span><span>li</span><span>></span><br> <span></#list><br></span><span></#if></span></pre>');
INSERT INTO `knowledge` (`id`, `answer`, `gmt_create`, `gmt_modified`, `title`) VALUES (2,
'<p style=\"text-align: justify;\">无论是做科学研究,还是工程项目,我们总是会碰上要比较字符串的相似性,比如拼写纠错、文本去重、上下文相似性等。度量的方法有很多,到底使用哪一种方法来计算相似性,这就需要我们根据情况选择合适的方法来计算。这里把几种常用到的度量字符串相似性的方法罗列一下,仅供参考,欢迎大家补充指正。</p><p style=\"text-align: justify;\">1、<a href=\"https://en.wikipedia.org/wiki/Cosine_similarity\" target=\"_blank\">余弦相似性(cosine similarity)</a> <br>余弦相似性大家都非常熟悉,它是定义在向量空间模型(VSM)中的。它的定义如下: <br></p><center><br><img src=\"https://img-blog.csdn.net/20161109161752179\" alt=\"余弦公式\" title=\"\"> <br></center><p><br><span>其中,A,B为向量中间中的两个向量。 </span><br><span>在使用它来做字符串相似性度量的时候,需要先将字符串向量化,通常使用词袋模型(BOW)来向量化。举个例子如下: </span><br></p><center><br>String1 = “apple” <br>String2 = “app” <br></center><p><br><span>则词包为{’a’,’e’,’l’,’p’},若使用0,1判断元素是否在词包中,字符串1、2可以转化为: </span><br></p><center><br>StringA = [1111] <br>StringB = [1001] <br></center><p><br><span>那么,根据余弦公式,可以计算字符串相似性为:0.707。</span></p><p style=\"text-align: justify;\"></p><p style=\"text-align: justify;\">2、<a href=\"https://en.wikipedia.org/wiki/Euclidean_distance\" target=\"_blank\">欧氏距离(Euclidean distance)</a> <br>欧氏距离大家非常熟悉,定义在向量空间模型中,计算使用欧氏距离公式: <br></p><center><br><img src=\"https://img-blog.csdn.net/20161109201239933\" alt=\"这里写图片描述\" title=\"\"> <br></center><p style=\"text-align: justify;\"></p><p style=\"text-align: justify;\">3、<a href=\"https://en.wikipedia.org/wiki/Edit_distance\" target=\"_blank\">编辑距离(edit distance)</a> <br>编辑距离,有的地方也会称为Levenshtein距离,表示从一个字符串转化为另一个字符串所需要的最少编辑次数,这里的编辑是指将字符串中的一个字符替换成另一个字符,或者插入删除字符。例如上例String1通过删除’l’与’e’转化为String2,所以其最小编辑次数为2。 <br>编辑距离的核心就是如何计算出一对字符串间的最小编辑次数,考虑到问题的特点,我们可以使用动态规划的思想来计算其最小编辑次数,根据维基百科:两个字符串<span class=\"MathJax\" id=\"MathJax-Element-18852-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>a</mi><mo>=</mo><msub><mi>a</mi><mn>1</mn></msub><msub><mi>a</mi><mn>2</mn></msub><mo>&#x22EF;</mo><msub><mi>a</mi><mi>n</mi></msub><mrow 
class="MJX-TeXAtom-ORD"><mo>&#xFF0C;</mo></mrow><mi>b</mi><mo>=</mo><msub><mi>b</mi><mn>1</mn></msub><msub><mi>b</mi><mn>2</mn></msub><mo>&#x22EF;</mo><msub><mi>b</mi><mi>m</mi></msub></math>\" role=\"presentation\" style=\"text-align: left;\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-1\"><span><span><span class=\"mrow\" id=\"MathJax-Span-2\"><span class=\"mi\" id=\"MathJax-Span-3\">a</span><span class=\"mo\" id=\"MathJax-Span-4\">=</span><span class=\"msubsup\" id=\"MathJax-Span-5\"><span><span><span class=\"mi\" id=\"MathJax-Span-6\">a</span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-7\">1</span><span></span></span></span></span><span class=\"msubsup\" id=\"MathJax-Span-8\"><span><span><span class=\"mi\" id=\"MathJax-Span-9\">a</span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-10\">2</span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-11\">⋯</span><span class=\"msubsup\" id=\"MathJax-Span-12\"><span><span><span class=\"mi\" id=\"MathJax-Span-13\">a</span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-14\">n</span><span></span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-15\"><span class=\"mrow\" id=\"MathJax-Span-16\"><span class=\"mo\" id=\"MathJax-Span-17\"><span>,</span></span></span></span><span class=\"mi\" id=\"MathJax-Span-18\">b</span><span class=\"mo\" id=\"MathJax-Span-19\">=</span><span class=\"msubsup\" id=\"MathJax-Span-20\"><span><span><span class=\"mi\" id=\"MathJax-Span-21\">b</span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-22\">1</span><span></span></span></span></span><span class=\"msubsup\" id=\"MathJax-Span-23\"><span><span><span class=\"mi\" id=\"MathJax-Span-24\">b</span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-25\">2</span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-26\">⋯</span><span class=\"msubsup\" id=\"MathJax-Span-27\"><span><span><span 
class=\"mi\" id=\"MathJax-Span-28\">b</span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-29\">m</span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>a</mi><mo>=</mo><msub><mi>a</mi><mn>1</mn></msub><msub><mi>a</mi><mn>2</mn></msub><mo>⋯</mo><msub><mi>a</mi><mi>n</mi></msub><mrow class=\"MJX-TeXAtom-ORD\"><mo>,</mo></mrow><mi>b</mi><mo>=</mo><msub><mi>b</mi><mn>1</mn></msub><msub><mi>b</mi><mn>2</mn></msub><mo>⋯</mo><msub><mi>b</mi><mi>m</mi></msub></math></span></span>的编辑距离递归计算公式如下: <br></p><center><br><img src=\"https://img-blog.csdn.net/20161109182513783\" alt=\"距离递归定义\" title=\"\"> <br></center><p><br><span>其中,w表示增删改三种操作的权重,一般定义为: </span><br></p><center><br><span class=\"MathJax\" id=\"MathJax-Element-18853-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>w</mi><mo>=</mo><mrow><mo>{</mo><mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mtr><mtd><mn>1</mn><mo>,</mo></mtd><mtd><mtext>&#x82E5;&#x6709;&#x64CD;&#x4F5C;</mtext></mtd></mtr><mtr><mtd><mn>0</mn><mo>,</mo></mtd><mtd><mtext>&#x65E0;&#x64CD;&#x4F5C;</mtext></mtd></mtr></mtable><mo fence="true" stretchy="true" symmetric="true"></mo></mrow></math>\" role=\"presentation\" style=\"text-align: left;\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-30\"><span><span><span class=\"mrow\" id=\"MathJax-Span-31\"><span class=\"mi\" id=\"MathJax-Span-32\">w</span><span class=\"mo\" id=\"MathJax-Span-33\">=</span><span class=\"mrow\" id=\"MathJax-Span-34\"><span class=\"mo\" id=\"MathJax-Span-35\"><span><span>{</span></span></span><span class=\"mtable\" id=\"MathJax-Span-36\"><span><span><span><span><span class=\"mtd\" id=\"MathJax-Span-37\"><span class=\"mrow\" id=\"MathJax-Span-38\"><span class=\"mn\" id=\"MathJax-Span-39\">1</span><span 
class=\"mo\" id=\"MathJax-Span-40\">,</span></span></span><span></span></span><span><span class=\"mtd\" id=\"MathJax-Span-44\"><span class=\"mrow\" id=\"MathJax-Span-45\"><span class=\"mn\" id=\"MathJax-Span-46\">0</span><span class=\"mo\" id=\"MathJax-Span-47\">,</span></span></span><span></span></span></span><span></span></span><span><span><span><span class=\"mtd\" id=\"MathJax-Span-41\"><span class=\"mrow\" id=\"MathJax-Span-42\"><span class=\"mtext\" id=\"MathJax-Span-43\"><span>若</span><span>有</span><span>操</span><span>作</span></span></span></span><span></span></span><span><span class=\"mtd\" id=\"MathJax-Span-48\"><span class=\"mrow\" id=\"MathJax-Span-49\"><span class=\"mtext\" id=\"MathJax-Span-50\"><span>无</span><span>操</span><span>作</span></span></span></span><span></span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-51\"></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>w</mi><mo>=</mo><mrow><mo>{</mo><mtable columnalign=\"left left\" rowspacing=\".2em\" columnspacing=\"1em\" displaystyle=\"false\"><mtr><mtd><mn>1</mn><mo>,</mo></mtd><mtd><mtext>若有操作</mtext></mtd></mtr><mtr><mtd><mn>0</mn><mo>,</mo></mtd><mtd><mtext>无操作</mtext></mtd></mtr></mtable><mo fence=\"true\" stretchy=\"true\" symmetric=\"true\"></mo></mrow></math></span></span> <br></center><p><br><span class=\"MathJax\" id=\"MathJax-Element-18854-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mrow class="MJX-TeXAtom-ORD"><mi>i</mi><mn>0</mn></mrow></msub><mo>=</mo><mi>i</mi></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-52\"><span><span><span class=\"mrow\" id=\"MathJax-Span-53\"><span class=\"msubsup\" id=\"MathJax-Span-54\"><span><span><span class=\"mi\" 
id=\"MathJax-Span-55\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-56\"><span class=\"mrow\" id=\"MathJax-Span-57\"><span class=\"mi\" id=\"MathJax-Span-58\">i</span><span class=\"mn\" id=\"MathJax-Span-59\">0</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-60\">=</span><span class=\"mi\" id=\"MathJax-Span-61\">i</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mi>i</mi><mn>0</mn></mrow></msub><mo>=</mo><mi>i</mi></math></span></span><span>表示从</span><span class=\"MathJax\" id=\"MathJax-Element-18855-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>b</mi><mrow class="MJX-TeXAtom-ORD"><msup><mi></mi><mo>&#x2032;</mo></msup></mrow></msup><mo>=</mo><msub><mi>b</mi><mn>1</mn></msub><mo>&#x22EF;</mo><msub><mi>b</mi><mi>i</mi></msub></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-62\"><span><span><span class=\"mrow\" id=\"MathJax-Span-63\"><span class=\"msubsup\" id=\"MathJax-Span-64\"><span><span><span class=\"mi\" id=\"MathJax-Span-65\">b</span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-66\"><span class=\"mrow\" id=\"MathJax-Span-67\"><span class=\"msup\" id=\"MathJax-Span-68\"><span><span><span class=\"mi\" id=\"MathJax-Span-69\"></span><span></span></span><span><span class=\"mo\" id=\"MathJax-Span-70\">′</span><span></span></span></span></span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-71\">=</span><span class=\"msubsup\" id=\"MathJax-Span-72\"><span><span><span class=\"mi\" id=\"MathJax-Span-73\">b</span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-74\">1</span><span></span></span></span></span><span class=\"mo\" 
id=\"MathJax-Span-75\">⋯</span><span class=\"msubsup\" id=\"MathJax-Span-76\"><span><span><span class=\"mi\" id=\"MathJax-Span-77\">b</span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-78\">i</span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msup><mi>b</mi><mrow class=\"MJX-TeXAtom-ORD\"><msup><mi></mi><mo>′</mo></msup></mrow></msup><mo>=</mo><msub><mi>b</mi><mn>1</mn></msub><mo>⋯</mo><msub><mi>b</mi><mi>i</mi></msub></math></span></span><span>删除为空的编辑次数;</span><span class=\"MathJax\" id=\"MathJax-Element-18856-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mrow class="MJX-TeXAtom-ORD"><mn>0</mn><mi>j</mi></mrow></msub><mo>=</mo><mi>j</mi></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-79\"><span><span><span class=\"mrow\" id=\"MathJax-Span-80\"><span class=\"msubsup\" id=\"MathJax-Span-81\"><span><span><span class=\"mi\" id=\"MathJax-Span-82\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-83\"><span class=\"mrow\" id=\"MathJax-Span-84\"><span class=\"mn\" id=\"MathJax-Span-85\">0</span><span class=\"mi\" id=\"MathJax-Span-86\">j<span></span></span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-87\">=</span><span class=\"mi\" id=\"MathJax-Span-88\">j<span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>0</mn><mi>j</mi></mrow></msub><mo>=</mo><mi>j</mi></math></span></span><span>表示从空插入成</span><span class=\"MathJax\" id=\"MathJax-Element-18857-Frame\" tabindex=\"0\" data-mathml=\"<math 
xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>a</mi><mrow class="MJX-TeXAtom-ORD"><msup><mi></mi><mo>&#x2032;</mo></msup></mrow></msup><mo>=</mo><msub><mi>a</mi><mn>1</mn></msub><mo>&#x22EF;</mo><msub><mi>a</mi><mi>j</mi></msub></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-89\"><span><span><span class=\"mrow\" id=\"MathJax-Span-90\"><span class=\"msubsup\" id=\"MathJax-Span-91\"><span><span><span class=\"mi\" id=\"MathJax-Span-92\">a</span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-93\"><span class=\"mrow\" id=\"MathJax-Span-94\"><span class=\"msup\" id=\"MathJax-Span-95\"><span><span><span class=\"mi\" id=\"MathJax-Span-96\"></span><span></span></span><span><span class=\"mo\" id=\"MathJax-Span-97\">′</span><span></span></span></span></span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-98\">=</span><span class=\"msubsup\" id=\"MathJax-Span-99\"><span><span><span class=\"mi\" id=\"MathJax-Span-100\">a</span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-101\">1</span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-102\">⋯</span><span class=\"msubsup\" id=\"MathJax-Span-103\"><span><span><span class=\"mi\" id=\"MathJax-Span-104\">a</span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-105\">j<span></span></span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msup><mi>a</mi><mrow class=\"MJX-TeXAtom-ORD\"><msup><mi></mi><mo>′</mo></msup></mrow></msup><mo>=</mo><msub><mi>a</mi><mn>1</mn></msub><mo>⋯</mo><msub><mi>a</mi><mi>j</mi></msub></math></span></span><span>所需的编辑次数;</span><span class=\"MathJax\" id=\"MathJax-Element-18858-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mrow 
class="MJX-TeXAtom-ORD"><mi>i</mi><mi>j</mi></mrow></msub></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-106\"><span><span><span class=\"mrow\" id=\"MathJax-Span-107\"><span class=\"msubsup\" id=\"MathJax-Span-108\"><span><span><span class=\"mi\" id=\"MathJax-Span-109\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-110\"><span class=\"mrow\" id=\"MathJax-Span-111\"><span class=\"mi\" id=\"MathJax-Span-112\">i</span><span class=\"mi\" id=\"MathJax-Span-113\">j<span></span></span></span></span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mi>i</mi><mi>j</mi></mrow></msub></math></span></span><span>则是对动态规划中分解子问题的过程。 </span><br><span>仍以(1)中的两个字符串为例: </span><br><span>则编辑距离</span><span class=\"MathJax\" id=\"MathJax-Element-18859-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mrow class="MJX-TeXAtom-ORD"><mn>53</mn></mrow></msub><mo>=</mo><mi>m</mi><mi>i</mi><mi>n</mi><mrow><mo>{</mo><mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mtr><mtd><msub><mi>d</mi><mrow class="MJX-TeXAtom-ORD"><mn>43</mn></mrow></msub><mo>+</mo><mn>5</mn></mtd><mtd><mo>,</mo><mrow class="MJX-TeXAtom-ORD"><mo>&#x5220;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x9664;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x64CD;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x4F5C;</mo></mrow></mtd></mtr><mtr><mtd><msub><mi>d</mi><mrow class="MJX-TeXAtom-ORD"><mn>52</mn></mrow></msub><mo>+</mo><mn>3</mn></mtd><mtd><mo>,</mo><mrow class="MJX-TeXAtom-ORD"><mo>&#x63D2;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x5165;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x64CD;</mo></mrow><mrow 
class="MJX-TeXAtom-ORD"><mo>&#x4F5C;</mo></mrow></mtd></mtr><mtr><mtd><msub><mi>d</mi><mrow class="MJX-TeXAtom-ORD"><mn>42</mn></mrow></msub><mo>+</mo><mn>0</mn></mtd><mtd><mo>,</mo><mrow class="MJX-TeXAtom-ORD"><mo>&#x66FF;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x6362;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x64CD;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x4F5C;</mo></mrow></mtd></mtr></mtable><mo fence="true" stretchy="true" symmetric="true"></mo></mrow></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-114\"><span><span><span class=\"mrow\" id=\"MathJax-Span-115\"><span class=\"msubsup\" id=\"MathJax-Span-116\"><span><span><span class=\"mi\" id=\"MathJax-Span-117\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-118\"><span class=\"mrow\" id=\"MathJax-Span-119\"><span class=\"mn\" id=\"MathJax-Span-120\">53</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-121\">=</span><span class=\"mi\" id=\"MathJax-Span-122\">m</span><span class=\"mi\" id=\"MathJax-Span-123\">i</span><span class=\"mi\" id=\"MathJax-Span-124\">n</span><span class=\"mrow\" id=\"MathJax-Span-125\"><span class=\"mo\" id=\"MathJax-Span-126\"><span><span>⎧<span></span></span><span>⎩<span></span></span><span>⎨<span></span></span><span>⎪<span></span></span><span>⎪<span></span></span></span></span><span class=\"mtable\" id=\"MathJax-Span-127\"><span><span><span><span><span class=\"mtd\" id=\"MathJax-Span-128\"><span class=\"mrow\" id=\"MathJax-Span-129\"><span class=\"msubsup\" id=\"MathJax-Span-130\"><span><span><span class=\"mi\" id=\"MathJax-Span-131\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-132\"><span class=\"mrow\" id=\"MathJax-Span-133\"><span class=\"mn\" id=\"MathJax-Span-134\">43</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-135\">+</span><span 
class=\"mn\" id=\"MathJax-Span-136\">5</span></span></span><span></span></span><span><span class=\"mtd\" id=\"MathJax-Span-152\"><span class=\"mrow\" id=\"MathJax-Span-153\"><span class=\"msubsup\" id=\"MathJax-Span-154\"><span><span><span class=\"mi\" id=\"MathJax-Span-155\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-156\"><span class=\"mrow\" id=\"MathJax-Span-157\"><span class=\"mn\" id=\"MathJax-Span-158\">52</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-159\">+</span><span class=\"mn\" id=\"MathJax-Span-160\">3</span></span></span><span></span></span><span><span class=\"mtd\" id=\"MathJax-Span-176\"><span class=\"mrow\" id=\"MathJax-Span-177\"><span class=\"msubsup\" id=\"MathJax-Span-178\"><span><span><span class=\"mi\" id=\"MathJax-Span-179\">d<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-180\"><span class=\"mrow\" id=\"MathJax-Span-181\"><span class=\"mn\" id=\"MathJax-Span-182\">42</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-183\">+</span><span class=\"mn\" id=\"MathJax-Span-184\">0</span></span></span><span></span></span></span><span></span></span><span><span><span><span class=\"mtd\" id=\"MathJax-Span-137\"><span class=\"mrow\" id=\"MathJax-Span-138\"><span class=\"mo\" id=\"MathJax-Span-139\">,</span><span class=\"texatom\" id=\"MathJax-Span-140\"><span class=\"mrow\" id=\"MathJax-Span-141\"><span class=\"mo\" id=\"MathJax-Span-142\"><span>删</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-143\"><span class=\"mrow\" id=\"MathJax-Span-144\"><span class=\"mo\" id=\"MathJax-Span-145\"><span>除</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-146\"><span class=\"mrow\" id=\"MathJax-Span-147\"><span class=\"mo\" id=\"MathJax-Span-148\"><span>操</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-149\"><span class=\"mrow\" 
id=\"MathJax-Span-150\"><span class=\"mo\" id=\"MathJax-Span-151\"><span>作</span></span></span></span></span></span><span></span></span><span><span class=\"mtd\" id=\"MathJax-Span-161\"><span class=\"mrow\" id=\"MathJax-Span-162\"><span class=\"mo\" id=\"MathJax-Span-163\">,</span><span class=\"texatom\" id=\"MathJax-Span-164\"><span class=\"mrow\" id=\"MathJax-Span-165\"><span class=\"mo\" id=\"MathJax-Span-166\"><span>插</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-167\"><span class=\"mrow\" id=\"MathJax-Span-168\"><span class=\"mo\" id=\"MathJax-Span-169\"><span>入</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-170\"><span class=\"mrow\" id=\"MathJax-Span-171\"><span class=\"mo\" id=\"MathJax-Span-172\"><span>操</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-173\"><span class=\"mrow\" id=\"MathJax-Span-174\"><span class=\"mo\" id=\"MathJax-Span-175\"><span>作</span></span></span></span></span></span><span></span></span><span><span class=\"mtd\" id=\"MathJax-Span-185\"><span class=\"mrow\" id=\"MathJax-Span-186\"><span class=\"mo\" id=\"MathJax-Span-187\">,</span><span class=\"texatom\" id=\"MathJax-Span-188\"><span class=\"mrow\" id=\"MathJax-Span-189\"><span class=\"mo\" id=\"MathJax-Span-190\"><span>替</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-191\"><span class=\"mrow\" id=\"MathJax-Span-192\"><span class=\"mo\" id=\"MathJax-Span-193\"><span>换</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-194\"><span class=\"mrow\" id=\"MathJax-Span-195\"><span class=\"mo\" id=\"MathJax-Span-196\"><span>操</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-197\"><span class=\"mrow\" id=\"MathJax-Span-198\"><span class=\"mo\" id=\"MathJax-Span-199\"><span>作</span></span></span></span></span></span><span></span></span></span><span></span></span></span></span><span class=\"mo\" 
id=\"MathJax-Span-200\"></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>53</mn></mrow></msub><mo>=</mo><mi>m</mi><mi>i</mi><mi>n</mi><mrow><mo>{</mo><mtable columnalign=\"left left\" rowspacing=\".2em\" columnspacing=\"1em\" displaystyle=\"false\"><mtr><mtd><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>43</mn></mrow></msub><mo>+</mo><mn>5</mn></mtd><mtd><mo>,</mo><mrow class=\"MJX-TeXAtom-ORD\"><mo>删</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>除</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>操</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>作</mo></mrow></mtd></mtr><mtr><mtd><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>52</mn></mrow></msub><mo>+</mo><mn>3</mn></mtd><mtd><mo>,</mo><mrow class=\"MJX-TeXAtom-ORD\"><mo>插</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>入</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>操</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>作</mo></mrow></mtd></mtr><mtr><mtd><msub><mi>d</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>42</mn></mrow></msub><mo>+</mo><mn>0</mn></mtd><mtd><mo>,</mo><mrow class=\"MJX-TeXAtom-ORD\"><mo>替</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>换</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>操</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>作</mo></mrow></mtd></mtr></mtable><mo fence=\"true\" stretchy=\"true\" symmetric=\"true\"></mo></mrow></math></span></span><span>,继续通过不断递归可以得出其编辑距离。</span></p><p style=\"text-align: justify;\"></p><p style=\"text-align: justify;\">4、<a href=\"https://en.wikipedia.org/wiki/Hamming_distance\" target=\"_blank\">海明距离(hamming distance)</a> <br>海明距离用于表示两个等长字符串对应位置不同字符的总个数,也即把一个字符串换成另一个字符串所需要的替换操作次数。根据定义,可以把海明距离理解为编辑距离的一种特殊情况,即只计算等长情况下替换操作的编辑次数。举个例子来讲,字符串“bob”与“pom”的海明距离为2,因为需要至少两次的替换操作两个字符串才能一致。海明距离较常用与二进制串上的操作,如对编码进行检错与纠错。在计算长字符串的相似性时可以 通过hash函数将字符串映射成定长二进制串再利用海明距离来计算相似性。 
<br>海明距离的计算比较简单,通过一个循环来比较对应位置的字符是否相同即可。</p><p style=\"text-align: justify;\">5、<a href=\"https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient\" target=\"_blank\">Dice 距离</a> <br>Dice距离用于度量两个集合的相似性,因为可以把字符串理解为一种集合,因此Dice距离也会用于度量字符串的相似性。此外,Dice系数的一个非常著名的使用即实验性能评测的F1值。Dice系数定义如下: <br></p><center><br><img src=\"https://img-blog.csdn.net/20161109194335527\" alt=\"这里写图片描述\" title=\"\"> <br></center><p><br><span>其中,X,Y表示两个集合,分子表示两个集合的相交操作后的长度,分母表示两个集合长度之和。以(1)中的例子来讲的话,</span><span class=\"MathJax\" id=\"MathJax-Element-18860-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>d</mi><mi>i</mi><mi>c</mi><msub><mi>e</mi><mrow class="MJX-TeXAtom-ORD"><mn>12</mn></mrow></msub><mo>=</mo><mfrac><mrow><mn>2</mn><mo>&#x00D7;</mo><mn>3</mn></mrow><mrow><mn>5</mn><mo>+</mo><mn>3</mn></mrow></mfrac><mo>=</mo><mn>0.75</mn></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-201\"><span><span><span class=\"mrow\" id=\"MathJax-Span-202\"><span class=\"mi\" id=\"MathJax-Span-203\">d<span></span></span><span class=\"mi\" id=\"MathJax-Span-204\">i</span><span class=\"mi\" id=\"MathJax-Span-205\">c</span><span class=\"msubsup\" id=\"MathJax-Span-206\"><span><span><span class=\"mi\" id=\"MathJax-Span-207\">e</span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-208\"><span class=\"mrow\" id=\"MathJax-Span-209\"><span class=\"mn\" id=\"MathJax-Span-210\">12</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-211\">=</span><span class=\"mfrac\" id=\"MathJax-Span-212\"><span><span><span class=\"mrow\" id=\"MathJax-Span-213\"><span class=\"mn\" id=\"MathJax-Span-214\">2</span><span class=\"mo\" id=\"MathJax-Span-215\">×</span><span class=\"mn\" id=\"MathJax-Span-216\">3</span></span><span></span></span><span><span class=\"mrow\" id=\"MathJax-Span-217\"><span class=\"mn\" id=\"MathJax-Span-218\">5</span><span class=\"mo\" 
id=\"MathJax-Span-219\">+</span><span class=\"mn\" id=\"MathJax-Span-220\">3</span></span><span></span></span><span><span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-221\">=</span><span class=\"mn\" id=\"MathJax-Span-222\">0.75</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>d</mi><mi>i</mi><mi>c</mi><msub><mi>e</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>12</mn></mrow></msub><mo>=</mo><mfrac><mrow><mn>2</mn><mo>×</mo><mn>3</mn></mrow><mrow><mn>5</mn><mo>+</mo><mn>3</mn></mrow></mfrac><mo>=</mo><mn>0.75</mn></math></span></span><span>。若集合表示成向量的话,计算可以定义为: </span><br></p><center><br><img src=\"https://img-blog.csdn.net/20161109194530780\" alt=\"这里写图片描述\" title=\"\"> <br></center><p><br><span>其中,A,B表示两个向量。</span></p><p style=\"text-align: justify;\"></p><p style=\"text-align: justify;\">6、<a href=\"https://en.wikipedia.org/wiki/Jaccard_index\" target=\"_blank\">Jaccard distance</a> <br>杰卡德系数的定义如下, <br></p><center><br><img src=\"https://img-blog.csdn.net/20161109195329815\" alt=\"这里写图片描述\" title=\"\"> <br></center><p><br><span>可以看出与Dice系数的定义比较相似,分子部分是个两倍关系,Dice系数的分母比Jaccard系数的分母多减去了一项分子,即</span><span class=\"MathJax\" id=\"MathJax-Element-18861-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mrow class="MJX-TeXAtom-ORD"><mo stretchy="false">|</mo></mrow><mi>A</mi><mo>&#x2229;</mo><mi>B</mi><mrow class="MJX-TeXAtom-ORD"><mo stretchy="false">|</mo></mrow></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-223\"><span><span><span class=\"mrow\" id=\"MathJax-Span-224\"><span class=\"texatom\" id=\"MathJax-Span-225\"><span class=\"mrow\" id=\"MathJax-Span-226\"><span class=\"mo\" id=\"MathJax-Span-227\">|</span></span></span><span class=\"mi\" id=\"MathJax-Span-228\">A</span><span class=\"mo\" id=\"MathJax-Span-229\">∩</span><span 
class=\"mi\" id=\"MathJax-Span-230\">B</span><span class=\"texatom\" id=\"MathJax-Span-231\"><span class=\"mrow\" id=\"MathJax-Span-232\"><span class=\"mo\" id=\"MathJax-Span-233\">|</span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mrow class=\"MJX-TeXAtom-ORD\"><mo stretchy=\"false\">|</mo></mrow><mi>A</mi><mo>∩</mo><mi>B</mi><mrow class=\"MJX-TeXAtom-ORD\"><mo stretchy=\"false\">|</mo></mrow></math></span></span><span>。 </span><br><span>Jaccard与Dice之间具有一种转化关系: </span><br></p><center><br><span class=\"MathJax\" id=\"MathJax-Element-18862-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>J</mi><mo>=</mo><mfrac><mi>D</mi><mrow><mn>2</mn><mo>&#x2212;</mo><mi>D</mi></mrow></mfrac></math>\" role=\"presentation\" style=\"text-align: left;\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-234\"><span><span><span class=\"mrow\" id=\"MathJax-Span-235\"><span class=\"mi\" id=\"MathJax-Span-236\">J<span></span></span><span class=\"mo\" id=\"MathJax-Span-237\">=</span><span class=\"mfrac\" id=\"MathJax-Span-238\"><span><span><span class=\"mi\" id=\"MathJax-Span-239\">D</span><span></span></span><span><span class=\"mrow\" id=\"MathJax-Span-240\"><span class=\"mn\" id=\"MathJax-Span-241\">2</span><span class=\"mo\" id=\"MathJax-Span-242\">−</span><span class=\"mi\" id=\"MathJax-Span-243\">D</span></span><span></span></span><span><span></span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>J</mi><mo>=</mo><mfrac><mi>D</mi><mrow><mn>2</mn><mo>−</mo><mi>D</mi></mrow></mfrac></math></span></span>, <br></center><p><br><span>或: </span><br></p><center><br><span class=\"MathJax\" id=\"MathJax-Element-18863-Frame\" tabindex=\"0\" 
data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>D</mi><mo>=</mo><mfrac><mrow><mn>2</mn><mi>J</mi></mrow><mrow><mn>1</mn><mo>+</mo><mi>J</mi></mrow></mfrac></math>\" role=\"presentation\" style=\"text-align: left;\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-244\"><span><span><span class=\"mrow\" id=\"MathJax-Span-245\"><span class=\"mi\" id=\"MathJax-Span-246\">D</span><span class=\"mo\" id=\"MathJax-Span-247\">=</span><span class=\"mfrac\" id=\"MathJax-Span-248\"><span><span><span class=\"mrow\" id=\"MathJax-Span-249\"><span class=\"mn\" id=\"MathJax-Span-250\">2</span><span class=\"mi\" id=\"MathJax-Span-251\">J<span></span></span></span><span></span></span><span><span class=\"mrow\" id=\"MathJax-Span-252\"><span class=\"mn\" id=\"MathJax-Span-253\">1</span><span class=\"mo\" id=\"MathJax-Span-254\">+</span><span class=\"mi\" id=\"MathJax-Span-255\">J<span></span></span></span><span></span></span><span><span></span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>D</mi><mo>=</mo><mfrac><mrow><mn>2</mn><mi>J</mi></mrow><mrow><mn>1</mn><mo>+</mo><mi>J</mi></mrow></mfrac></math></span></span> <br></center><p style=\"text-align: justify;\"></p><p style=\"text-align: justify;\">7、<a href=\"https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance\" target=\"_blank\">J-W距离(Jaro–Winkler distance)</a> <br>J-W距离也常用来度量两个字符串的相似性,它实际上 Jaro distance的一种变种。 Jaro distance距离属于编辑距离的一类,被用于记录链接领域来将异构数据源中的records链接到同义实体中,也可以用于拼写纠错。Jaro distance定义如下: <br></p><center><br><img src=\"https://img-blog.csdn.net/20161109212242924\" alt=\"这里写图片描述\" title=\"\"> <br></center><p><br><span>其中,m是两个字符串匹配上的字符数目,t是字符中换位数目的一半,即若在字符串的第i位出现了a,b,在第j位又出现了b,a,则表示两者出现了换位。举个例子来讲: </span><br><span class=\"MathJax\" id=\"MathJax-Element-18864-Frame\" tabindex=\"0\" data-mathml=\"<math 
xmlns="http://www.w3.org/1998/Math/MathML"><mi>s</mi><mn>1</mn><mo>=</mo><mi>M</mi><mi>A</mi><mi>R</mi><mi>T</mi><mi>H</mi><mi>A</mi></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-256\"><span><span><span class=\"mrow\" id=\"MathJax-Span-257\"><span class=\"mi\" id=\"MathJax-Span-258\">s</span><span class=\"mn\" id=\"MathJax-Span-259\">1</span><span class=\"mo\" id=\"MathJax-Span-260\">=</span><span class=\"mi\" id=\"MathJax-Span-261\">M<span></span></span><span class=\"mi\" id=\"MathJax-Span-262\">A</span><span class=\"mi\" id=\"MathJax-Span-263\">R</span><span class=\"mi\" id=\"MathJax-Span-264\">T<span></span></span><span class=\"mi\" id=\"MathJax-Span-265\">H<span></span></span><span class=\"mi\" id=\"MathJax-Span-266\">A</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>s</mi><mn>1</mn><mo>=</mo><mi>M</mi><mi>A</mi><mi>R</mi><mi>T</mi><mi>H</mi><mi>A</mi></math></span></span><span> </span><br><span class=\"MathJax\" id=\"MathJax-Element-18865-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>s</mi><mn>2</mn><mo>=</mo><mi>M</mi><mi>A</mi><mi>R</mi><mi>H</mi><mi>T</mi><mi>A</mi></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-267\"><span><span><span class=\"mrow\" id=\"MathJax-Span-268\"><span class=\"mi\" id=\"MathJax-Span-269\">s</span><span class=\"mn\" id=\"MathJax-Span-270\">2</span><span class=\"mo\" id=\"MathJax-Span-271\">=</span><span class=\"mi\" id=\"MathJax-Span-272\">M<span></span></span><span class=\"mi\" id=\"MathJax-Span-273\">A</span><span class=\"mi\" id=\"MathJax-Span-274\">R</span><span class=\"mi\" id=\"MathJax-Span-275\">H<span></span></span><span class=\"mi\" id=\"MathJax-Span-276\">T<span></span></span><span class=\"mi\" 
id=\"MathJax-Span-277\">A</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>s</mi><mn>2</mn><mo>=</mo><mi>M</mi><mi>A</mi><mi>R</mi><mi>H</mi><mi>T</mi><mi>A</mi></math></span></span><span> </span><br><span>则</span><span class=\"MathJax\" id=\"MathJax-Element-18866-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>m</mi><mo>=</mo><mn>6</mn><mo>,</mo><mrow class="MJX-TeXAtom-ORD"><mo stretchy="false">|</mo></mrow><mi>s</mi><mn>1</mn><mrow class="MJX-TeXAtom-ORD"><mo stretchy="false">|</mo></mrow><mo>=</mo><mn>6</mn><mo>,</mo><mrow class="MJX-TeXAtom-ORD"><mo stretchy="false">|</mo></mrow><mi>s</mi><mn>2</mn><mrow class="MJX-TeXAtom-ORD"><mo stretchy="false">|</mo></mrow><mo>=</mo><mn>6</mn><mo>,</mo><mi>T</mi><mrow class="MJX-TeXAtom-ORD"><mo>/</mo></mrow><mi>H</mi><mrow class="MJX-TeXAtom-ORD"><mo>&#x548C;</mo></mrow><mi>H</mi><mrow class="MJX-TeXAtom-ORD"><mo>/</mo></mrow><mi>T</mi><mrow class="MJX-TeXAtom-ORD"><mo>&#x5C5E;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x4E8E;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x4E24;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x5BF9;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x6362;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x4F4D;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x5B57;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x7B26;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x5BF9;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#xFF0C;</mo></mrow><mrow class="MJX-TeXAtom-ORD"><mo>&#x6545;</mo></mrow><mi>t</mi><mo>=</mo><mfrac><mrow><mn>1</mn><mo>+</mo><mn>1</mn></mrow><mn>2</mn></mfrac><mo>=</mo><mn>1</mn></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-278\"><span><span><span class=\"mrow\" id=\"MathJax-Span-279\"><span class=\"mi\" id=\"MathJax-Span-280\">m</span><span class=\"mo\" 
id=\"MathJax-Span-281\">=</span><span class=\"mn\" id=\"MathJax-Span-282\">6</span><span class=\"mo\" id=\"MathJax-Span-283\">,</span><span class=\"texatom\" id=\"MathJax-Span-284\"><span class=\"mrow\" id=\"MathJax-Span-285\"><span class=\"mo\" id=\"MathJax-Span-286\">|</span></span></span><span class=\"mi\" id=\"MathJax-Span-287\">s</span><span class=\"mn\" id=\"MathJax-Span-288\">1</span><span class=\"texatom\" id=\"MathJax-Span-289\"><span class=\"mrow\" id=\"MathJax-Span-290\"><span class=\"mo\" id=\"MathJax-Span-291\">|</span></span></span><span class=\"mo\" id=\"MathJax-Span-292\">=</span><span class=\"mn\" id=\"MathJax-Span-293\">6</span><span class=\"mo\" id=\"MathJax-Span-294\">,</span><span class=\"texatom\" id=\"MathJax-Span-295\"><span class=\"mrow\" id=\"MathJax-Span-296\"><span class=\"mo\" id=\"MathJax-Span-297\">|</span></span></span><span class=\"mi\" id=\"MathJax-Span-298\">s</span><span class=\"mn\" id=\"MathJax-Span-299\">2</span><span class=\"texatom\" id=\"MathJax-Span-300\"><span class=\"mrow\" id=\"MathJax-Span-301\"><span class=\"mo\" id=\"MathJax-Span-302\">|</span></span></span><span class=\"mo\" id=\"MathJax-Span-303\">=</span><span class=\"mn\" id=\"MathJax-Span-304\">6</span><span class=\"mo\" id=\"MathJax-Span-305\">,</span><span class=\"mi\" id=\"MathJax-Span-306\">T<span></span></span><span class=\"texatom\" id=\"MathJax-Span-307\"><span class=\"mrow\" id=\"MathJax-Span-308\"><span class=\"mo\" id=\"MathJax-Span-309\">/</span></span></span><span class=\"mi\" id=\"MathJax-Span-310\">H<span></span></span><span class=\"texatom\" id=\"MathJax-Span-311\"><span class=\"mrow\" id=\"MathJax-Span-312\"><span class=\"mo\" id=\"MathJax-Span-313\"><span>和</span></span></span></span><span class=\"mi\" id=\"MathJax-Span-314\">H<span></span></span><span class=\"texatom\" id=\"MathJax-Span-315\"><span class=\"mrow\" id=\"MathJax-Span-316\"><span class=\"mo\" id=\"MathJax-Span-317\">/</span></span></span><span class=\"mi\" 
id=\"MathJax-Span-318\">T<span></span></span><span class=\"texatom\" id=\"MathJax-Span-319\"><span class=\"mrow\" id=\"MathJax-Span-320\"><span class=\"mo\" id=\"MathJax-Span-321\"><span>属</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-322\"><span class=\"mrow\" id=\"MathJax-Span-323\"><span class=\"mo\" id=\"MathJax-Span-324\"><span>于</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-325\"><span class=\"mrow\" id=\"MathJax-Span-326\"><span class=\"mo\" id=\"MathJax-Span-327\"><span>两</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-328\"><span class=\"mrow\" id=\"MathJax-Span-329\"><span class=\"mo\" id=\"MathJax-Span-330\"><span>对</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-331\"><span class=\"mrow\" id=\"MathJax-Span-332\"><span class=\"mo\" id=\"MathJax-Span-333\"><span>换</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-334\"><span class=\"mrow\" id=\"MathJax-Span-335\"><span class=\"mo\" id=\"MathJax-Span-336\"><span>位</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-337\"><span class=\"mrow\" id=\"MathJax-Span-338\"><span class=\"mo\" id=\"MathJax-Span-339\"><span>字</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-340\"><span class=\"mrow\" id=\"MathJax-Span-341\"><span class=\"mo\" id=\"MathJax-Span-342\"><span>符</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-343\"><span class=\"mrow\" id=\"MathJax-Span-344\"><span class=\"mo\" id=\"MathJax-Span-345\"><span>对</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-346\"><span class=\"mrow\" id=\"MathJax-Span-347\"><span class=\"mo\" id=\"MathJax-Span-348\"><span>,</span></span></span></span><span class=\"texatom\" id=\"MathJax-Span-349\"><span class=\"mrow\" id=\"MathJax-Span-350\"><span class=\"mo\" id=\"MathJax-Span-351\"><span>故</span></span></span></span><span class=\"mi\" id=\"MathJax-Span-352\">t<span></span></span><span 
class=\"mo\" id=\"MathJax-Span-353\">=</span><span class=\"mfrac\" id=\"MathJax-Span-354\"><span><span><span class=\"mrow\" id=\"MathJax-Span-355\"><span class=\"mn\" id=\"MathJax-Span-356\">1</span><span class=\"mo\" id=\"MathJax-Span-357\">+</span><span class=\"mn\" id=\"MathJax-Span-358\">1</span></span><span></span></span><span><span class=\"mn\" id=\"MathJax-Span-359\">2</span><span></span></span><span><span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-360\">=</span><span class=\"mn\" id=\"MathJax-Span-361\">1</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>m</mi><mo>=</mo><mn>6</mn><mo>,</mo><mrow class=\"MJX-TeXAtom-ORD\"><mo stretchy=\"false\">|</mo></mrow><mi>s</mi><mn>1</mn><mrow class=\"MJX-TeXAtom-ORD\"><mo stretchy=\"false\">|</mo></mrow><mo>=</mo><mn>6</mn><mo>,</mo><mrow class=\"MJX-TeXAtom-ORD\"><mo stretchy=\"false\">|</mo></mrow><mi>s</mi><mn>2</mn><mrow class=\"MJX-TeXAtom-ORD\"><mo stretchy=\"false\">|</mo></mrow><mo>=</mo><mn>6</mn><mo>,</mo><mi>T</mi><mrow class=\"MJX-TeXAtom-ORD\"><mo>/</mo></mrow><mi>H</mi><mrow class=\"MJX-TeXAtom-ORD\"><mo>和</mo></mrow><mi>H</mi><mrow class=\"MJX-TeXAtom-ORD\"><mo>/</mo></mrow><mi>T</mi><mrow class=\"MJX-TeXAtom-ORD\"><mo>属</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>于</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>两</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>对</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>换</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>位</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>字</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>符</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>对</mo></mrow><mrow class=\"MJX-TeXAtom-ORD\"><mo>,</mo></mrow><mrow 
class=\"MJX-TeXAtom-ORD\"><mo>故</mo></mrow><mi>t</mi><mo>=</mo><mfrac><mrow><mn>1</mn><mo>+</mo><mn>1</mn></mrow><mn>2</mn></mfrac><mo>=</mo><mn>1</mn></math></span></span><span> </span><br><span>代入公式可得:</span><span class=\"MathJax\" id=\"MathJax-Element-18867-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>J</mi><mrow class="MJX-TeXAtom-ORD"><mn>1</mn><mo>,</mo><mn>2</mn></mrow></msub><mo>=</mo><mn>0.944</mn></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-362\"><span><span><span class=\"mrow\" id=\"MathJax-Span-363\"><span class=\"msubsup\" id=\"MathJax-Span-364\"><span><span><span class=\"mi\" id=\"MathJax-Span-365\">J<span></span></span><span></span></span><span><span class=\"texatom\" id=\"MathJax-Span-366\"><span class=\"mrow\" id=\"MathJax-Span-367\"><span class=\"mn\" id=\"MathJax-Span-368\">1</span><span class=\"mo\" id=\"MathJax-Span-369\">,</span><span class=\"mn\" id=\"MathJax-Span-370\">2</span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-371\">=</span><span class=\"mn\" id=\"MathJax-Span-372\">0.944</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>J</mi><mrow class=\"MJX-TeXAtom-ORD\"><mn>1</mn><mo>,</mo><mn>2</mn></mrow></msub><mo>=</mo><mn>0.944</mn></math></span></span><span>。一般定义当J值不大于</span><img src=\"https://img-blog.csdn.net/20161109214449941\" alt=\"这里写图片描述\" title=\"\"><span>时两个字符串被匹配上。 </span><br><span>有了Jaro distance,我们定义J-W距离: </span><br></p><center><br><img src=\"https://img-blog.csdn.net/20161109214551751\" alt=\"这里写图片描述\" title=\"\"> <br></center><p><br><span>其中,</span><span class=\"MathJax\" id=\"MathJax-Element-18868-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mi>j</mi></msub></math>\" 
role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-373\"><span><span><span class=\"mrow\" id=\"MathJax-Span-374\"><span class=\"msubsup\" id=\"MathJax-Span-375\"><span><span><span class=\"mi\" id=\"MathJax-Span-376\">d<span></span></span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-377\">j<span></span></span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mi>j</mi></msub></math></span></span><span>即为Jaro距离;</span><span class=\"MathJax\" id=\"MathJax-Element-18869-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>l</mi></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-378\"><span><span><span class=\"mrow\" id=\"MathJax-Span-379\"><span class=\"mi\" id=\"MathJax-Span-380\">l<span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>l</mi></math></span></span><span>是字符串的起始最大公共前缀,最大不超过4;</span><span class=\"MathJax\" id=\"MathJax-Element-18870-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>p</mi></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-381\"><span><span><span class=\"mrow\" id=\"MathJax-Span-382\"><span class=\"mi\" id=\"MathJax-Span-383\">p</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>p</mi></math></span></span><span>为一个缩放因子,用于对l进行调整,避免</span><span class=\"MathJax\" id=\"MathJax-Element-18871-Frame\" tabindex=\"0\" data-mathml=\"<math 
xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mi>w</mi></msub></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-384\"><span><span><span class=\"mrow\" id=\"MathJax-Span-385\"><span class=\"msubsup\" id=\"MathJax-Span-386\"><span><span><span class=\"mi\" id=\"MathJax-Span-387\">d<span></span></span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-388\">w</span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mi>w</mi></msub></math></span></span><span>超出1,一般设为0.1;</span><span class=\"MathJax\" id=\"MathJax-Element-18872-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>b</mi><mi>t</mi></msub></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-389\"><span><span><span class=\"mrow\" id=\"MathJax-Span-390\"><span class=\"msubsup\" id=\"MathJax-Span-391\"><span><span><span class=\"mi\" id=\"MathJax-Span-392\">b</span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-393\">t<span></span></span><span></span></span></span></span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>b</mi><mi>t</mi></msub></math></span></span><span>为boost threshold,当值超过该值时激发Jaro距离为J-W距离,该值一般设为0.7。 </span><br><span>仍以上面的两个字符串为例,</span><span class=\"MathJax\" id=\"MathJax-Element-18873-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mi>j</mi></msub><mo>=</mo><mn>0.944</mn><mo>&gt;</mo><mn>0.7</mn><mo>,</mo><mi>l</mi><mo>=</mo><mn>3</mn><mo>,</mo><mi>p</mi><mo>=</mo><mn>0.1</mn></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" 
id=\"MathJax-Span-394\"><span><span><span class=\"mrow\" id=\"MathJax-Span-395\"><span class=\"msubsup\" id=\"MathJax-Span-396\"><span><span><span class=\"mi\" id=\"MathJax-Span-397\">d<span></span></span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-398\">j<span></span></span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-399\">=</span><span class=\"mn\" id=\"MathJax-Span-400\">0.944</span><span class=\"mo\" id=\"MathJax-Span-401\">></span><span class=\"mn\" id=\"MathJax-Span-402\">0.7</span><span class=\"mo\" id=\"MathJax-Span-403\">,</span><span class=\"mi\" id=\"MathJax-Span-404\">l<span></span></span><span class=\"mo\" id=\"MathJax-Span-405\">=</span><span class=\"mn\" id=\"MathJax-Span-406\">3</span><span class=\"mo\" id=\"MathJax-Span-407\">,</span><span class=\"mi\" id=\"MathJax-Span-408\">p</span><span class=\"mo\" id=\"MathJax-Span-409\">=</span><span class=\"mn\" id=\"MathJax-Span-410\">0.1</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mi>j</mi></msub><mo>=</mo><mn>0.944</mn><mo>></mo><mn>0.7</mn><mo>,</mo><mi>l</mi><mo>=</mo><mn>3</mn><mo>,</mo><mi>p</mi><mo>=</mo><mn>0.1</mn></math></span></span><span>,代入公式可算出</span><span class=\"MathJax\" id=\"MathJax-Element-18874-Frame\" tabindex=\"0\" data-mathml=\"<math xmlns="http://www.w3.org/1998/Math/MathML"><msub><mi>d</mi><mi>w</mi></msub><mo>=</mo><mn>0.961</mn></math>\" role=\"presentation\"><nobr aria-hidden=\"true\"><span class=\"math\" id=\"MathJax-Span-411\"><span><span><span class=\"mrow\" id=\"MathJax-Span-412\"><span class=\"msubsup\" id=\"MathJax-Span-413\"><span><span><span class=\"mi\" id=\"MathJax-Span-414\">d<span></span></span><span></span></span><span><span class=\"mi\" id=\"MathJax-Span-415\">w</span><span></span></span></span></span><span class=\"mo\" id=\"MathJax-Span-416\">=</span><span class=\"mn\" 
id=\"MathJax-Span-417\">0.961</span></span><span></span></span></span><span></span></span></nobr><span class=\"MJX_Assistive_MathML\" role=\"presentation\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><msub><mi>d</mi><mi>w</mi></msub><mo>=</mo><mn>0.961</mn></math></span></span><span>。</span></p>',
'2018-05-22 13:45:24',
'2018-05-22 13:45:24',
'字符串相似性的几种度量方法');