Skip to content

Commit 959cbf3

Browse files
committed
close #88 and accept abbreviations
1 parent 28443ca commit 959cbf3

File tree

3 files changed

+215
-17
lines changed

3 files changed

+215
-17
lines changed

package-lock.json

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/quizbowl.js

Lines changed: 206 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,184 @@ const SUBCATEGORIES_FLATTENED = ['American Literature', 'British Literature', 'C
2626
const SUBCATEGORIES_FLATTENED_ALL = ['Long Fiction', 'Short Fiction', 'Poetry', 'Drama', 'American Literature', 'British Literature', 'Classical Literature', 'European Literature', 'World Literature', 'Other Literature', 'American History', 'Ancient History', 'European History', 'World History', 'Other History', 'Biology', 'Chemistry', 'Physics', 'Math', 'Other Science', 'Visual Fine Arts', 'Auditory Fine Arts', 'Other Fine Arts', 'Religion', 'Mythology', 'Philosophy', 'Social Science', 'Current Events', 'Geography', 'Other Academic', 'Trash'];
2727
const METAWORDS = ['the', 'like', 'descriptions', 'description', 'of', 'do', 'not', 'as', 'accept', 'or', 'other', 'prompt', 'on', 'except', 'before', 'after', 'is', 'read', 'stated', 'mentioned', 'at', 'any', 'don\'t', 'more', 'specific', 'etc', 'eg', 'answers', 'word', 'forms'];
2828

29+
/**
 * Implements the Porter Stemming Algorithm.
 * Source: https://tartarus.org/martin/PorterStemmer/js.txt
 *
 * Every pattern is compiled once when the enclosing closure runs, rather than
 * on each call. None of the patterns carries the `g` or `y` flag, so sharing
 * them between calls is safe (`test`/`exec` keep no state).
 *
 * @param {String} w - the word to stem. The patterns only list lowercase
 * letters, so the word is presumably expected to be lowercased by the caller.
 * @returns {String} the stemmed word; words shorter than 3 characters are
 * returned unchanged.
 */
const stemmer = (() => {
    // Step 2 suffix -> replacement.
    const STEP_2_REPLACEMENTS = {
        'ational': 'ate',
        'tional': 'tion',
        'enci': 'ence',
        'anci': 'ance',
        'izer': 'ize',
        'bli': 'ble',
        'alli': 'al',
        'entli': 'ent',
        'eli': 'e',
        'ousli': 'ous',
        'ization': 'ize',
        'ation': 'ate',
        'ator': 'ate',
        'alism': 'al',
        'iveness': 'ive',
        'fulness': 'ful',
        'ousness': 'ous',
        'aliti': 'al',
        'iviti': 'ive',
        'biliti': 'ble',
        'logi': 'log'
    };

    // Step 3 suffix -> replacement.
    const STEP_3_REPLACEMENTS = {
        'icate': 'ic',
        'ative': '',
        'alize': 'al',
        'iciti': 'ic',
        'ical': 'ic',
        'ful': '',
        'ness': ''
    };

    const c = '[^aeiou]'; // consonant
    const v = '[aeiouy]'; // vowel
    const C = `${c}[^aeiouy]*`; // consonant sequence
    const V = `${v}[aeiou]*`; // vowel sequence

    const MEASURE_GT_0 = new RegExp(`^(${C})?${V}${C}`); // [C]VC... is m>0
    const MEASURE_EQ_1 = new RegExp(`^(${C})?${V}${C}(${V})?$`); // [C]VC[V] is m=1
    const MEASURE_GT_1 = new RegExp(`^(${C})?${V}${C}${V}${C}`); // [C]VCVC... is m>1
    const VOWEL_IN_STEM = new RegExp(`^(${C})?${v}`); // vowel in stem
    const DOUBLE_CONSONANT = new RegExp('([^aeiouylsz])\\1$');
    const CVC_ENDING = new RegExp(`^${C}${v}[^aeiouwxy]$`);

    const STEP_1A_SSES_IES = /^(.+?)(ss|i)es$/;
    const STEP_1A_S = /^(.+?)([^s])s$/;
    const STEP_1B_EED = /^(.+?)eed$/;
    const STEP_1B_ED_ING = /^(.+?)(ed|ing)$/;
    const STEP_1B_NEEDS_E = /(at|bl|iz)$/;
    const STEP_1C_Y = /^(.+?)y$/;
    const STEP_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    const STEP_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    const STEP_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    const STEP_4_ION = /^(.+?)(s|t)(ion)$/;
    const STEP_5_E = /^(.+?)e$/;
    const STEP_5_LL = /ll$/;

    return function (w) {
        if (w.length < 3) { return w; }

        // An initial 'y' is upper-cased so the lowercase-only vowel class
        // treats it as a consonant; it is turned back at the end.
        const startsWithY = w.charAt(0) === 'y';
        if (startsWithY) {
            w = 'Y' + w.slice(1);
        }

        // Step 1a
        if (STEP_1A_SSES_IES.test(w)) {
            w = w.replace(STEP_1A_SSES_IES, '$1$2');
        } else if (STEP_1A_S.test(w)) {
            w = w.replace(STEP_1A_S, '$1$2');
        }

        // Step 1b
        const eedMatch = STEP_1B_EED.exec(w);
        if (eedMatch) {
            if (MEASURE_GT_0.test(eedMatch[1])) {
                w = w.slice(0, -1);
            }
        } else {
            const edIngMatch = STEP_1B_ED_ING.exec(w);
            if (edIngMatch && VOWEL_IN_STEM.test(edIngMatch[1])) {
                w = edIngMatch[1];
                if (STEP_1B_NEEDS_E.test(w)) {
                    w = w + 'e';
                } else if (DOUBLE_CONSONANT.test(w)) {
                    w = w.slice(0, -1);
                } else if (CVC_ENDING.test(w)) {
                    w = w + 'e';
                }
            }
        }

        // Step 1c
        const yMatch = STEP_1C_Y.exec(w);
        if (yMatch && VOWEL_IN_STEM.test(yMatch[1])) {
            w = yMatch[1] + 'i';
        }

        // Step 2
        const step2Match = STEP_2.exec(w);
        if (step2Match && MEASURE_GT_0.test(step2Match[1])) {
            w = step2Match[1] + STEP_2_REPLACEMENTS[step2Match[2]];
        }

        // Step 3
        const step3Match = STEP_3.exec(w);
        if (step3Match && MEASURE_GT_0.test(step3Match[1])) {
            w = step3Match[1] + STEP_3_REPLACEMENTS[step3Match[2]];
        }

        // Step 4
        const step4Match = STEP_4.exec(w);
        if (step4Match) {
            if (MEASURE_GT_1.test(step4Match[1])) {
                w = step4Match[1];
            }
        } else {
            // The '(s|t)ion' form is only tried when no plain step 4 suffix matched.
            const ionMatch = STEP_4_ION.exec(w);
            if (ionMatch) {
                const stem = ionMatch[1] + ionMatch[2];
                if (MEASURE_GT_1.test(stem)) {
                    w = stem;
                }
            }
        }

        // Step 5
        const finalEMatch = STEP_5_E.exec(w);
        if (finalEMatch) {
            const stem = finalEMatch[1];
            if (MEASURE_GT_1.test(stem) || (MEASURE_EQ_1.test(stem) && !CVC_ENDING.test(stem))) {
                w = stem;
            }
        }

        if (STEP_5_LL.test(w) && MEASURE_GT_1.test(w)) {
            w = w.slice(0, -1);
        }

        // and turn initial Y back to y
        if (startsWithY) {
            w = 'y' + w.slice(1);
        }

        return w;
    };
})();
206+
29207

30208
function parseAnswerline(answerline) {
31209
const removeAllParentheses = (string) => {
@@ -34,6 +212,10 @@ function parseAnswerline(answerline) {
34212
return string;
35213
};
36214

215+
/**
 * Removes every substring that looks like an HTML tag from the given string.
 * A "tag" here is a '<', followed by any run of characters other than '>',
 * followed by '>'. A '<' with no later '>' is left untouched.
 * @param {String} string - the text to strip.
 * @returns {String} the text with all matched tags deleted.
 */
const removeHTMLTags = (string) => {
216+
// The g flag makes replace() delete all matches rather than only the first.
return string.replace(/<[^>]*>/g, '');
217+
};
218+
37219
const removeItalics = (string) => {
38220
string = string.replace(/<i>/g, '');
39221
string = string.replace(/<\/i>/g, '');
@@ -108,6 +290,16 @@ function parseAnswerline(answerline) {
108290
.trim();
109291
};
110292

293+
/**
 * Builds an abbreviation from the first character of each word in the string.
 *
 * HTML tags are stripped from the whole string BEFORE it is split into words.
 * Splitting first would tear a tag that contains a space (for example
 * `<span class="x">`) into fragments that no longer look like tags, and each
 * fragment would add a stray character to the result.
 *
 * @param {String} string - the text to abbreviate; may contain HTML tags.
 * @returns {String} the concatenated first characters, or '' when the string
 * contains no words.
 */
const getAbbreviation = (string) => {
    return removeHTMLTags(string)
        // Any run of whitespace (spaces, tabs, newlines) separates words.
        .split(/\s+/)
        // Leading/trailing whitespace yields empty tokens; drop them.
        .filter(token => token.length > 0)
        .map(token => token.charAt(0))
        .join('');
};
302+
111303
answerline = removeItalics(answerline);
112304

113305
const { mainAnswer, subAnswer } = splitMainAnswer(answerline);
@@ -118,6 +310,8 @@ function parseAnswerline(answerline) {
118310
reject: []
119311
};
120312

313+
parsedAnswerline.accept.push([getAbbreviation(mainAnswer), '', '']);
314+
121315
if (mainAnswer.includes(' or ')) {
122316
const parts = mainAnswer.split(' or ');
123317
parsedAnswerline.accept.push([extractUnderlining(parts[0]), extractKeyWords(parts[0]), extractQuotes(parts[0])]);
@@ -148,7 +342,7 @@ function parseAnswerline(answerline) {
148342
* @param {Number} strictness - the number of characters per error allowed for two tokens to match.
149343
* @returns {Boolean}
150344
*/
151-
function stringMatchesReference(string, reference, strictness = 5) {
345+
function stringMatchesReference({ string, reference, strictness = 5, acceptSubstring = false }) {
152346
if (string === null || string === undefined || reference === null || reference === undefined) {
153347
return false;
154348
}
@@ -167,14 +361,6 @@ function stringMatchesReference(string, reference, strictness = 5) {
167361
return string;
168362
};
169363

170-
const stemmer = (string) => {
171-
if (string.charAt(string.length - 1) === 's') {
172-
return string.substring(0, string.length - 1);
173-
} else {
174-
return string;
175-
}
176-
};
177-
178364
string = removePunctuation(string);
179365
string = replaceSpecialCharacters(string);
180366
string = string.toLowerCase().trim();
@@ -228,7 +414,7 @@ function stringMatchesReference(string, reference, strictness = 5) {
228414
const errors = dljs.distance(stemmer(stringTokens[i]), stemmer(referenceTokens[j]));
229415

230416
// console.log(stringTokens[i], referenceTokens[j]);
231-
if (strictness * errors <= referenceTokens[j].length || referenceTokens[j].includes(stringTokens[i])) {
417+
if (strictness * errors <= referenceTokens[j].length || (acceptSubstring && referenceTokens[j].includes(stringTokens[i]))) {
232418
tokenMatches = true;
233419
break;
234420
} else {
@@ -268,9 +454,9 @@ function scoreTossup(answerline, givenAnswer, inPower, endOfQuestion) {
268454
function checkAnswer(answerline, givenAnswer) {
269455
const answerWorks = (answerline, givenAnswer, isFormattedAnswerline) => {
270456
if (isFormattedAnswerline) {
271-
return stringMatchesReference(answerline, givenAnswer);
457+
return stringMatchesReference({ string: answerline, reference: givenAnswer });
272458
} else {
273-
return stringMatchesReference(givenAnswer, answerline);
459+
return stringMatchesReference({ string: givenAnswer, reference: answerline, acceptSubstring: true });
274460
}
275461
};
276462

@@ -282,9 +468,15 @@ function checkAnswer(answerline, givenAnswer) {
282468
}
283469

284470
for (const answer of parsedAnswerline['reject']) {
285-
if (stringMatchesReference(answer[2], givenAnswer, 11) && stringMatchesReference(givenAnswer, answer[2], 11)) {
286-
return 'reject';
471+
if (!stringMatchesReference({ string: answer[2], reference: givenAnswer, strictness: 11 })) {
472+
continue;
473+
}
474+
475+
if (!stringMatchesReference({ string: givenAnswer, reference: answer[2], strictness: 11 })) {
476+
continue;
287477
}
478+
479+
return 'reject';
288480
}
289481

290482
if (answerline.includes('[accept either') || answerline.includes('(accept either')) {

tests/quizbowl.test.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
{
22
"formatted": [
3+
{
4+
"answerline": "<b><u>calcium</u></b> [or <b><u>Ca</u></b>; accept <b><u>calcination</u></b> or <b><u>calciner</u></b>]",
5+
"tests": [
6+
{ "directive": "reject", "given": "carbon" }
7+
]
8+
},
39
{
410
"answerline": "<b><u>binary fission</u></b> [accept <b><u>mitochondrial fission</u></b> until “Drp1” is read, but prompt after; prompt on <u>fission</u>, <u>fission yeast</u>, <u>cytokinesis</u>, or <u>cell division</u>; do not accept or prompt on “mitosis” or “meiosis” or “multiple fission”]",
511
"tests": [

0 commit comments

Comments
 (0)