@@ -26,6 +26,184 @@ const SUBCATEGORIES_FLATTENED = ['American Literature', 'British Literature', 'C
26
26
// Flat array of subcategory name strings.
// NOTE(review): the first four entries ('Long Fiction' ... 'Drama') look like alternate literature
// subcategories layered on top of the main list — confirm against where this constant is consumed.
const SUBCATEGORIES_FLATTENED_ALL = [ 'Long Fiction' , 'Short Fiction' , 'Poetry' , 'Drama' , 'American Literature' , 'British Literature' , 'Classical Literature' , 'European Literature' , 'World Literature' , 'Other Literature' , 'American History' , 'Ancient History' , 'European History' , 'World History' , 'Other History' , 'Biology' , 'Chemistry' , 'Physics' , 'Math' , 'Other Science' , 'Visual Fine Arts' , 'Auditory Fine Arts' , 'Other Fine Arts' , 'Religion' , 'Mythology' , 'Philosophy' , 'Social Science' , 'Current Events' , 'Geography' , 'Other Academic' , 'Trash' ] ;
27
27
// Array of lowercase instruction/filler words (e.g. 'accept', 'prompt', 'before', 'the').
// NOTE(review): presumably these are words from answerline directives that should be ignored when
// comparing answers — the consuming code is not visible here, so confirm before relying on this.
const METAWORDS = [ 'the' , 'like' , 'descriptions' , 'description' , 'of' , 'do' , 'not' , 'as' , 'accept' , 'or' , 'other' , 'prompt' , 'on' , 'except' , 'before' , 'after' , 'is' , 'read' , 'stated' , 'mentioned' , 'at' , 'any' , 'don\'t' , 'more' , 'specific' , 'etc' , 'eg' , 'answers' , 'word' , 'forms' ] ;
28
28
29
/**
 * Implements the Porter Stemming Algorithm.
 * Source: https://tartarus.org/martin/PorterStemmer/js.txt
 *
 * All regular expressions are compiled once, when the enclosing closure runs,
 * instead of on every call. None of them carries the `g` or `y` flag, so they
 * hold no `lastIndex` state and can be shared safely between calls.
 *
 * @param {String} w - the word to stem.
 * @returns {String} the stemmed word. Words shorter than three characters are returned unchanged.
 */
const stemmer = (() => {
    // Step 2 suffix -> replacement.
    const step2list = {
        'ational': 'ate',
        'tional': 'tion',
        'enci': 'ence',
        'anci': 'ance',
        'izer': 'ize',
        'bli': 'ble',
        'alli': 'al',
        'entli': 'ent',
        'eli': 'e',
        'ousli': 'ous',
        'ization': 'ize',
        'ation': 'ate',
        'ator': 'ate',
        'alism': 'al',
        'iveness': 'ive',
        'fulness': 'ful',
        'ousness': 'ous',
        'aliti': 'al',
        'iviti': 'ive',
        'biliti': 'ble',
        'logi': 'log',
    };

    // Step 3 suffix -> replacement.
    const step3list = {
        'icate': 'ic',
        'ative': '',
        'alize': 'al',
        'iciti': 'ic',
        'ical': 'ic',
        'ful': '',
        'ness': '',
    };

    const c = '[^aeiou]'; // consonant
    const v = '[aeiouy]'; // vowel
    const C = `${c}[^aeiouy]*`; // consonant sequence
    const V = `${v}[aeiou]*`; // vowel sequence

    const MGR0 = new RegExp(`^(${C})?${V}${C}`); // [C]VC... is m>0
    const MEQ1 = new RegExp(`^(${C})?${V}${C}(${V})?$`); // [C]VC[V] is m=1
    const MGR1 = new RegExp(`^(${C})?${V}${C}${V}${C}`); // [C]VCVC... is m>1
    const HAS_VOWEL = new RegExp(`^(${C})?${v}`); // vowel in stem
    const ENDS_CVC = new RegExp(`^${C}${v}[^aeiouwxy]$`); // consonant sequence, vowel, then a consonant other than w, x or y
    const ENDS_DOUBLE = /([^aeiouylsz])\1$/; // doubled character that is not a vowel, y, l, s or z

    const STEP_1A_ES = /^(.+?)(ss|i)es$/;
    const STEP_1A_S = /^(.+?)([^s])s$/;
    const STEP_1B_EED = /^(.+?)eed$/;
    const STEP_1B_ED_ING = /^(.+?)(ed|ing)$/;
    const STEP_1B_RESTORE_E = /(at|bl|iz)$/;
    const STEP_1C = /^(.+?)y$/;
    const STEP_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    const STEP_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    const STEP_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    const STEP_4_ION = /^(.+?)(s|t)(ion)$/;
    const STEP_5_E = /^(.+?)e$/;
    const STEP_5_LL = /ll$/;

    return function (w) {
        if (w.length < 3) { return w; }

        // An initial 'y' is upper-cased for the duration of the algorithm so the
        // vowel class (which contains lowercase 'y') does not match it.
        const startsWithY = w.charAt(0) === 'y';
        if (startsWithY) {
            w = 'Y' + w.slice(1);
        }

        // Step 1a
        if (STEP_1A_ES.test(w)) {
            w = w.replace(STEP_1A_ES, '$1$2');
        } else if (STEP_1A_S.test(w)) {
            w = w.replace(STEP_1A_S, '$1$2');
        }

        // Step 1b
        const eedMatch = STEP_1B_EED.exec(w);
        if (eedMatch) {
            // "...eed" -> "...ee" when the part before the suffix has m>0.
            if (MGR0.test(eedMatch[1])) {
                w = w.slice(0, -1);
            }
        } else {
            const edIngMatch = STEP_1B_ED_ING.exec(w);
            if (edIngMatch && HAS_VOWEL.test(edIngMatch[1])) {
                w = edIngMatch[1];
                if (STEP_1B_RESTORE_E.test(w)) {
                    w = w + 'e';
                } else if (ENDS_DOUBLE.test(w)) {
                    w = w.slice(0, -1); // drop one of the doubled characters
                } else if (ENDS_CVC.test(w)) {
                    w = w + 'e';
                }
            }
        }

        // Step 1c
        const yMatch = STEP_1C.exec(w);
        if (yMatch && HAS_VOWEL.test(yMatch[1])) {
            w = yMatch[1] + 'i';
        }

        // Step 2
        const step2Match = STEP_2.exec(w);
        if (step2Match && MGR0.test(step2Match[1])) {
            w = step2Match[1] + step2list[step2Match[2]];
        }

        // Step 3
        const step3Match = STEP_3.exec(w);
        if (step3Match && MGR0.test(step3Match[1])) {
            w = step3Match[1] + step3list[step3Match[2]];
        }

        // Step 4 — the "(s|t)ion" rule is only tried when no suffix from the main list matched.
        const step4Match = STEP_4.exec(w);
        if (step4Match) {
            if (MGR1.test(step4Match[1])) {
                w = step4Match[1];
            }
        } else {
            const ionMatch = STEP_4_ION.exec(w);
            if (ionMatch) {
                const stem = ionMatch[1] + ionMatch[2];
                if (MGR1.test(stem)) {
                    w = stem;
                }
            }
        }

        // Step 5
        const eMatch = STEP_5_E.exec(w);
        if (eMatch) {
            const stem = eMatch[1];
            if (MGR1.test(stem) || (MEQ1.test(stem) && !ENDS_CVC.test(stem))) {
                w = stem;
            }
        }

        if (STEP_5_LL.test(w) && MGR1.test(w)) {
            w = w.slice(0, -1);
        }

        // and turn initial Y back to y
        if (startsWithY) {
            w = 'y' + w.slice(1);
        }

        return w;
    };
})();
206
+
29
207
30
208
function parseAnswerline ( answerline ) {
31
209
const removeAllParentheses = ( string ) => {
@@ -34,6 +212,10 @@ function parseAnswerline(answerline) {
34
212
return string ;
35
213
} ;
36
214
215
// Deletes every HTML tag, i.e. each "<" together with everything up to and including the next ">".
const removeHTMLTags = (string) => string.replace(/<[^>]*>/g, '');
218
+
37
219
const removeItalics = ( string ) => {
38
220
string = string . replace ( / < i > / g, '' ) ;
39
221
string = string . replace ( / < \/ i > / g, '' ) ;
@@ -108,6 +290,16 @@ function parseAnswerline(answerline) {
108
290
. trim ( ) ;
109
291
} ;
110
292
293
/**
 * Builds an abbreviation from the first character of each space-separated word.
 *
 * HTML tags are removed from the whole string before it is split into words.
 * Stripping per word (after splitting) would cut a tag that itself contains a
 * space, such as one with attributes, into fragments that no longer look like
 * tags, and their characters would end up in the abbreviation.
 *
 * @param {String} string - text that may contain HTML tags.
 * @returns {String} the concatenated first characters, with surrounding whitespace trimmed.
 */
const getAbbreviation = (string) => {
    return removeHTMLTags(string)
        .split(' ')
        .filter(token => token.length > 0)
        .map(token => token.charAt(0))
        .join('')
        .trim();
};
302
+
111
303
answerline = removeItalics ( answerline ) ;
112
304
113
305
const { mainAnswer, subAnswer } = splitMainAnswer ( answerline ) ;
@@ -118,6 +310,8 @@ function parseAnswerline(answerline) {
118
310
reject : [ ]
119
311
} ;
120
312
313
+ parsedAnswerline . accept . push ( [ getAbbreviation ( mainAnswer ) , '' , '' ] ) ;
314
+
121
315
if ( mainAnswer . includes ( ' or ' ) ) {
122
316
const parts = mainAnswer . split ( ' or ' ) ;
123
317
parsedAnswerline . accept . push ( [ extractUnderlining ( parts [ 0 ] ) , extractKeyWords ( parts [ 0 ] ) , extractQuotes ( parts [ 0 ] ) ] ) ;
@@ -148,7 +342,7 @@ function parseAnswerline(answerline) {
148
342
* @param {Number } strictness - the number of characters per error allowed for two tokens to match.
149
343
* @returns {Boolean }
150
344
*/
151
- function stringMatchesReference ( string , reference , strictness = 5 ) {
345
+ function stringMatchesReference ( { string, reference, strictness = 5 , acceptSubstring = false } ) {
152
346
if ( string === null || string === undefined || reference === null || reference === undefined ) {
153
347
return false ;
154
348
}
@@ -167,14 +361,6 @@ function stringMatchesReference(string, reference, strictness = 5) {
167
361
return string ;
168
362
} ;
169
363
170
- const stemmer = ( string ) => {
171
- if ( string . charAt ( string . length - 1 ) === 's' ) {
172
- return string . substring ( 0 , string . length - 1 ) ;
173
- } else {
174
- return string ;
175
- }
176
- } ;
177
-
178
364
string = removePunctuation ( string ) ;
179
365
string = replaceSpecialCharacters ( string ) ;
180
366
string = string . toLowerCase ( ) . trim ( ) ;
@@ -228,7 +414,7 @@ function stringMatchesReference(string, reference, strictness = 5) {
228
414
const errors = dljs . distance ( stemmer ( stringTokens [ i ] ) , stemmer ( referenceTokens [ j ] ) ) ;
229
415
230
416
// console.log(stringTokens[i], referenceTokens[j]);
231
- if ( strictness * errors <= referenceTokens [ j ] . length || referenceTokens [ j ] . includes ( stringTokens [ i ] ) ) {
417
+ if ( strictness * errors <= referenceTokens [ j ] . length || ( acceptSubstring && referenceTokens [ j ] . includes ( stringTokens [ i ] ) ) ) {
232
418
tokenMatches = true ;
233
419
break ;
234
420
} else {
@@ -268,9 +454,9 @@ function scoreTossup(answerline, givenAnswer, inPower, endOfQuestion) {
268
454
function checkAnswer ( answerline , givenAnswer ) {
269
455
// Compares the two strings via stringMatchesReference. For a formatted answerline the answerline
// is passed as `string` and the given answer as `reference`; otherwise the roles are swapped and
// substring acceptance is switched on.
const answerWorks = (answerline, givenAnswer, isFormattedAnswerline) => {
    const comparison = isFormattedAnswerline
        ? { string: answerline, reference: givenAnswer }
        : { string: givenAnswer, reference: answerline, acceptSubstring: true };

    return stringMatchesReference(comparison);
};
276
462
@@ -282,9 +468,15 @@ function checkAnswer(answerline, givenAnswer) {
282
468
}
283
469
284
470
for ( const answer of parsedAnswerline [ 'reject' ] ) {
285
- if ( stringMatchesReference ( answer [ 2 ] , givenAnswer , 11 ) && stringMatchesReference ( givenAnswer , answer [ 2 ] , 11 ) ) {
286
- return 'reject' ;
471
+ if ( ! stringMatchesReference ( { string : answer [ 2 ] , reference : givenAnswer , strictness : 11 } ) ) {
472
+ continue ;
473
+ }
474
+
475
+ if ( ! stringMatchesReference ( { string : givenAnswer , reference : answer [ 2 ] , strictness : 11 } ) ) {
476
+ continue ;
287
477
}
478
+
479
+ return 'reject' ;
288
480
}
289
481
290
482
if ( answerline . includes ( '[accept either' ) || answerline . includes ( '(accept either' ) ) {
0 commit comments