Skip to content

Commit

Permalink
Update to ZH normalizer regex (#1453)
Browse files Browse the repository at this point in the history
* Update chinese-reading-normalizer.test.js

3 symbols added:

'
’
-

Signed-off-by: shiki-tm <36088384+shiki-tm@users.noreply.github.com>

* Update chinese.js

3 symbols added:
'’-

Signed-off-by: shiki-tm <36088384+shiki-tm@users.noreply.github.com>

* Update regex pattern for chinese.js

Signed-off-by: shiki-tm <36088384+shiki-tm@users.noreply.github.com>

* revert change

* readd separators, escape quote

---------

Signed-off-by: shiki-tm <36088384+shiki-tm@users.noreply.github.com>
Co-authored-by: Cashew <tungbinem@gmail.com>
  • Loading branch information
shiki-tm and Casheeew authored Oct 13, 2024
1 parent e0d381b commit f25775c
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion ext/js/language/zh/chinese.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,5 @@ export function isCodePointChinese(codePoint) {

/**
 * Normalizes a pinyin reading so that differently punctuated spellings of the
 * same reading produce the same string.
 *
 * The input is NFC-normalized, lowercased, and stripped of separator
 * characters: whitespace, `・`, `:`, `'`, `’`, `-`, and the two-character
 * sequence `//`. A single `/` is left untouched.
 * @type {import('language').ReadingNormalizer}
 */
export function normalizePinyin(str) {
    // `-` is placed last in the character class so it is a literal hyphen rather than a range operator.
    return str.normalize('NFC').toLowerCase().replace(/[\s・:'’-]|\/\//g, '');
}
3 changes: 3 additions & 0 deletions test/language/chinese-reading-normalizer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ const tests = [
['wán:zhěng', 'wánzhěng'],
['fān・yì', 'fānyì'],
['fān//yì', 'fānyì'],
['fān’yì', 'fānyì'],
['fān\'yì', 'fānyì'],
['fān-yì', 'fānyì'],
];

describe('Normalize Pinyin', () => {
Expand Down

0 comments on commit f25775c

Please sign in to comment.