Skip to content

Commit

Permalink
Merge pull request #1 from budokans/smart-spacing-dashes
Browse files Browse the repository at this point in the history
feat: long dash (em-dash) smart spacing
  • Loading branch information
kshetline authored Aug 25, 2021
2 parents ba51a23 + 5a40716 commit 89a403e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
5 changes: 5 additions & 0 deletions test/unidecode.mocha.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ describe('Smart spacing', function() {
assert.equal(unidecode('Café 北京, 鞋 size 10½, 33⅓ RPM', { smartSpacing: true }), 'Cafe Bei Jing, Xie size 10 1/2, 33 1/3 RPM');
});

it('should replace an em-dash straddled by word characters with " - " instead of "--"', function() {
  // The em-dash below has a word character directly on each side of it.
  var input = "No—I mean yes!";
  var expected = "No - I mean yes!";

  assert.equal(unidecode(input, { smartSpacing: true }), expected);
});

it('should handle deferred smart spacing', function() {
var str = unidecode('Café 北京, 鞋 size 10½, 33⅓ RPM', { deferredSmartSpacing: true });
assert.ok(/[\x80\x81]/.test(str));
Expand Down
10 changes: 8 additions & 2 deletions unidecode.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,14 @@ function unidecode_internal_replace(ch) {
}

/**
 * Resolves the \x80 / \x81 spacing placeholder characters in a string into
 * ordinary spaces (or nothing), then spaces out any "--" that sits directly
 * between two word characters so that it reads " - ".
 *
 * @param {string} str - Text that may contain \x80 / \x81 placeholders.
 * @returns {string} The text with every placeholder removed or replaced by a
 *   single space, and word--word rewritten as "word - word".
 */
function resolveSpacing(str) {
  return str
    // Drop \x80 markers that are not immediately followed by a word character.
    .replace(/\x80(?!\w)/g, '')
    // A doubled \x80, or a word character followed by \x80, becomes \x81
    // (the word character, when there is one, is kept via $1).
    .replace(/\x80\x80|(\w)\x80/g, '$1\x81')
    // Any \x80 still left over is discarded.
    .replace(/\x80/g, '')
    // No spacing at the very start or very end of the string.
    .replace(/^\x81+|\x81+$/g, '')
    // Markers on both sides of a real space collapse into that one space.
    .replace(/\x81 \x81/g, ' ')
    // Each remaining run of \x81, plus one optional preceding whitespace
    // character, becomes a single space.
    .replace(/\s?\x81+/g, ' ')
    // Put spaces around "--" when it is straddled by word characters. The
    // trailing word character is matched with a lookahead so it is not
    // consumed and can also serve as the leading character of the next
    // match (e.g. "a--b--c" -> "a - b - c").
    .replace(/(\w)--(?=\w)/g, '$1 - ');
}

module.exports.resolveSpacing = resolveSpacing;

0 comments on commit 89a403e

Please sign in to comment.