From 5a407167fdce7b5efa08a797b79dbaf5ddc992d1 Mon Sep 17 00:00:00 2001
From: Steven Webster
Date: Sun, 25 Jul 2021 21:57:49 +1200
Subject: [PATCH] feat: long dash (em-dash) smart spacing

Please see test for expected behaviour improvements.

resolveSpacing() gains a final step that rewrites a "--" which has a
word character directly on both sides as " - ". The right-hand word
character is checked with a lookahead instead of being consumed, so
chained dashes ("a--b--c") are all rewritten rather than only every
other one. A "--" next to whitespace or punctuation is left untouched.

Note for review: the step runs on the already-transliterated string,
so a literal ASCII "--" between two word characters is rewritten in
the same way as one produced from an em-dash.
---
 test/unidecode.mocha.js |  5 +++++
 unidecode.js            | 10 ++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/test/unidecode.mocha.js b/test/unidecode.mocha.js
index e96b425..7af0e93 100644
--- a/test/unidecode.mocha.js
+++ b/test/unidecode.mocha.js
@@ -78,6 +78,11 @@ describe('Smart spacing', function() {
     assert.equal(unidecode('Café 北京, 鞋 size 10½, 33⅓ RPM', { smartSpacing: true }), 'Cafe Bei Jing, Xie size 10 1/2, 33 1/3 RPM');
   });
 
+  it('should replace an em-dash straddled by word characters with " - " instead of "--"', function() {
+    assert.equal(
+      unidecode("No—I mean yes!", { smartSpacing: true }), "No - I mean yes!");
+  });
+
   it('should handle deferred smart spacing', function() {
     var str = unidecode('Café 北京, 鞋 size 10½, 33⅓ RPM', { deferredSmartSpacing: true });
     assert.ok(/[\x80\x81]/.test(str));
diff --git a/unidecode.js b/unidecode.js
index 4e3f712..4a03bdb 100644
--- a/unidecode.js
+++ b/unidecode.js
@@ -105,8 +105,14 @@ function unidecode_internal_replace(ch) {
 }
 
 function resolveSpacing(str) {
-  return str.replace(/\x80(?!\w)/g, '').replace(/\x80\x80|(\w)\x80/g, '$1\x81').replace(/\x80/g, '')
-    .replace(/^\x81+|\x81+$/g, '').replace(/\x81 \x81/g, ' ').replace(/\s?\x81+/g, ' ');
+  return str
+    .replace(/\x80(?!\w)/g, "")
+    .replace(/\x80\x80|(\w)\x80/g, "$1\x81")
+    .replace(/\x80/g, "")
+    .replace(/^\x81+|\x81+$/g, "")
+    .replace(/\x81 \x81/g, " ")
+    .replace(/\s?\x81+/g, " ")
+    .replace(/(\w)--(?=\w)/g, "$1 - ");
 }
 
 module.exports.resolveSpacing = resolveSpacing;