From ce3b01781317fcc6add5a76656954be028b71fd0 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt
Date: Thu, 5 Mar 2015 16:18:43 -0800
Subject: [PATCH] calculate worst-case size for UTF-8 conversion rather than playing the odds. fixes #718

---
 src/tm_utf8.c                    | 6 ++----
 test/issues/issue-runtime-718.js | 3 +++
 2 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 test/issues/issue-runtime-718.js

diff --git a/src/tm_utf8.c b/src/tm_utf8.c
index 9de95e28..8fad553f 100644
--- a/src/tm_utf8.c
+++ b/src/tm_utf8.c
@@ -98,10 +98,8 @@ size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t ** cons
 
 size_t tm_str_from_utf8 (const uint8_t* utf8, size_t utf8_len, const uint8_t ** const dstptr) {
   size_t buf_len = utf8_len;
-  // TODO: increase buf_len to fit actual split pairs (4 bytes become 6) and replaced non-characters (3 bytes per byte in bad sequence)
-  buf_len += utf8_len / 2 + 6; // HACK: this is just a glorified/dynamic fudge factor
-  // ugh, test/suite/crypto.js does toString on a 4K buffer of random bytes …PLS TO ADD MOAR FUDGERS!!1!
-  buf_len += utf8_len;
+  // TODO: decrease buf_len to only fit actual split pairs (4 bytes become 6) and replaced non-characters (3 bytes per byte in bad sequence)
+  buf_len = utf8_len * 3 + 6; // each byte could become 3 (replacement character), plus safety margin wanted by assert below
   uint8_t* buf = malloc(buf_len);
   size_t buf_pos = 0;
 
diff --git a/test/issues/issue-runtime-718.js b/test/issues/issue-runtime-718.js
new file mode 100644
index 00000000..54437fed
--- /dev/null
+++ b/test/issues/issue-runtime-718.js
@@ -0,0 +1,3 @@
+var tap = require('../tap');
+tap.count(1);
+tap.eq(Buffer("ffffffffffffffff", 'hex').toString('utf8').length, 8, 'bad UTF-8 conversion works');