From ce3b01781317fcc6add5a76656954be028b71fd0 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Thu, 5 Mar 2015 16:18:43 -0800
Subject: [PATCH] calculate worst-case size for UTF-8 conversion rather than
 playing the odds. fixes #718

---
 src/tm_utf8.c                    | 6 ++----
 test/issues/issue-runtime-718.js | 3 +++
 2 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 test/issues/issue-runtime-718.js

diff --git a/src/tm_utf8.c b/src/tm_utf8.c
index 9de95e28..8fad553f 100644
--- a/src/tm_utf8.c
+++ b/src/tm_utf8.c
@@ -98,10 +98,8 @@ size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t ** cons
 
 size_t tm_str_from_utf8 (const uint8_t* utf8, size_t utf8_len, const uint8_t ** const dstptr) {
   size_t buf_len = utf8_len;
-  // TODO: increase buf_len to fit actual split pairs (4 bytes become 6) and replaced non-characters (3 bytes per byte in bad sequence)
-  buf_len += utf8_len / 2 + 6;    // HACK: this is just a glorified/dynamic fudge factor
-  // ugh, test/suite/crypto.js does toString on a 4K buffer of random bytes …PLS TO ADD MOAR FUDGERS!!1!
-  buf_len += utf8_len;
+  // TODO: shrink buf_len to the exact size needed, i.e. room only for actual split pairs (4 bytes become 6) and replaced non-characters (3 bytes per byte in a bad sequence)
+  buf_len = utf8_len * 3 + 6;     // each byte could become 3 (replacement character), plus safety margin wanted by assert below
   uint8_t* buf = malloc(buf_len);
   
   size_t buf_pos = 0;
diff --git a/test/issues/issue-runtime-718.js b/test/issues/issue-runtime-718.js
new file mode 100644
index 00000000..54437fed
--- /dev/null
+++ b/test/issues/issue-runtime-718.js
@@ -0,0 +1,3 @@
+var tap = require('../tap');
+tap.count(1);  // presumably declares that exactly one assertion is planned -- confirm against ../tap
+tap.eq(Buffer("ffffffffffffffff", 'hex').toString('utf8').length, 8, 'bad UTF-8 conversion works');  // regression check for #718: eight 0xFF bytes (bad UTF-8) must convert to a string of length 8