From 192f626deae3ac849f6f4418224f082fa21fef69 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Fri, 14 Nov 2014 18:00:20 -0800
Subject: [PATCH 01/13] big pass over buffer input/output encoding handling,
 should be mostly there but failing some tests

---
 config/libtm.gyp               |   4 +-
 src/colony/lua/colony-node.lua | 108 ++++++++++++++++-----------------
 src/colony/lua_tm.c            |  87 +++++++++++++++++++++++---
 src/tm.h                       |  11 ++++
 src/tm_utf16.c                 |  47 ++++++++++++++
 src/tm_utf7.c                  |  34 +++++++++++
 test/suite/buffer.js           |  11 +++-
 7 files changed, 237 insertions(+), 65 deletions(-)
 create mode 100644 src/tm_utf16.c
 create mode 100644 src/tm_utf7.c

diff --git a/config/libtm.gyp b/config/libtm.gyp
index eae4061c..523b963a 100644
--- a/config/libtm.gyp
+++ b/config/libtm.gyp
@@ -502,7 +502,9 @@
         '<(runtime_path)/tm_random.c',
         '<(runtime_path)/tm_deflate.c',
         '<(runtime_path)/tm_str.c',
-        '<(runtime_path)/tm_utf8.c'
+        '<(runtime_path)/tm_utf7.c',
+        '<(runtime_path)/tm_utf8.c',
+        '<(runtime_path)/tm_utf16.c',
       ],
       "include_dirs": [
         '<(runtime_path)/',
diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua
index 1274d9a4..00636520 100644
--- a/src/colony/lua/colony-node.lua
+++ b/src/colony/lua/colony-node.lua
@@ -298,22 +298,17 @@ local buffer_proto = js_obj({
     if encoding == nil then
       encoding = 'utf8'
     end
-    encoding = string.lower(encoding);
+    encoding = string.lower(encoding)
     
-    local buf = tm.buffer_tobytestring(getmetatable(this).buffer, offset, endOffset);
-
+    local buf = tm.buffer_tobytestring(getmetatable(this).buffer, offset, endOffset)
     if encoding == 'binary' then
-      return string.gsub(buf, '[\128-\255]', function (c)
-        -- original value must be converted to internal encoding
-        return global.String.fromCharCode(nil, string.byte(c))
-      end)
+      return tm.str_from_binary(buf);
     elseif encoding == 'ascii' then
-      -- simply strips high bit from original value
-      return string.gsub(buf, '[\128-\255]', function (c)
-        return string.char(string.byte(c) - 128)
-      end)
+      return tm.str_from_ascii(buf);
     elseif encoding == 'utf8' or encoding == 'utf-8' then
       return tm.str_from_utf8(buf);
+    elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then
+      return tm.str_from_utf16le(buf)
     elseif encoding == 'base64' then
       return to_base64(buf);
     elseif encoding == 'hex' then
@@ -321,9 +316,6 @@ local buffer_proto = js_obj({
         return string.format('%02x', string.byte(c));
       end)
       return str;
-    elseif  encoding == 'ucs2' or encoding == 'ucs-2' 
-      or encoding == 'utf16le' or encoding == 'utf-16le' then
-      return error(js_new(global.NotImplementedError, 'Encoding not implemented yet: ' + encoding));
     else
       error(js_new(global.TypeError, 'Unknown encoding: ' + encoding));
     end
@@ -493,56 +485,62 @@ function _of_buffer (this, buf, length)
 end
 
 local function Buffer (this, arg, encoding)
-  -- args
-  local str, length = '', 0
-  if type(arg) == 'number' then
-    length = tonumber(arg)
-  elseif type(arg) == 'string' then
-    str = arg
-    length = #arg
-  else
-    str = arg or ''
-    length = arg and arg.length or 0
+  if encoding == nil then
+    encoding = 'utf8'
   end
-
-  -- encoding first check
-  if type(str) == 'string' and encoding == 'base64' then
-    -- "base64" string
-    str = from_base64(str)
-    length = string.len(str)
-  elseif type(str) == 'string' and encoding == 'hex' then
-    if string.len(str) % 2 ~= 0 then
+  encoding = string.lower(encoding)
+  
+  local raw, arr, hex, size
+  if type(arg) == 'number' then
+    size = arg
+  elseif type(arg) ~= 'string' then
+    -- assume an array
+    arr = arg
+  elseif encoding == 'binary' then
+    raw = tm.str_to_binary(arg)
+  elseif encoding == 'ascii' then
+    raw = tm.str_to_ascii(arg)
+  elseif encoding == 'utf8' or encoding == 'utf-8' then
+    raw = tm.str_to_utf8(arg)
+  elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then
+    raw = tm.str_to_utf16le(arg)
+  elseif encoding == 'base64' then
+    raw = from_base64(arg)
+  elseif encoding == 'hex' then
+    if string.len(arg) % 2 ~= 0 then
       error(js_new(global.TypeError, 'Invalid hex string.'))
     end
-    -- Remove first occurrance of invalid char until end of string
-    str = string.lower(string.gsub(str, '[^a-fA-F0-9].*', ''))
-    length = string.len(str) / 2
+    hex = string.lower(string.gsub(arg, '[^a-fA-F0-9].*', ''))
+  else
+    error(js_new(global.TypeError, 'Unknown encoding: ' + encoding));
+  end
+  
+  if type(size) == 'number' then
+    -- all set
+  elseif arr then
+    size = arr.length
+  elseif hex then
+    size = #hex / 2
+  else
+    size = #raw
   end
-
+  
   this = {}
-  local buf = tm.buffer_create(length)
-  _of_buffer(this, buf, length)
-
-  -- Lua internally uses a "binary" encoding, that is,
-  -- operates on (1-indexable) 8-bit values.
-
-  if type(str) == 'string' and encoding == 'hex' then
-    -- "hex" string
-    for i = 1, #str, 2 do
-      this[(i - 1)/2] = tonumber(string.sub(str, i, i+1), 16)
+  local buf = tm.buffer_create(size)
+  _of_buffer(this, buf, size)
+  if arr then
+    for i = 1, size do
+      this[i - 1] = arr[i - 1]
     end
-  elseif type(str) == 'string' then
-    -- "binary" string
-    for i = 1, #str do
-      this[i - 1] = string.byte(str, i)
+  elseif hex then
+    for i = 1, #hex, 2 do
+      this[(i - 1)/2] = tonumber(string.sub(hex, i, i+1), 16)
     end
-  else
-    -- array
-    for i = 1, str.length do
-      this[i - 1] = str[i - 1]
+  elseif raw then
+    for i = 1, size do
+      this[i - 1] = string.byte(raw, i)
     end
   end
-
   return this
 end
 
diff --git a/src/colony/lua_tm.c b/src/colony/lua_tm.c
index 32117ddb..531df1a8 100644
--- a/src/colony/lua_tm.c
+++ b/src/colony/lua_tm.c
@@ -922,20 +922,85 @@ static int l_tm_fs_dir_close (lua_State* L)
   return 1;
 }
 
+static int l_tm_str_to_ascii (lua_State* L)
+{
+  const uint8_t* raw;
+  size_t str_len;
+  const char* str = lua_tolstring(L, 1, &str_len);
+  size_t raw_len = tm_str_to_ascii((const uint8_t*) str, str_len + 1, &raw) - 1;    // compensate for NUL byte at end
+  lua_pushlstring(L, (const char*) raw, raw_len);
+  if ((void*) raw != (void*) str) free((uint8_t*) raw);
+  return 1;
+}
+
+static int l_tm_str_from_ascii (lua_State* L)
+{
+  const char* str;
+  size_t raw_len;
+  const char* raw = lua_tolstring(L, 1, &raw_len);
+  size_t str_len = tm_str_from_ascii((const uint8_t*) raw, raw_len, (const uint8_t**) &str);
+  lua_pushlstring(L, str, str_len);
+  if ((void*) str != (void*) raw) free((char*) str);
+  return 1;
+}
+
+static int l_tm_str_to_binary (lua_State* L)
+{
+  const uint8_t* raw;
+  size_t str_len;
+  const char* str = lua_tolstring(L, 1, &str_len);
+  size_t raw_len = tm_str_to_binary((const uint8_t*) str, str_len + 1, &raw) - 1;    // compensate for NUL byte at end
+  lua_pushlstring(L, (const char*) raw, raw_len);
+  if ((void*) raw != (void*) str) free((uint8_t*) raw);
+  return 1;
+}
+
+static int l_tm_str_from_binary (lua_State* L)
+{
+  const char* str;
+  size_t raw_len;
+  const char* raw = lua_tolstring(L, 1, &raw_len);
+  size_t str_len = tm_str_from_binary((const uint8_t*) raw, raw_len, (const uint8_t**) &str);
+  lua_pushlstring(L, str, str_len);
+  if ((void*) str != (void*) raw) free((char*) str);
+  return 1;
+}
+
+static int l_tm_str_to_utf16le (lua_State* L)
+{
+  const uint8_t* raw;
+  size_t str_len;
+  const char* str = lua_tolstring(L, 1, &str_len);
+  size_t raw_len = tm_str_to_utf16le((const uint8_t*) str, str_len + 1, &raw) - 1;    // compensate for NUL byte at end
+  lua_pushlstring(L, (const char*) raw, raw_len);
+  if ((void*) raw != (void*) str) free((uint8_t*) raw);
+  return 1;
+}
+
+static int l_tm_str_from_utf16le (lua_State* L)
+{
+  const char* str;
+  size_t raw_len;
+  const char* raw = lua_tolstring(L, 1, &raw_len);
+  size_t str_len = tm_str_from_utf16le((const uint8_t*) raw, raw_len, (const uint8_t**) &str);
+  lua_pushlstring(L, str, str_len);
+  if ((void*) str != (void*) raw) free((char*) str);
+  return 1;
+}
 
 static int l_tm_str_to_utf8 (lua_State* L)
 {
-  size_t utf8_len;
-  const char* utf8 = colony_tolutf8(L, 1, &utf8_len);
-  lua_pushlstring(L, utf8, utf8_len);
+  size_t raw_len;
+  const char* raw = colony_tolutf8(L, 1, &raw_len);
+  lua_pushlstring(L, raw, raw_len);
   return 1;
 }
 
 static int l_tm_str_from_utf8 (lua_State* L)
 {
-  size_t utf8_len;
-  const char* utf8 = lua_tolstring(L, 1, &utf8_len);
-  colony_pushlutf8(L, utf8, utf8_len);
+  size_t raw_len;
+  const char* raw = lua_tolstring(L, 1, &raw_len);
+  colony_pushlutf8(L, raw, raw_len);
   return 1;
 }
 
@@ -1389,10 +1454,16 @@ LUALIB_API int luaopen_tm (lua_State *L)
     { "fs_dir_read", l_tm_fs_dir_read },
     { "fs_dir_close", l_tm_fs_dir_close },
 
-    // unicode
+    // encodings
     { "str_to_utf8", l_tm_str_to_utf8 },
     { "str_from_utf8", l_tm_str_from_utf8 },
-    
+    { "str_to_utf16le", l_tm_str_to_utf16le },
+    { "str_from_utf16le", l_tm_str_from_utf16le },
+    { "str_to_binary", l_tm_str_to_binary },
+    { "str_from_binary", l_tm_str_from_binary },
+    { "str_to_ascii", l_tm_str_to_ascii },
+    { "str_from_ascii", l_tm_str_from_ascii },
+
     // internal string manipulation
     { "str_codeat", l_tm_str_codeat },
     { "str_fromcode", l_tm_str_fromcode },
diff --git a/src/tm.h b/src/tm.h
index 3bd09e4d..1c8865c8 100644
--- a/src/tm.h
+++ b/src/tm.h
@@ -211,6 +211,17 @@ size_t tm_utf8_encode(uint8_t* buf, size_t buf_len, uint32_t uc);
 size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 
+size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
+size_t tm_str_from_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
+
+size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask);
+size_t _tm_str_from_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask);
+#define tm_str_to_ascii(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF)  // yes 0xFF, despite node.js doc insinuation!
+#define tm_str_from_ascii(a,b,c) _tm_str_from_8bit(a,b,c, 0x7F)
+#define tm_str_to_binary(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF)
+#define tm_str_from_binary(a,b,c) _tm_str_from_8bit(a,b,c, 0xFF)
+
+
 // INTERNAL STRING MANIPULATION
 
 uint32_t tm_str_codeat (const uint8_t* buf, size_t buf_len, size_t index);
diff --git a/src/tm_utf16.c b/src/tm_utf16.c
new file mode 100644
index 00000000..e8e82bce
--- /dev/null
+++ b/src/tm_utf16.c
@@ -0,0 +1,47 @@
+#include <assert.h>
+
+#include "tm.h"
+
+// NOTE: Ideally these would deal with native uint16_t arrays, and have separate uint16_t<->uint8_t endian helper.
+//       But it doesn't seem worth the extra pain and potential performance hit right now.
+
+#define IS_BIG_ENDIAN 0     // TODO
+
+size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
+  uint16_t* utf16 = calloc(buf_len, 2);    // NOTE: we know utf16 will be this size or less
+  size_t utf16_len = 0;
+  
+  size_t buf_pos = 0;
+  while (buf_pos < buf_len) {
+    uint32_t uchar;
+    buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
+    assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
+    assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
+    #if IS_BIG_ENDIAN
+    utf16[utf16_len] = __builtin_bswap16((uint16_t) uchar);
+    #else
+    utf16[utf16_len] = (uint16_t) uchar;
+    #endif
+    utf16_len += 1;
+  }
+  *dstptr = (uint8_t*) utf16;
+  return utf16_len;
+}
+
+size_t tm_str_from_utf16le (const uint8_t* utf16, size_t utf16_len, const uint8_t ** const dstptr) {
+  size_t buf_len = utf16_len;
+  // TODO: figure out actual length needed? (typically a right-size copy is made into Lua anyway though…)
+  buf_len *= 3;     // HACK: each incoming codepoint could require up to 3 bytes to represent
+  uint8_t* buf = malloc(buf_len);
+  
+  size_t buf_pos = 0;
+  size_t utf16_pos = 0;
+  while (utf16_pos < utf16_len) {
+    assert(buf_pos < buf_len);
+    uint16_t uchar = utf16[utf16_pos];
+    buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar);
+    utf16_pos += 1;
+  }
+  *dstptr = buf;
+  return buf_pos;
+}
diff --git a/src/tm_utf7.c b/src/tm_utf7.c
new file mode 100644
index 00000000..0fc55405
--- /dev/null
+++ b/src/tm_utf7.c
@@ -0,0 +1,34 @@
+#include <assert.h>
+
+#include "tm.h"
+
+
+size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, uint8_t mask) {
+  uint8_t* ascii_buf = malloc(buf_len);    // NOTE: we know ascii will be this size or less
+  size_t ascii_len = 0;
+  
+  size_t buf_pos = 0;
+  while (buf_pos < buf_len) {
+    uint32_t uchar;
+    buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
+    assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
+    assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
+    ascii_buf[ascii_len] = (uint8_t) uchar & mask;
+    printf("%x @ %lu <%lu\n", uchar, ascii_len, buf_pos);
+    ascii_len += 1;
+  }
+  *dstptr = ascii_buf;
+  return ascii_len;
+}
+
+size_t _tm_str_from_8bit (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr, uint8_t mask) {
+  uint8_t* buf = malloc(ascii_len);
+  
+  size_t pos = 0;
+  while (pos < ascii_len) {
+    buf[pos] = ascii_buf[pos] & mask;
+    ++pos;
+  }
+  *dstptr = buf;
+  return pos;
+}
diff --git a/test/suite/buffer.js b/test/suite/buffer.js
index 852baf34..9bc00f73 100644
--- a/test/suite/buffer.js
+++ b/test/suite/buffer.js
@@ -167,7 +167,16 @@ var b = new Buffer([0, 0x41, 0x82, 0x104]);
 tap.eq(b.toString('binary'), "\u0000\u0041\u0082\u0004");
 tap.eq(b.toString('ascii'), "\u0000\u0041\u0002\u0004");
 tap.eq(b.toString('utf8'), "\u0000\u0041\uFFFD\u0004");
-//tap.eq(b.toString('utf16le'), "\u4100\u0482");
+tap.eq(b.toString('utf16le'), "\u4100\u0482");
+
+tap.eq(Buffer("\u8182", 'utf8')[3], 0x82);
+tap.eq(Buffer("\u8182", 'utf8').length, 3);
+tap.eq(Buffer("\u8182", 'ascii')[0], 0x82);
+tap.eq(Buffer("\u8182", 'ascii').length, 1);
+tap.eq(Buffer("\u8182", 'binary')[0], 0x82);
+tap.eq(Buffer("\u8182", 'binary').length, 1);
+tap.eq(Buffer("\u8182", 'utf16le')[1], 0x81);
+tap.eq(Buffer("\u8182", 'utf16le').length, 2);
 
 // write
 var buf = new Buffer(256);

From 64533c9d7405f040f378b2925660f08c1105c79f Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 17:04:46 -0800
Subject: [PATCH 02/13] =?UTF-8?q?fix=20up=20ill-fated=20conflation=20betwe?=
 =?UTF-8?q?en=20ascii=20and=20binary=20(actually=20could=20have=20shared?=
 =?UTF-8?q?=20code=20in=20tm=5Fstr=5Fto=5FX=20case=20but=E2=80=A6meh=3F)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/tm.h      | 11 +++++------
 src/tm_utf7.c | 40 ++++++++++++++++++++++++++++++++++------
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/tm.h b/src/tm.h
index 1c8865c8..3a01c098 100644
--- a/src/tm.h
+++ b/src/tm.h
@@ -214,12 +214,11 @@ size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dst
 size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 size_t tm_str_from_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 
-size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask);
-size_t _tm_str_from_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask);
-#define tm_str_to_ascii(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF)  // yes 0xFF, despite node.js doc insinuation!
-#define tm_str_from_ascii(a,b,c) _tm_str_from_8bit(a,b,c, 0x7F)
-#define tm_str_to_binary(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF)
-#define tm_str_from_binary(a,b,c) _tm_str_from_8bit(a,b,c, 0xFF)
+size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
+size_t tm_str_from_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
+
+size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
+size_t tm_str_from_binary (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 
 
 // INTERNAL STRING MANIPULATION
diff --git a/src/tm_utf7.c b/src/tm_utf7.c
index 0fc55405..98ccc8a4 100644
--- a/src/tm_utf7.c
+++ b/src/tm_utf7.c
@@ -2,8 +2,7 @@
 
 #include "tm.h"
 
-
-size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, uint8_t mask) {
+size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
   uint8_t* ascii_buf = malloc(buf_len);    // NOTE: we know ascii will be this size or less
   size_t ascii_len = 0;
   
@@ -13,22 +12,51 @@ size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** con
     buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
     assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
     assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
-    ascii_buf[ascii_len] = (uint8_t) uchar & mask;
-    printf("%x @ %lu <%lu\n", uchar, ascii_len, buf_pos);
+    ascii_buf[ascii_len] = (uint8_t) uchar & 0xFF;    // yes 0xFF, despite node.js doc insinuation!
     ascii_len += 1;
   }
   *dstptr = ascii_buf;
   return ascii_len;
 }
 
-size_t _tm_str_from_8bit (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr, uint8_t mask) {
+size_t tm_str_from_ascii (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr) {
   uint8_t* buf = malloc(ascii_len);
   
   size_t pos = 0;
   while (pos < ascii_len) {
-    buf[pos] = ascii_buf[pos] & mask;
+    buf[pos] = ascii_buf[pos] & 0x7F;
     ++pos;
   }
   *dstptr = buf;
   return pos;
 }
+
+size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
+  uint8_t* binary_buf = malloc(buf_len);    // NOTE: we know binary will be this size or less
+  size_t binary_len = 0;
+  
+  size_t buf_pos = 0;
+  while (buf_pos < buf_len) {
+    uint32_t uchar;
+    buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
+    assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
+    assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
+    binary_buf[binary_len] = (uint8_t) uchar & 0xFF;
+    binary_len += 1;
+  }
+  *dstptr = binary_buf;
+  return binary_len;
+}
+
+size_t tm_str_from_binary (const uint8_t* binary, size_t binary_len, const uint8_t ** const dstptr) {
+  uint8_t* str = calloc(binary_len, 2);   // NOTE: size could at most double if every incoming byte is > 127
+  
+  size_t str_pos = 0;
+  size_t binary_pos = 0;
+  while (binary_pos < binary_len) {
+    str_pos += tm_utf8_encode(str + str_pos, 2, binary[binary_pos]);
+    binary_pos += 1;
+  }
+  *dstptr = str;
+  return str_pos;
+}

From 933380c460437318a143b5552e3ab3558bf69c09 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 17:07:53 -0800
Subject: [PATCH 03/13] share trivially shareable 7/8-bit conversion code

---
 src/tm_utf7.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/src/tm_utf7.c b/src/tm_utf7.c
index 98ccc8a4..56e59de5 100644
--- a/src/tm_utf7.c
+++ b/src/tm_utf7.c
@@ -2,7 +2,7 @@
 
 #include "tm.h"
 
-size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
+size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, uint8_t mask) {
   uint8_t* ascii_buf = malloc(buf_len);    // NOTE: we know ascii will be this size or less
   size_t ascii_len = 0;
   
@@ -12,13 +12,17 @@ size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** con
     buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
     assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
     assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
-    ascii_buf[ascii_len] = (uint8_t) uchar & 0xFF;    // yes 0xFF, despite node.js doc insinuation!
+    ascii_buf[ascii_len] = (uint8_t) uchar & mask;
     ascii_len += 1;
   }
   *dstptr = ascii_buf;
   return ascii_len;
 }
 
+size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
+  return _tm_str_to_8bit(buf, buf_len, dstptr, 0xFF);     // yes 0xFF, despite node.js doc insinuation!
+}
+
 size_t tm_str_from_ascii (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr) {
   uint8_t* buf = malloc(ascii_len);
   
@@ -32,20 +36,7 @@ size_t tm_str_from_ascii (const uint8_t* ascii_buf, size_t ascii_len, const uint
 }
 
 size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
-  uint8_t* binary_buf = malloc(buf_len);    // NOTE: we know binary will be this size or less
-  size_t binary_len = 0;
-  
-  size_t buf_pos = 0;
-  while (buf_pos < buf_len) {
-    uint32_t uchar;
-    buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
-    assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
-    assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
-    binary_buf[binary_len] = (uint8_t) uchar & 0xFF;
-    binary_len += 1;
-  }
-  *dstptr = binary_buf;
-  return binary_len;
+  return _tm_str_to_8bit(buf, buf_len, dstptr, 0xFF);
 }
 
 size_t tm_str_from_binary (const uint8_t* binary, size_t binary_len, const uint8_t ** const dstptr) {

From 825f6e8fe9c5b598414d2bfaf44bf9d3f9063b41 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 17:28:05 -0800
Subject: [PATCH 04/13] add/fix a few more tests and update to current count

---
 test/suite/buffer.js | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/test/suite/buffer.js b/test/suite/buffer.js
index 9bc00f73..31129d05 100644
--- a/test/suite/buffer.js
+++ b/test/suite/buffer.js
@@ -1,6 +1,6 @@
 var tap = require('../tap');
 
-tap.count(70);
+tap.count(96);
 
 function arreq (a, b) {
 	if (a.length != b.length) {
@@ -164,19 +164,43 @@ console.log('#', new Buffer('hello world').toString('hex'))
 console.log('#', new Buffer(new Buffer('hello world').toString('hex'), 'hex'))
 
 var b = new Buffer([0, 0x41, 0x82, 0x104]);
-tap.eq(b.toString('binary'), "\u0000\u0041\u0082\u0004");
-tap.eq(b.toString('ascii'), "\u0000\u0041\u0002\u0004");
-tap.eq(b.toString('utf8'), "\u0000\u0041\uFFFD\u0004");
-tap.eq(b.toString('utf16le'), "\u4100\u0482");
-
-tap.eq(Buffer("\u8182", 'utf8')[3], 0x82);
+tap.eq(b.length, 4, "array ingested");
+tap.eq(b[0], 0x00);
+tap.eq(b[1], 0x41);
+tap.eq(b[2], 0x82);
+tap.eq(b[3], 0x04);
+tap.eq(b.toString('binary'), "\u0000\u0041\u0082\u0004", "binary toString");
+tap.eq(b.toString('binary').length, 4);
+tap.eq(b.toString('ascii'), "\u0000\u0041\u0002\u0004", "ascii toString");
+tap.eq(b.toString('ascii').length, 4);
+tap.eq(b.toString('utf8'), "\u0000\u0041\uFFFD\u0004", "utf8 toString");
+tap.eq(b.toString('utf8').length, 4);
+tap.eq(b.toString('utf16le'), "\u4100\u0482", "utf16le toString");
+tap.eq(b.toString('utf16le').length, 2);
+tap.eq(b.toString('ucs2').length, 2);
+tap.eq(b.toString('base64'), "AEGCBA==", "base64 toString");
+tap.eq(b.toString('base64').length, 8);
+tap.eq(b.toString('hex'), "00418204", "hex toString");
+tap.eq(b.toString('hex').length, 8);  // 4 bytes -> 8 hex digits (was re-checking base64)
+
+tap.eq(Buffer("\u8182", 'utf8')[2], 0x82, "buffer from utf8");
 tap.eq(Buffer("\u8182", 'utf8').length, 3);
-tap.eq(Buffer("\u8182", 'ascii')[0], 0x82);
+tap.eq(Buffer("\u8182", 'ascii')[0], 0x82, "buffer from ascii");
 tap.eq(Buffer("\u8182", 'ascii').length, 1);
-tap.eq(Buffer("\u8182", 'binary')[0], 0x82);
+tap.eq(Buffer("\u8182", 'binary')[0], 0x82, "buffer from binary");
 tap.eq(Buffer("\u8182", 'binary').length, 1);
-tap.eq(Buffer("\u8182", 'utf16le')[1], 0x81);
+tap.eq(Buffer("\u8182", 'utf16le')[1], 0x81, "buffer from utf16le");
 tap.eq(Buffer("\u8182", 'utf16le').length, 2);
+tap.eq(Buffer("\u8182", 'ucs2')[1], 0x81);
+tap.eq(Buffer("\u8182", 'base64').length, 0, "buffer from [bad] base64");
+var threw;
+try {
+  Buffer("\u8182", 'hex');
+} catch (e) {
+  threw = e;
+}
+tap.ok(threw, "buffer from [bad] hex");
+
 
 // write
 var buf = new Buffer(256);

From 14355c668fb892e6a26960d1b5ca717ce58dd952 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 17:54:59 -0800
Subject: [PATCH 05/13] fix up some halfway-unconverted utf16le code

---
 src/tm_utf16.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/tm_utf16.c b/src/tm_utf16.c
index e8e82bce..d8fc635f 100644
--- a/src/tm_utf16.c
+++ b/src/tm_utf16.c
@@ -25,23 +25,23 @@ size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** c
     utf16_len += 1;
   }
   *dstptr = (uint8_t*) utf16;
-  return utf16_len;
+  return (utf16_len << 1) - 1;        // include only single null *byte* (for consistency with others)
 }
 
-size_t tm_str_from_utf16le (const uint8_t* utf16, size_t utf16_len, const uint8_t ** const dstptr) {
-  size_t buf_len = utf16_len;
-  // TODO: figure out actual length needed? (typically a right-size copy is made into Lua anyway though…)
-  buf_len *= 3;     // HACK: each incoming codepoint could require up to 3 bytes to represent
-  uint8_t* buf = malloc(buf_len);
+size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint8_t ** const dstptr) {
+  const uint16_t* utf16 = (const uint16_t*) _utf16;
+  size_t utf16_len = _utf16_len >> 1;
+  
+  uint8_t* buf = calloc(utf16_len, 3);      // each incoming codepoint could require up to 3 bytes to represent
   
   size_t buf_pos = 0;
   size_t utf16_pos = 0;
   while (utf16_pos < utf16_len) {
-    assert(buf_pos < buf_len);
     uint16_t uchar = utf16[utf16_pos];
     buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar);
     utf16_pos += 1;
   }
+  buf[buf_pos++] = '\0';      // manually add null byte (just for consistency with other encodings)
   *dstptr = buf;
   return buf_pos;
 }

From 622ed50a7ac8229f28e60e1ee5cc70cead9e7a18 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 17:59:06 -0800
Subject: [PATCH 06/13] fix up overcorrection in UTF-16 conversion fix up

---
 src/tm_utf16.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tm_utf16.c b/src/tm_utf16.c
index d8fc635f..aa9fb8ee 100644
--- a/src/tm_utf16.c
+++ b/src/tm_utf16.c
@@ -41,7 +41,6 @@ size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint
     buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar);
     utf16_pos += 1;
   }
-  buf[buf_pos++] = '\0';      // manually add null byte (just for consistency with other encodings)
   *dstptr = buf;
   return buf_pos;
 }

From 43a16ce67486d6135aed81c688a8eb6a0c980105 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 17:59:52 -0800
Subject: [PATCH 07/13] improve base64 hygiene

---
 src/colony/lua/colony-node.lua | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua
index 00636520..0558a3a6 100644
--- a/src/colony/lua/colony-node.lua
+++ b/src/colony/lua/colony-node.lua
@@ -308,9 +308,9 @@ local buffer_proto = js_obj({
     elseif encoding == 'utf8' or encoding == 'utf-8' then
       return tm.str_from_utf8(buf);
     elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then
-      return tm.str_from_utf16le(buf)
+      return tm.str_from_utf16le(buf);
     elseif encoding == 'base64' then
-      return to_base64(buf);
+      return tm.str_from_binary(to_base64(buf));
     elseif encoding == 'hex' then
       local str = string.gsub(buf, '(.)', function (c)
         return string.format('%02x', string.byte(c));
@@ -505,7 +505,7 @@ local function Buffer (this, arg, encoding)
   elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then
     raw = tm.str_to_utf16le(arg)
   elseif encoding == 'base64' then
-    raw = from_base64(arg)
+    raw = from_base64(tm.str_to_binary(arg))
   elseif encoding == 'hex' then
     if string.len(arg) % 2 ~= 0 then
       error(js_new(global.TypeError, 'Invalid hex string.'))

From 9c50a828b77ab4a109a72898f15984b3c80c4e77 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 18:06:27 -0800
Subject: [PATCH 08/13] indent

---
 src/tm_utf8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tm_utf8.c b/src/tm_utf8.c
index 01410a3e..9de95e28 100644
--- a/src/tm_utf8.c
+++ b/src/tm_utf8.c
@@ -71,7 +71,7 @@ size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t ** cons
   size_t buf_pos = 0;
   while (buf_pos < buf_len) {
     uint32_t uchar;
-		buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
+    buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
     assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
     // NOTE: this follows new behavior http://blog.nodejs.org/2014/06/16/openssl-and-breaking-utf-8-change/
     if (hchar) {

From c76e4d97bc25e31dbcaf08b3d602efbcb9387081 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 18:06:58 -0800
Subject: [PATCH 09/13] =?UTF-8?q?avoid=20abort=5Ftrap=20on=20invalid=20bas?=
 =?UTF-8?q?e64,=20but=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/colony/lua/colony-node.lua | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua
index 0558a3a6..5bcc07e1 100644
--- a/src/colony/lua/colony-node.lua
+++ b/src/colony/lua/colony-node.lua
@@ -186,7 +186,7 @@ function from_base64(to_decode)
     local char = string.sub(to_decode, i, i)
     local offset, _ = string.find(index_table, char)
     if offset == nil then
-      error(js_new(global.Error, "Invalid character '" .. char .. "' found."))
+      error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found."))
     end
 
     bit_pattern = bit_pattern .. string.sub(to_binary(offset-1), 3)

From d006454f8dc685ef7d600bd443de831537b4834e Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Mon, 17 Nov 2014 18:08:20 -0800
Subject: [PATCH 10/13] =?UTF-8?q?=E2=80=A6node.js=20(for=20whatever=20reas?=
 =?UTF-8?q?on=20O=5Fo)=20actually=20does=20not=20throw=20but=20returns=20e?=
 =?UTF-8?q?mpty=20buffer=20on=20malformed=20base64?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/colony/lua/colony-node.lua | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua
index 5bcc07e1..cd98114e 100644
--- a/src/colony/lua/colony-node.lua
+++ b/src/colony/lua/colony-node.lua
@@ -186,7 +186,8 @@ function from_base64(to_decode)
     local char = string.sub(to_decode, i, i)
     local offset, _ = string.find(index_table, char)
     if offset == nil then
-      error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found."))
+      --error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found."))
+      return ''
     end
 
     bit_pattern = bit_pattern .. string.sub(to_binary(offset-1), 3)

From f0dd31b0a399f16d5231217f6805edee6bf3c70d Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Wed, 19 Nov 2014 15:11:36 -0800
Subject: [PATCH 11/13] excise dead code,
 https://github.com/tessel/runtime/pull/645#discussion_r20481702

---
 src/colony/lua/colony-node.lua | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua
index cd98114e..a5559240 100644
--- a/src/colony/lua/colony-node.lua
+++ b/src/colony/lua/colony-node.lua
@@ -186,7 +186,6 @@ function from_base64(to_decode)
     local char = string.sub(to_decode, i, i)
     local offset, _ = string.find(index_table, char)
     if offset == nil then
-      --error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found."))
       return ''
     end
 

From 2a8e465810cd09542a07103352e09549fa352958 Mon Sep 17 00:00:00 2001
From: Nathan Vander Wilt <nate@calftrail.com>
Date: Tue, 25 Nov 2014 16:01:43 -0800
Subject: [PATCH 12/13] have tm_utf16 support native [and big] endian as well
 as the LE the buffer methods need, this way
 https://github.com/tessel/runtime/pull/566 will be able to share our
 tm_str_to_utf16 implementation

---
 src/colony/lua_tm.c |  4 ++--
 src/tm.h            | 21 +++++++++++++++++----
 src/tm_utf16.c      | 14 ++++----------
 3 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/colony/lua_tm.c b/src/colony/lua_tm.c
index 531df1a8..8d1872c1 100644
--- a/src/colony/lua_tm.c
+++ b/src/colony/lua_tm.c
@@ -971,7 +971,7 @@ static int l_tm_str_to_utf16le (lua_State* L)
   const uint8_t* raw;
   size_t str_len;
   const char* str = lua_tolstring(L, 1, &str_len);
-  size_t raw_len = tm_str_to_utf16le((const uint8_t*) str, str_len + 1, &raw) - 1;    // compensate for NUL byte at end
+  size_t raw_len = tm_str_to_utf16((const uint8_t*) str, str_len + 1, &raw, LE) - 1;    // compensate for NUL byte at end
   lua_pushlstring(L, (const char*) raw, raw_len);
   if ((void*) raw != (void*) str) free((uint8_t*) raw);
   return 1;
@@ -982,7 +982,7 @@ static int l_tm_str_from_utf16le (lua_State* L)
   const char* str;
   size_t raw_len;
   const char* raw = lua_tolstring(L, 1, &raw_len);
-  size_t str_len = tm_str_from_utf16le((const uint8_t*) raw, raw_len, (const uint8_t**) &str);
+  size_t str_len = tm_str_from_utf16((const uint8_t*) raw, raw_len, (const uint8_t**) &str, LE);
   lua_pushlstring(L, str, str_len);
   if ((void*) str != (void*) raw) free((char*) str);
   return 1;
diff --git a/src/tm.h b/src/tm.h
index 3a01c098..9dacab1a 100644
--- a/src/tm.h
+++ b/src/tm.h
@@ -196,14 +196,27 @@ uint32_t tm_uptime_micro ();
 double tm_timestamp ();
 int tm_timestamp_update (double millis);
 
-// BUFFER
+
+// ENDIANNESS
+
+#include "order32.h"
 
 typedef enum {
   BE = 0,
   LE
 } tm_endian_t;
 
-// UNICODE
+#define TM_ENDIAN_HOST (O32_HOST_ORDER == O32_BIG_ENDIAN ? BE : LE)
+#define TM_ENDIAN_SWAP64(e, x)      ((e != TM_ENDIAN_HOST) ? __builtin_bswap64(x) : x)
+#define TM_ENDIAN_SWAP32(e, x)      ((e != TM_ENDIAN_HOST) ? __builtin_bswap32(x) : x)
+#define TM_ENDIAN_SWAP16(e, x)      ((e != TM_ENDIAN_HOST) ? __builtin_bswap16(x) : x)
+
+// BUFFER
+
+void tm_buffer_float_write (uint8_t* buf, size_t index, float value, tm_endian_t endianness);
+void tm_buffer_double_write (uint8_t* buf, size_t index, double value, tm_endian_t endianness);
+
+// ENCODINGS (UNICODE / ASCII / BINARY)
 
 #define TM_UTF8_DECODE_ERROR UINT32_MAX
 size_t tm_utf8_decode(const uint8_t* buf, size_t buf_len, uint32_t* uc);
@@ -211,8 +224,8 @@ size_t tm_utf8_encode(uint8_t* buf, size_t buf_len, uint32_t uc);
 size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 
-size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
-size_t tm_str_from_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
+size_t tm_str_to_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, tm_endian_t endianness);
+size_t tm_str_from_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, tm_endian_t endianness);
 
 size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
 size_t tm_str_from_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
diff --git a/src/tm_utf16.c b/src/tm_utf16.c
index aa9fb8ee..d6c7b48e 100644
--- a/src/tm_utf16.c
+++ b/src/tm_utf16.c
@@ -5,9 +5,7 @@
 // NOTE: Ideally these would deal with native uint16_t arrays, and have separate uint16_t<->uint8_t endian helper.
 //       But it doesn't seem worth the extra pain and potential performance hit right now.
 
-#define IS_BIG_ENDIAN 0     // TODO
-
-size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) {
+size_t tm_str_to_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, tm_endian_t endianness) {
   uint16_t* utf16 = calloc(buf_len, 2);    // NOTE: we know utf16 will be this size or less
   size_t utf16_len = 0;
   
@@ -17,18 +15,14 @@ size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** c
     buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar);
     assert(uchar != TM_UTF8_DECODE_ERROR);     // internal strings should never be malformed, 0xFFFD replacement increases length
     assert(uchar < 0x10000);                   // internal strings should only include BMP codepoints
-    #if IS_BIG_ENDIAN
-    utf16[utf16_len] = __builtin_bswap16((uint16_t) uchar);
-    #else
-    utf16[utf16_len] = (uint16_t) uchar;
-    #endif
+    utf16[utf16_len] = TM_ENDIAN_SWAP16(endianness, (uint16_t) uchar);
     utf16_len += 1;
   }
   *dstptr = (uint8_t*) utf16;
   return (utf16_len << 1) - 1;        // include only single null *byte* (for consistency with others)
 }
 
-size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint8_t ** const dstptr) {
+size_t tm_str_from_utf16 (const uint8_t* _utf16, size_t _utf16_len, const uint8_t ** const dstptr, tm_endian_t endianness) {
   const uint16_t* utf16 = (const uint16_t*) _utf16;
   size_t utf16_len = _utf16_len >> 1;
   
@@ -37,7 +31,7 @@ size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint
   size_t buf_pos = 0;
   size_t utf16_pos = 0;
   while (utf16_pos < utf16_len) {
-    uint16_t uchar = utf16[utf16_pos];
+    uint16_t uchar = TM_ENDIAN_SWAP16(endianness, utf16[utf16_pos]);
     buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar);
     utf16_pos += 1;
   }

From 93f31fc6bf38fc9cda9a3a8a90c45064e38b72dd Mon Sep 17 00:00:00 2001
From: Tim Cameron Ryan <id@timryan.org>
Date: Mon, 15 Dec 2014 10:55:04 -0800
Subject: [PATCH 13/13] Restores compilation ability at -O0. Does not require
 HTTP to be UTF8-safe.

---
 src/colony/lua_http_parser.c | 8 ++++----
 src/colony/lua_tm.c          | 4 ++--
 src/colony/modules/http.js   | 2 ++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/colony/lua_http_parser.c b/src/colony/lua_http_parser.c
index 14111d8d..85f7e8dd 100644
--- a/src/colony/lua_http_parser.c
+++ b/src/colony/lua_http_parser.c
@@ -110,7 +110,7 @@ static int lhttp_parser_on_url(http_parser *p, const char *at, size_t length) {
     return 0;
   };
   /* Push the string argument */
-  lua_pushlstring(L, at, length);
+  colony_pushbuffer(L, (const uint8_t*) at, length);
 
   lua_call(L, 1, 1);
 
@@ -132,7 +132,7 @@ static int lhttp_parser_on_header_field(http_parser *p, const char *at, size_t l
     return 0;
   };
   /* Push the string argument */
-  lua_pushlstring(L, at, length);
+  colony_pushbuffer(L, (const uint8_t*) at, length);
 
   lua_call(L, 1, 1);
 
@@ -154,7 +154,7 @@ static int lhttp_parser_on_header_value(http_parser *p, const char *at, size_t l
     return 0;
   };
   /* Push the string argument */
-  lua_pushlstring(L, at, length);
+  colony_pushbuffer(L, (const uint8_t*) at, length);
 
   lua_call(L, 1, 1);
 
@@ -176,7 +176,7 @@ static int lhttp_parser_on_body(http_parser *p, const char *at, size_t length) {
     return 0;
   };
   /* Push the string argument */
-  lua_pushlstring(L, at, length);
+  colony_pushbuffer(L, (const uint8_t*) at, length);
 
   lua_call(L, 1, 1);
 
diff --git a/src/colony/lua_tm.c b/src/colony/lua_tm.c
index 8d1872c1..a442e122 100644
--- a/src/colony/lua_tm.c
+++ b/src/colony/lua_tm.c
@@ -71,7 +71,7 @@ const char* colony_tolutf8 (lua_State* L, int index, size_t* res_len)
   return lua_tolstring(L, index, res_len);
 }
 
-inline const char* colony_toutf8 (lua_State* L, int index)
+const char* colony_toutf8 (lua_State* L, int index)
 {
   return colony_tolutf8(L, index, NULL);
 }
@@ -85,7 +85,7 @@ void colony_pushlutf8 (lua_State* L, const char* utf8, size_t utf8_len)
   if (str != utf8) free((char*) str);
 }
 
-inline void colony_pushutf8 (lua_State* L, const char* utf8)
+void colony_pushutf8 (lua_State* L, const char* utf8)
 {
   colony_pushlutf8(L, utf8, strlen(utf8));
 }
diff --git a/src/colony/modules/http.js b/src/colony/modules/http.js
index 8e3441fb..c9cb6b40 100644
--- a/src/colony/modules/http.js
+++ b/src/colony/modules/http.js
@@ -127,11 +127,13 @@ function IncomingMessage (type, socket) {
       self.url = url;
     }),
     onHeaderField: parserCallback(function (field) {
+      field = field.toString();
       var arr = (self._headersComplete) ? self.rawTrailers : self.rawHeaders;
       if (arr.length + 1 > self._maxRawHeaders) return;
       arr.push(field);
     }),
     onHeaderValue: parserCallback(function (value) {
+      value = value.toString();
       var arr = (self._headersComplete) ? self.rawTrailers : self.rawHeaders,
         key = arr[arr.length - 1].toLowerCase();
       if (arr.length + 1 > self._maxRawHeaders) return;