From 192f626deae3ac849f6f4418224f082fa21fef69 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Fri, 14 Nov 2014 18:00:20 -0800 Subject: [PATCH 01/13] big pass over buffer input/output encoding handling, should be mostly there but failing some tests --- config/libtm.gyp | 4 +- src/colony/lua/colony-node.lua | 108 ++++++++++++++++----------------- src/colony/lua_tm.c | 87 +++++++++++++++++++++++--- src/tm.h | 11 ++++ src/tm_utf16.c | 47 ++++++++++++++ src/tm_utf7.c | 34 +++++++++++ test/suite/buffer.js | 11 +++- 7 files changed, 237 insertions(+), 65 deletions(-) create mode 100644 src/tm_utf16.c create mode 100644 src/tm_utf7.c diff --git a/config/libtm.gyp b/config/libtm.gyp index eae4061c..523b963a 100644 --- a/config/libtm.gyp +++ b/config/libtm.gyp @@ -502,7 +502,9 @@ '<(runtime_path)/tm_random.c', '<(runtime_path)/tm_deflate.c', '<(runtime_path)/tm_str.c', - '<(runtime_path)/tm_utf8.c' + '<(runtime_path)/tm_utf7.c', + '<(runtime_path)/tm_utf8.c', + '<(runtime_path)/tm_utf16.c', ], "include_dirs": [ '<(runtime_path)/', diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua index 1274d9a4..00636520 100644 --- a/src/colony/lua/colony-node.lua +++ b/src/colony/lua/colony-node.lua @@ -298,22 +298,17 @@ local buffer_proto = js_obj({ if encoding == nil then encoding = 'utf8' end - encoding = string.lower(encoding); + encoding = string.lower(encoding) - local buf = tm.buffer_tobytestring(getmetatable(this).buffer, offset, endOffset); - + local buf = tm.buffer_tobytestring(getmetatable(this).buffer, offset, endOffset) if encoding == 'binary' then - return string.gsub(buf, '[\128-\255]', function (c) - -- original value must be converted to internal encoding - return global.String.fromCharCode(nil, string.byte(c)) - end) + return tm.str_from_binary(buf); elseif encoding == 'ascii' then - -- simply strips high bit from original value - return string.gsub(buf, '[\128-\255]', function (c) - return string.char(string.byte(c) - 128) - end) + 
return tm.str_from_ascii(buf); elseif encoding == 'utf8' or encoding == 'utf-8' then return tm.str_from_utf8(buf); + elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then + return tm.str_from_utf16le(buf) elseif encoding == 'base64' then return to_base64(buf); elseif encoding == 'hex' then @@ -321,9 +316,6 @@ local buffer_proto = js_obj({ return string.format('%02x', string.byte(c)); end) return str; - elseif encoding == 'ucs2' or encoding == 'ucs-2' - or encoding == 'utf16le' or encoding == 'utf-16le' then - return error(js_new(global.NotImplementedError, 'Encoding not implemented yet: ' + encoding)); else error(js_new(global.TypeError, 'Unknown encoding: ' + encoding)); end @@ -493,56 +485,62 @@ function _of_buffer (this, buf, length) end local function Buffer (this, arg, encoding) - -- args - local str, length = '', 0 - if type(arg) == 'number' then - length = tonumber(arg) - elseif type(arg) == 'string' then - str = arg - length = #arg - else - str = arg or '' - length = arg and arg.length or 0 + if encoding == nil then + encoding = 'utf8' end - - -- encoding first check - if type(str) == 'string' and encoding == 'base64' then - -- "base64" string - str = from_base64(str) - length = string.len(str) - elseif type(str) == 'string' and encoding == 'hex' then - if string.len(str) % 2 ~= 0 then + encoding = string.lower(encoding) + + local raw, arr, hex, size + if type(arg) == 'number' then + size = arg + elseif type(arg) ~= 'string' then + -- assume an array + arr = arg + elseif encoding == 'binary' then + raw = tm.str_to_binary(arg) + elseif encoding == 'ascii' then + raw = tm.str_to_ascii(arg) + elseif encoding == 'utf8' or encoding == 'utf-8' then + raw = tm.str_to_utf8(arg) + elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then + raw = tm.str_to_utf16le(arg) + elseif encoding == 'base64' then + raw = from_base64(arg) + elseif encoding == 'hex' then + if 
string.len(arg) % 2 ~= 0 then error(js_new(global.TypeError, 'Invalid hex string.')) end - -- Remove first occurrance of invalid char until end of string - str = string.lower(string.gsub(str, '[^a-fA-F0-9].*', '')) - length = string.len(str) / 2 + hex = string.lower(string.gsub(arg, '[^a-fA-F0-9].*', '')) + else + error(js_new(global.TypeError, 'Unknown encoding: ' + encoding)); + end + + if type(size) == 'number' then + -- all set + elseif arr then + size = arr.length + elseif hex then + size = #hex / 2 + else + size = #raw end - + this = {} - local buf = tm.buffer_create(length) - _of_buffer(this, buf, length) - - -- Lua internally uses a "binary" encoding, that is, - -- operates on (1-indexable) 8-bit values. - - if type(str) == 'string' and encoding == 'hex' then - -- "hex" string - for i = 1, #str, 2 do - this[(i - 1)/2] = tonumber(string.sub(str, i, i+1), 16) + local buf = tm.buffer_create(size) + _of_buffer(this, buf, size) + if arr then + for i = 1, size do + this[i - 1] = arr[i - 1] end - elseif type(str) == 'string' then - -- "binary" string - for i = 1, #str do - this[i - 1] = string.byte(str, i) + elseif hex then + for i = 1, #hex, 2 do + this[(i - 1)/2] = tonumber(string.sub(hex, i, i+1), 16) end - else - -- array - for i = 1, str.length do - this[i - 1] = str[i - 1] + elseif raw then + for i = 1, size do + this[i - 1] = string.byte(raw, i) end end - return this end diff --git a/src/colony/lua_tm.c b/src/colony/lua_tm.c index 32117ddb..531df1a8 100644 --- a/src/colony/lua_tm.c +++ b/src/colony/lua_tm.c @@ -922,20 +922,85 @@ static int l_tm_fs_dir_close (lua_State* L) return 1; } +static int l_tm_str_to_ascii (lua_State* L) +{ + const uint8_t* raw; + size_t str_len; + const char* str = lua_tolstring(L, 1, &str_len); + size_t raw_len = tm_str_to_ascii((const uint8_t*) str, str_len + 1, &raw) - 1; // compensate for NUL byte at end + lua_pushlstring(L, (const char*) raw, raw_len); + if ((void*) raw != (void*) str) free((uint8_t*) raw); + return 1; +} + 
+static int l_tm_str_from_ascii (lua_State* L) +{ + const char* str; + size_t raw_len; + const char* raw = lua_tolstring(L, 1, &raw_len); + size_t str_len = tm_str_from_ascii((const uint8_t*) raw, raw_len, (const uint8_t**) &str); + lua_pushlstring(L, str, str_len); + if ((void*) str != (void*) raw) free((char*) str); + return 1; +} + +static int l_tm_str_to_binary (lua_State* L) +{ + const uint8_t* raw; + size_t str_len; + const char* str = lua_tolstring(L, 1, &str_len); + size_t raw_len = tm_str_to_binary((const uint8_t*) str, str_len + 1, &raw) - 1; // compensate for NUL byte at end + lua_pushlstring(L, (const char*) raw, raw_len); + if ((void*) raw != (void*) str) free((uint8_t*) raw); + return 1; +} + +static int l_tm_str_from_binary (lua_State* L) +{ + const char* str; + size_t raw_len; + const char* raw = lua_tolstring(L, 1, &raw_len); + size_t str_len = tm_str_from_binary((const uint8_t*) raw, raw_len, (const uint8_t**) &str); + lua_pushlstring(L, str, str_len); + if ((void*) str != (void*) raw) free((char*) str); + return 1; +} + +static int l_tm_str_to_utf16le (lua_State* L) +{ + const uint8_t* raw; + size_t str_len; + const char* str = lua_tolstring(L, 1, &str_len); + size_t raw_len = tm_str_to_utf16le((const uint8_t*) str, str_len + 1, &raw) - 1; // compensate for NUL byte at end + lua_pushlstring(L, (const char*) raw, raw_len); + if ((void*) raw != (void*) str) free((uint8_t*) raw); + return 1; +} + +static int l_tm_str_from_utf16le (lua_State* L) +{ + const char* str; + size_t raw_len; + const char* raw = lua_tolstring(L, 1, &raw_len); + size_t str_len = tm_str_from_utf16le((const uint8_t*) raw, raw_len, (const uint8_t**) &str); + lua_pushlstring(L, str, str_len); + if ((void*) str != (void*) raw) free((char*) str); + return 1; +} static int l_tm_str_to_utf8 (lua_State* L) { - size_t utf8_len; - const char* utf8 = colony_tolutf8(L, 1, &utf8_len); - lua_pushlstring(L, utf8, utf8_len); + size_t raw_len; + const char* raw = colony_tolutf8(L, 1, 
&raw_len); + lua_pushlstring(L, raw, raw_len); return 1; } static int l_tm_str_from_utf8 (lua_State* L) { - size_t utf8_len; - const char* utf8 = lua_tolstring(L, 1, &utf8_len); - colony_pushlutf8(L, utf8, utf8_len); + size_t raw_len; + const char* raw = lua_tolstring(L, 1, &raw_len); + colony_pushlutf8(L, raw, raw_len); return 1; } @@ -1389,10 +1454,16 @@ LUALIB_API int luaopen_tm (lua_State *L) { "fs_dir_read", l_tm_fs_dir_read }, { "fs_dir_close", l_tm_fs_dir_close }, - // unicode + // encodings { "str_to_utf8", l_tm_str_to_utf8 }, { "str_from_utf8", l_tm_str_from_utf8 }, - + { "str_to_utf16le", l_tm_str_to_utf16le }, + { "str_from_utf16le", l_tm_str_from_utf16le }, + { "str_to_binary", l_tm_str_to_binary }, + { "str_from_binary", l_tm_str_from_binary }, + { "str_to_ascii", l_tm_str_to_ascii }, + { "str_from_ascii", l_tm_str_from_ascii }, + // internal string manipulation { "str_codeat", l_tm_str_codeat }, { "str_fromcode", l_tm_str_fromcode }, diff --git a/src/tm.h b/src/tm.h index 3bd09e4d..1c8865c8 100644 --- a/src/tm.h +++ b/src/tm.h @@ -211,6 +211,17 @@ size_t tm_utf8_encode(uint8_t* buf, size_t buf_len, uint32_t uc); size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); +size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); +size_t tm_str_from_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); + +size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask); +size_t _tm_str_from_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask); +#define tm_str_to_ascii(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF) // yes 0xFF, despite node.js doc insinuation! 
+#define tm_str_from_ascii(a,b,c) _tm_str_from_8bit(a,b,c, 0x7F) +#define tm_str_to_binary(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF) +#define tm_str_from_binary(a,b,c) _tm_str_from_8bit(a,b,c, 0xFF) + + // INTERNAL STRING MANIPULATION uint32_t tm_str_codeat (const uint8_t* buf, size_t buf_len, size_t index); diff --git a/src/tm_utf16.c b/src/tm_utf16.c new file mode 100644 index 00000000..e8e82bce --- /dev/null +++ b/src/tm_utf16.c @@ -0,0 +1,47 @@ +#include + +#include "tm.h" + +// NOTE: Ideally these would deal with native uint16_t arrays, and have separate uint16_t<->uint8_t endian helper. +// But it doesn't seem worth the extra pain and potential performance hit right now. + +#define IS_BIG_ENDIAN 0 // TODO + +size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { + uint16_t* utf16 = calloc(buf_len, 2); // NOTE: we know utf16 will be this size or less + size_t utf16_len = 0; + + size_t buf_pos = 0; + while (buf_pos < buf_len) { + uint32_t uchar; + buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); + assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length + assert(uchar < 0x10000); // internal strings should only include BMP codepoints + #if IS_BIG_ENDIAN + utf16[utf16_len] = __builtin_bswap16((uint16_t) uchar); + #else + utf16[utf16_len] = (uint16_t) uchar; + #endif + utf16_len += 1; + } + *dstptr = (uint8_t*) utf16; + return utf16_len; +} + +size_t tm_str_from_utf16le (const uint8_t* utf16, size_t utf16_len, const uint8_t ** const dstptr) { + size_t buf_len = utf16_len; + // TODO: figure out actual length needed? 
(typically a right-size copy is made into Lua anyway though…) + buf_len *= 3; // HACK: each incoming codepoint could require up to 3 bytes to represent + uint8_t* buf = malloc(buf_len); + + size_t buf_pos = 0; + size_t utf16_pos = 0; + while (utf16_pos < utf16_len) { + assert(buf_pos < buf_len); + uint16_t uchar = utf16[utf16_pos]; + buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar); + utf16_pos += 1; + } + *dstptr = buf; + return buf_pos; +} diff --git a/src/tm_utf7.c b/src/tm_utf7.c new file mode 100644 index 00000000..0fc55405 --- /dev/null +++ b/src/tm_utf7.c @@ -0,0 +1,34 @@ +#include + +#include "tm.h" + + +size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, uint8_t mask) { + uint8_t* ascii_buf = malloc(buf_len); // NOTE: we know ascii will be this size or less + size_t ascii_len = 0; + + size_t buf_pos = 0; + while (buf_pos < buf_len) { + uint32_t uchar; + buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); + assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length + assert(uchar < 0x10000); // internal strings should only include BMP codepoints + ascii_buf[ascii_len] = (uint8_t) uchar & mask; + printf("%x @ %lu <%lu\n", uchar, ascii_len, buf_pos); + ascii_len += 1; + } + *dstptr = ascii_buf; + return ascii_len; +} + +size_t _tm_str_from_8bit (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr, uint8_t mask) { + uint8_t* buf = malloc(ascii_len); + + size_t pos = 0; + while (pos < ascii_len) { + buf[pos] = ascii_buf[pos] & mask; + ++pos; + } + *dstptr = buf; + return pos; +} diff --git a/test/suite/buffer.js b/test/suite/buffer.js index 852baf34..9bc00f73 100644 --- a/test/suite/buffer.js +++ b/test/suite/buffer.js @@ -167,7 +167,16 @@ var b = new Buffer([0, 0x41, 0x82, 0x104]); tap.eq(b.toString('binary'), "\u0000\u0041\u0082\u0004"); tap.eq(b.toString('ascii'), "\u0000\u0041\u0002\u0004"); 
tap.eq(b.toString('utf8'), "\u0000\u0041\uFFFD\u0004"); -//tap.eq(b.toString('utf16le'), "\u4100\u0482"); +tap.eq(b.toString('utf16le'), "\u4100\u0482"); + +tap.eq(Buffer("\u8182", 'utf8')[3], 0x82); +tap.eq(Buffer("\u8182", 'utf8').length, 3); +tap.eq(Buffer("\u8182", 'ascii')[0], 0x82); +tap.eq(Buffer("\u8182", 'ascii').length, 1); +tap.eq(Buffer("\u8182", 'binary')[0], 0x82); +tap.eq(Buffer("\u8182", 'binary').length, 1); +tap.eq(Buffer("\u8182", 'utf16le')[1], 0x81); +tap.eq(Buffer("\u8182", 'utf16le').length, 2); // write var buf = new Buffer(256); From 64533c9d7405f040f378b2925660f08c1105c79f Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 17:04:46 -0800 Subject: [PATCH 02/13] =?UTF-8?q?fix=20up=20ill-fated=20conflation=20betwe?= =?UTF-8?q?en=20ascii=20and=20binary=20(actually=20could=20have=20shared?= =?UTF-8?q?=20code=20in=20tm=5Fstr=5Fto=5FX=20case=20but=E2=80=A6meh=3F)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tm.h | 11 +++++------ src/tm_utf7.c | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/src/tm.h b/src/tm.h index 1c8865c8..3a01c098 100644 --- a/src/tm.h +++ b/src/tm.h @@ -214,12 +214,11 @@ size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dst size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); size_t tm_str_from_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); -size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask); -size_t _tm_str_from_8bit (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, uint8_t mask); -#define tm_str_to_ascii(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF) // yes 0xFF, despite node.js doc insinuation! 
-#define tm_str_from_ascii(a,b,c) _tm_str_from_8bit(a,b,c, 0x7F) -#define tm_str_to_binary(a,b,c) _tm_str_to_8bit(a,b,c, 0xFF) -#define tm_str_from_binary(a,b,c) _tm_str_from_8bit(a,b,c, 0xFF) +size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); +size_t tm_str_from_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); + +size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); +size_t tm_str_from_binary (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); // INTERNAL STRING MANIPULATION diff --git a/src/tm_utf7.c b/src/tm_utf7.c index 0fc55405..98ccc8a4 100644 --- a/src/tm_utf7.c +++ b/src/tm_utf7.c @@ -2,8 +2,7 @@ #include "tm.h" - -size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, uint8_t mask) { +size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { uint8_t* ascii_buf = malloc(buf_len); // NOTE: we know ascii will be this size or less size_t ascii_len = 0; @@ -13,22 +12,51 @@ size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** con buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length assert(uchar < 0x10000); // internal strings should only include BMP codepoints - ascii_buf[ascii_len] = (uint8_t) uchar & mask; - printf("%x @ %lu <%lu\n", uchar, ascii_len, buf_pos); + ascii_buf[ascii_len] = (uint8_t) uchar & 0xFF; // yes 0xFF, despite node.js doc insinuation! 
ascii_len += 1; } *dstptr = ascii_buf; return ascii_len; } -size_t _tm_str_from_8bit (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr, uint8_t mask) { +size_t tm_str_from_ascii (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr) { uint8_t* buf = malloc(ascii_len); size_t pos = 0; while (pos < ascii_len) { - buf[pos] = ascii_buf[pos] & mask; + buf[pos] = ascii_buf[pos] & 0x7F; ++pos; } *dstptr = buf; return pos; } + +size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { + uint8_t* binary_buf = malloc(buf_len); // NOTE: we know binary will be this size or less + size_t binary_len = 0; + + size_t buf_pos = 0; + while (buf_pos < buf_len) { + uint32_t uchar; + buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); + assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length + assert(uchar < 0x10000); // internal strings should only include BMP codepoints + binary_buf[binary_len] = (uint8_t) uchar & 0xFF; + binary_len += 1; + } + *dstptr = binary_buf; + return binary_len; +} + +size_t tm_str_from_binary (const uint8_t* binary, size_t binary_len, const uint8_t ** const dstptr) { + uint8_t* str = calloc(binary_len, 2); // NOTE: size could at most double if every incoming byte is > 127 + + size_t str_pos = 0; + size_t binary_pos = 0; + while (binary_pos < binary_len) { + str_pos += tm_utf8_encode(str + str_pos, 2, binary[binary_pos]); + binary_pos += 1; + } + *dstptr = str; + return str_pos; +} From 933380c460437318a143b5552e3ab3558bf69c09 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 17:07:53 -0800 Subject: [PATCH 03/13] share trivially shareable 7/8-bit conversion code --- src/tm_utf7.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/tm_utf7.c b/src/tm_utf7.c index 98ccc8a4..56e59de5 100644 --- a/src/tm_utf7.c +++ b/src/tm_utf7.c @@ 
-2,7 +2,7 @@ #include "tm.h" -size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { +size_t _tm_str_to_8bit (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, uint8_t mask) { uint8_t* ascii_buf = malloc(buf_len); // NOTE: we know ascii will be this size or less size_t ascii_len = 0; @@ -12,13 +12,17 @@ size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** con buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length assert(uchar < 0x10000); // internal strings should only include BMP codepoints - ascii_buf[ascii_len] = (uint8_t) uchar & 0xFF; // yes 0xFF, despite node.js doc insinuation! + ascii_buf[ascii_len] = (uint8_t) uchar & mask; ascii_len += 1; } *dstptr = ascii_buf; return ascii_len; } +size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { + return _tm_str_to_8bit(buf, buf_len, dstptr, 0xFF); // yes 0xFF, despite node.js doc insinuation! 
+} + size_t tm_str_from_ascii (const uint8_t* ascii_buf, size_t ascii_len, const uint8_t ** const dstptr) { uint8_t* buf = malloc(ascii_len); @@ -32,20 +36,7 @@ size_t tm_str_from_ascii (const uint8_t* ascii_buf, size_t ascii_len, const uint } size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { - uint8_t* binary_buf = malloc(buf_len); // NOTE: we know binary will be this size or less - size_t binary_len = 0; - - size_t buf_pos = 0; - while (buf_pos < buf_len) { - uint32_t uchar; - buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); - assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length - assert(uchar < 0x10000); // internal strings should only include BMP codepoints - binary_buf[binary_len] = (uint8_t) uchar & 0xFF; - binary_len += 1; - } - *dstptr = binary_buf; - return binary_len; + return _tm_str_to_8bit(buf, buf_len, dstptr, 0xFF); } size_t tm_str_from_binary (const uint8_t* binary, size_t binary_len, const uint8_t ** const dstptr) { From 825f6e8fe9c5b598414d2bfaf44bf9d3f9063b41 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 17:28:05 -0800 Subject: [PATCH 04/13] add/fix a few more tests and update to current count --- test/suite/buffer.js | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/test/suite/buffer.js b/test/suite/buffer.js index 9bc00f73..31129d05 100644 --- a/test/suite/buffer.js +++ b/test/suite/buffer.js @@ -1,6 +1,6 @@ var tap = require('../tap'); -tap.count(70); +tap.count(96); function arreq (a, b) { if (a.length != b.length) { @@ -164,19 +164,43 @@ console.log('#', new Buffer('hello world').toString('hex')) console.log('#', new Buffer(new Buffer('hello world').toString('hex'), 'hex')) var b = new Buffer([0, 0x41, 0x82, 0x104]); -tap.eq(b.toString('binary'), "\u0000\u0041\u0082\u0004"); -tap.eq(b.toString('ascii'), 
"\u0000\u0041\u0002\u0004"); -tap.eq(b.toString('utf8'), "\u0000\u0041\uFFFD\u0004"); -tap.eq(b.toString('utf16le'), "\u4100\u0482"); - -tap.eq(Buffer("\u8182", 'utf8')[3], 0x82); +tap.eq(b.length, 4, "array ingested"); +tap.eq(b[0], 0x00); +tap.eq(b[1], 0x41); +tap.eq(b[2], 0x82); +tap.eq(b[3], 0x04); +tap.eq(b.toString('binary'), "\u0000\u0041\u0082\u0004", "binary toString"); +tap.eq(b.toString('binary').length, 4); +tap.eq(b.toString('ascii'), "\u0000\u0041\u0002\u0004", "ascii toString"); +tap.eq(b.toString('ascii').length, 4); +tap.eq(b.toString('utf8'), "\u0000\u0041\uFFFD\u0004", "utf8 toString"); +tap.eq(b.toString('utf8').length, 4); +tap.eq(b.toString('utf16le'), "\u4100\u0482", "utf16le toString"); +tap.eq(b.toString('utf16le').length, 2); +tap.eq(b.toString('ucs2').length, 2); +tap.eq(b.toString('base64'), "AEGCBA==", "base64 toString"); +tap.eq(b.toString('base64').length, 8); +tap.eq(b.toString('hex'), "00418204", "hex toString"); +tap.eq(b.toString('base64').length, 8); + +tap.eq(Buffer("\u8182", 'utf8')[2], 0x82, "buffer from utf8"); tap.eq(Buffer("\u8182", 'utf8').length, 3); -tap.eq(Buffer("\u8182", 'ascii')[0], 0x82); +tap.eq(Buffer("\u8182", 'ascii')[0], 0x82, "buffer from ascii"); tap.eq(Buffer("\u8182", 'ascii').length, 1); -tap.eq(Buffer("\u8182", 'binary')[0], 0x82); +tap.eq(Buffer("\u8182", 'binary')[0], 0x82, "buffer from binary"); tap.eq(Buffer("\u8182", 'binary').length, 1); -tap.eq(Buffer("\u8182", 'utf16le')[1], 0x81); +tap.eq(Buffer("\u8182", 'utf16le')[1], 0x81, "buffer from utf16le"); tap.eq(Buffer("\u8182", 'utf16le').length, 2); +tap.eq(Buffer("\u8182", 'ucs2')[1], 0x81); +tap.eq(Buffer("\u8182", 'base64').length, 0, "buffer from [bad] base64"); +var threw; +try { + Buffer("\u8182", 'hex'); +} catch (e) { + threw = e; +} +tap.ok(threw, "buffer from [bad] hex"); + // write var buf = new Buffer(256); From 14355c668fb892e6a26960d1b5ca717ce58dd952 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 17:54:59 -0800 
Subject: [PATCH 05/13] fix up some halfway-unconverted utf16le code --- src/tm_utf16.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tm_utf16.c b/src/tm_utf16.c index e8e82bce..d8fc635f 100644 --- a/src/tm_utf16.c +++ b/src/tm_utf16.c @@ -25,23 +25,23 @@ size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** c utf16_len += 1; } *dstptr = (uint8_t*) utf16; - return utf16_len; + return (utf16_len << 1) - 1; // include only single null *byte* (for consistency with others) } -size_t tm_str_from_utf16le (const uint8_t* utf16, size_t utf16_len, const uint8_t ** const dstptr) { - size_t buf_len = utf16_len; - // TODO: figure out actual length needed? (typically a right-size copy is made into Lua anyway though…) - buf_len *= 3; // HACK: each incoming codepoint could require up to 3 bytes to represent - uint8_t* buf = malloc(buf_len); +size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint8_t ** const dstptr) { + const uint16_t* utf16 = (const uint16_t*) _utf16; + size_t utf16_len = _utf16_len >> 1; + + uint8_t* buf = calloc(utf16_len, 3); // each incoming codepoint could require up to 3 bytes to represent size_t buf_pos = 0; size_t utf16_pos = 0; while (utf16_pos < utf16_len) { - assert(buf_pos < buf_len); uint16_t uchar = utf16[utf16_pos]; buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar); utf16_pos += 1; } + buf[buf_pos++] = '\0'; // manually add null byte (just for consistency with other encodings) *dstptr = buf; return buf_pos; } From 622ed50a7ac8229f28e60e1ee5cc70cead9e7a18 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 17:59:06 -0800 Subject: [PATCH 06/13] fix up overcorrection in UTF-16 conversion fix up --- src/tm_utf16.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tm_utf16.c b/src/tm_utf16.c index d8fc635f..aa9fb8ee 100644 --- a/src/tm_utf16.c +++ b/src/tm_utf16.c @@ -41,7 +41,6 @@ size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t 
_utf16_len, const uint buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar); utf16_pos += 1; } - buf[buf_pos++] = '\0'; // manually add null byte (just for consistency with other encodings) *dstptr = buf; return buf_pos; } From 43a16ce67486d6135aed81c688a8eb6a0c980105 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 17:59:52 -0800 Subject: [PATCH 07/13] improve base64 hygiene --- src/colony/lua/colony-node.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua index 00636520..0558a3a6 100644 --- a/src/colony/lua/colony-node.lua +++ b/src/colony/lua/colony-node.lua @@ -308,9 +308,9 @@ local buffer_proto = js_obj({ elseif encoding == 'utf8' or encoding == 'utf-8' then return tm.str_from_utf8(buf); elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then - return tm.str_from_utf16le(buf) + return tm.str_from_utf16le(buf); elseif encoding == 'base64' then - return to_base64(buf); + return tm.str_from_binary(to_base64(buf)); elseif encoding == 'hex' then local str = string.gsub(buf, '(.)', function (c) return string.format('%02x', string.byte(c)); @@ -505,7 +505,7 @@ local function Buffer (this, arg, encoding) elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then raw = tm.str_to_utf16le(arg) elseif encoding == 'base64' then - raw = from_base64(arg) + raw = from_base64(tm.str_to_binary(arg)) elseif encoding == 'hex' then if string.len(arg) % 2 ~= 0 then error(js_new(global.TypeError, 'Invalid hex string.')) From 9c50a828b77ab4a109a72898f15984b3c80c4e77 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 18:06:27 -0800 Subject: [PATCH 08/13] indent --- src/tm_utf8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tm_utf8.c b/src/tm_utf8.c index 01410a3e..9de95e28 100644 --- a/src/tm_utf8.c +++ b/src/tm_utf8.c @@ -71,7 +71,7 @@ 
size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t ** cons size_t buf_pos = 0; while (buf_pos < buf_len) { uint32_t uchar; - buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); + buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length // NOTE: this follows new behavior http://blog.nodejs.org/2014/06/16/openssl-and-breaking-utf-8-change/ if (hchar) { From c76e4d97bc25e31dbcaf08b3d602efbcb9387081 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 18:06:58 -0800 Subject: [PATCH 09/13] =?UTF-8?q?avoid=20abort=5Ftrap=20on=20invalid=20bas?= =?UTF-8?q?e64,=20but=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/colony/lua/colony-node.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua index 0558a3a6..5bcc07e1 100644 --- a/src/colony/lua/colony-node.lua +++ b/src/colony/lua/colony-node.lua @@ -186,7 +186,7 @@ function from_base64(to_decode) local char = string.sub(to_decode, i, i) local offset, _ = string.find(index_table, char) if offset == nil then - error(js_new(global.Error, "Invalid character '" .. char .. "' found.")) + error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found.")) end bit_pattern = bit_pattern .. 
string.sub(to_binary(offset-1), 3) From d006454f8dc685ef7d600bd443de831537b4834e Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Mon, 17 Nov 2014 18:08:20 -0800 Subject: [PATCH 10/13] =?UTF-8?q?=E2=80=A6node.js=20(for=20whatever=20reas?= =?UTF-8?q?on=20O=5Fo)=20actually=20does=20not=20throw=20but=20returns=20e?= =?UTF-8?q?mpty=20buffer=20on=20malformed=20base64?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/colony/lua/colony-node.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua index 5bcc07e1..cd98114e 100644 --- a/src/colony/lua/colony-node.lua +++ b/src/colony/lua/colony-node.lua @@ -186,7 +186,8 @@ function from_base64(to_decode) local char = string.sub(to_decode, i, i) local offset, _ = string.find(index_table, char) if offset == nil then - error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found.")) + --error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. "' found.")) + return '' end bit_pattern = bit_pattern .. string.sub(to_binary(offset-1), 3) From f0dd31b0a399f16d5231217f6805edee6bf3c70d Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Wed, 19 Nov 2014 15:11:36 -0800 Subject: [PATCH 11/13] excise dead code, https://github.com/tessel/runtime/pull/645#discussion_r20481702 --- src/colony/lua/colony-node.lua | 1 - 1 file changed, 1 deletion(-) diff --git a/src/colony/lua/colony-node.lua b/src/colony/lua/colony-node.lua index cd98114e..a5559240 100644 --- a/src/colony/lua/colony-node.lua +++ b/src/colony/lua/colony-node.lua @@ -186,7 +186,6 @@ function from_base64(to_decode) local char = string.sub(to_decode, i, i) local offset, _ = string.find(index_table, char) if offset == nil then - --error(js_new(global.Error, "Invalid character '" .. tm.str_from_binary(char) .. 
"' found.")) return '' end From 2a8e465810cd09542a07103352e09549fa352958 Mon Sep 17 00:00:00 2001 From: Nathan Vander Wilt Date: Tue, 25 Nov 2014 16:01:43 -0800 Subject: [PATCH 12/13] have tm_utf16 support native [and big] endian as well as the LE the buffer methods need, this way https://github.com/tessel/runtime/pull/566 will be able to share our tm_str_to_utf16 implementation --- src/colony/lua_tm.c | 4 ++-- src/tm.h | 21 +++++++++++++++++---- src/tm_utf16.c | 14 ++++---------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/colony/lua_tm.c b/src/colony/lua_tm.c index 531df1a8..8d1872c1 100644 --- a/src/colony/lua_tm.c +++ b/src/colony/lua_tm.c @@ -971,7 +971,7 @@ static int l_tm_str_to_utf16le (lua_State* L) const uint8_t* raw; size_t str_len; const char* str = lua_tolstring(L, 1, &str_len); - size_t raw_len = tm_str_to_utf16le((const uint8_t*) str, str_len + 1, &raw) - 1; // compensate for NUL byte at end + size_t raw_len = tm_str_to_utf16((const uint8_t*) str, str_len + 1, &raw, LE) - 1; // compensate for NUL byte at end lua_pushlstring(L, (const char*) raw, raw_len); if ((void*) raw != (void*) str) free((uint8_t*) raw); return 1; @@ -982,7 +982,7 @@ static int l_tm_str_from_utf16le (lua_State* L) const char* str; size_t raw_len; const char* raw = lua_tolstring(L, 1, &raw_len); - size_t str_len = tm_str_from_utf16le((const uint8_t*) raw, raw_len, (const uint8_t**) &str); + size_t str_len = tm_str_from_utf16((const uint8_t*) raw, raw_len, (const uint8_t**) &str, LE); lua_pushlstring(L, str, str_len); if ((void*) str != (void*) raw) free((char*) str); return 1; diff --git a/src/tm.h b/src/tm.h index 3a01c098..9dacab1a 100644 --- a/src/tm.h +++ b/src/tm.h @@ -196,14 +196,27 @@ uint32_t tm_uptime_micro (); double tm_timestamp (); int tm_timestamp_update (double millis); -// BUFFER + +// ENDIANNESS + +#include "order32.h" typedef enum { BE = 0, LE } tm_endian_t; -// UNICODE +#define TM_ENDIAN_HOST (O32_HOST_ORDER == O32_BIG_ENDIAN ? 
BE : LE) +#define TM_ENDIAN_SWAP64(e, x) ((e != TM_ENDIAN_HOST) ? __builtin_bswap64(x) : x) +#define TM_ENDIAN_SWAP32(e, x) ((e != TM_ENDIAN_HOST) ? __builtin_bswap32(x) : x) +#define TM_ENDIAN_SWAP16(e, x) ((e != TM_ENDIAN_HOST) ? __builtin_bswap16(x) : x) + +// BUFFER + +void tm_buffer_float_write (uint8_t* buf, size_t index, float value, tm_endian_t endianness); +void tm_buffer_double_write (uint8_t* buf, size_t index, double value, tm_endian_t endianness); + +// ENCODINGS (UNICODE / ASCII / BINARY) #define TM_UTF8_DECODE_ERROR UINT32_MAX size_t tm_utf8_decode(const uint8_t* buf, size_t buf_len, uint32_t* uc); @@ -211,8 +224,8 @@ size_t tm_utf8_encode(uint8_t* buf, size_t buf_len, uint32_t uc); size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); -size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); -size_t tm_str_from_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); +size_t tm_str_to_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, tm_endian_t endianness); +size_t tm_str_from_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, tm_endian_t endianness); size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); size_t tm_str_from_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr); diff --git a/src/tm_utf16.c b/src/tm_utf16.c index aa9fb8ee..d6c7b48e 100644 --- a/src/tm_utf16.c +++ b/src/tm_utf16.c @@ -5,9 +5,7 @@ // NOTE: Ideally these would deal with native uint16_t arrays, and have separate uint16_t<->uint8_t endian helper. // But it doesn't seem worth the extra pain and potential performance hit right now. 
-#define IS_BIG_ENDIAN 0 // TODO - -size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr) { +size_t tm_str_to_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t ** const dstptr, tm_endian_t endianness) { uint16_t* utf16 = calloc(buf_len, 2); // NOTE: we know utf16 will be this size or less size_t utf16_len = 0; @@ -17,18 +15,14 @@ size_t tm_str_to_utf16le (const uint8_t* buf, size_t buf_len, const uint8_t ** c buf_pos += tm_utf8_decode(buf + buf_pos, buf_len - buf_pos, &uchar); assert(uchar != TM_UTF8_DECODE_ERROR); // internal strings should never be malformed, 0xFFFD replacement increases length assert(uchar < 0x10000); // internal strings should only include BMP codepoints - #if IS_BIG_ENDIAN - utf16[utf16_len] = __builtin_bswap16((uint16_t) uchar); - #else - utf16[utf16_len] = (uint16_t) uchar; - #endif + utf16[utf16_len] = TM_ENDIAN_SWAP16(endianness, (uint16_t) uchar); utf16_len += 1; } *dstptr = (uint8_t*) utf16; return (utf16_len << 1) - 1; // include only single null *byte* (for consistency with others) } -size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint8_t ** const dstptr) { +size_t tm_str_from_utf16 (const uint8_t* _utf16, size_t _utf16_len, const uint8_t ** const dstptr, tm_endian_t endianness) { const uint16_t* utf16 = (const uint16_t*) _utf16; size_t utf16_len = _utf16_len >> 1; @@ -37,7 +31,7 @@ size_t tm_str_from_utf16le (const uint8_t* _utf16, size_t _utf16_len, const uint size_t buf_pos = 0; size_t utf16_pos = 0; while (utf16_pos < utf16_len) { - uint16_t uchar = utf16[utf16_pos]; + uint16_t uchar = TM_ENDIAN_SWAP16(endianness, utf16[utf16_pos]); buf_pos += tm_utf8_encode(buf + buf_pos, 3, uchar); utf16_pos += 1; } From 93f31fc6bf38fc9cda9a3a8a90c45064e38b72dd Mon Sep 17 00:00:00 2001 From: Tim Cameron Ryan Date: Mon, 15 Dec 2014 10:55:04 -0800 Subject: [PATCH 13/13] Restores compilation ability at -O0. Does not require HTTP to be UTF8-safe. 
--- src/colony/lua_http_parser.c | 8 ++++---- src/colony/lua_tm.c | 4 ++-- src/colony/modules/http.js | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/colony/lua_http_parser.c b/src/colony/lua_http_parser.c index 14111d8d..85f7e8dd 100644 --- a/src/colony/lua_http_parser.c +++ b/src/colony/lua_http_parser.c @@ -110,7 +110,7 @@ static int lhttp_parser_on_url(http_parser *p, const char *at, size_t length) { return 0; }; /* Push the string argument */ - lua_pushlstring(L, at, length); + colony_pushbuffer(L, (const uint8_t*) at, length); lua_call(L, 1, 1); @@ -132,7 +132,7 @@ static int lhttp_parser_on_header_field(http_parser *p, const char *at, size_t l return 0; }; /* Push the string argument */ - lua_pushlstring(L, at, length); + colony_pushbuffer(L, (const uint8_t*) at, length); lua_call(L, 1, 1); @@ -154,7 +154,7 @@ static int lhttp_parser_on_header_value(http_parser *p, const char *at, size_t l return 0; }; /* Push the string argument */ - lua_pushlstring(L, at, length); + colony_pushbuffer(L, (const uint8_t*) at, length); lua_call(L, 1, 1); @@ -176,7 +176,7 @@ static int lhttp_parser_on_body(http_parser *p, const char *at, size_t length) { return 0; }; /* Push the string argument */ - lua_pushlstring(L, at, length); + colony_pushbuffer(L, (const uint8_t*) at, length); lua_call(L, 1, 1); diff --git a/src/colony/lua_tm.c b/src/colony/lua_tm.c index 8d1872c1..a442e122 100644 --- a/src/colony/lua_tm.c +++ b/src/colony/lua_tm.c @@ -71,7 +71,7 @@ const char* colony_tolutf8 (lua_State* L, int index, size_t* res_len) return lua_tolstring(L, index, res_len); } -inline const char* colony_toutf8 (lua_State* L, int index) +const char* colony_toutf8 (lua_State* L, int index) { return colony_tolutf8(L, index, NULL); } @@ -85,7 +85,7 @@ void colony_pushlutf8 (lua_State* L, const char* utf8, size_t utf8_len) if (str != utf8) free((char*) str); } -inline void colony_pushutf8 (lua_State* L, const char* utf8) +void colony_pushutf8 (lua_State* L, const 
char* utf8) { colony_pushlutf8(L, utf8, strlen(utf8)); } diff --git a/src/colony/modules/http.js b/src/colony/modules/http.js index 8e3441fb..c9cb6b40 100644 --- a/src/colony/modules/http.js +++ b/src/colony/modules/http.js @@ -127,11 +127,13 @@ function IncomingMessage (type, socket) { self.url = url; }), onHeaderField: parserCallback(function (field) { + field = field.toString(); var arr = (self._headersComplete) ? self.rawTrailers : self.rawHeaders; if (arr.length + 1 > self._maxRawHeaders) return; arr.push(field); }), onHeaderValue: parserCallback(function (value) { + value = value.toString(); var arr = (self._headersComplete) ? self.rawTrailers : self.rawHeaders, key = arr[arr.length - 1].toLowerCase(); if (arr.length + 1 > self._maxRawHeaders) return;