Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion config/libtm.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,9 @@
'<(runtime_path)/tm_random.c',
'<(runtime_path)/tm_deflate.c',
'<(runtime_path)/tm_str.c',
'<(runtime_path)/tm_utf8.c'
'<(runtime_path)/tm_utf7.c',
'<(runtime_path)/tm_utf8.c',
'<(runtime_path)/tm_utf16.c',
],
"include_dirs": [
'<(runtime_path)/',
Expand Down
112 changes: 55 additions & 57 deletions src/colony/lua/colony-node.lua
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ function from_base64(to_decode)
local char = string.sub(to_decode, i, i)
local offset, _ = string.find(index_table, char)
if offset == nil then
error(js_new(global.Error, "Invalid character '" .. char .. "' found."))
return ''
end

bit_pattern = bit_pattern .. string.sub(to_binary(offset-1), 3)
Expand Down Expand Up @@ -298,32 +298,24 @@ local buffer_proto = js_obj({
if encoding == nil then
encoding = 'utf8'
end
encoding = string.lower(encoding);
encoding = string.lower(encoding)

local buf = tm.buffer_tobytestring(getmetatable(this).buffer, offset, endOffset);

local buf = tm.buffer_tobytestring(getmetatable(this).buffer, offset, endOffset)
if encoding == 'binary' then
return string.gsub(buf, '[\128-\255]', function (c)
-- original value must be converted to internal encoding
return global.String.fromCharCode(nil, string.byte(c))
end)
return tm.str_from_binary(buf);
elseif encoding == 'ascii' then
-- simply strips high bit from original value
return string.gsub(buf, '[\128-\255]', function (c)
return string.char(string.byte(c) - 128)
end)
return tm.str_from_ascii(buf);
elseif encoding == 'utf8' or encoding == 'utf-8' then
return tm.str_from_utf8(buf);
elseif encoding == 'ucs2' or encoding == 'ucs-2' or encoding == 'utf16le' or encoding == 'utf-16le' then
return tm.str_from_utf16le(buf);
elseif encoding == 'base64' then
return to_base64(buf);
return tm.str_from_binary(to_base64(buf));
elseif encoding == 'hex' then
local str = string.gsub(buf, '(.)', function (c)
return string.format('%02x', string.byte(c));
end)
return str;
elseif encoding == 'ucs2' or encoding == 'ucs-2'
or encoding == 'utf16le' or encoding == 'utf-16le' then
return error(js_new(global.NotImplementedError, 'Encoding not implemented yet: ' + encoding));
else
error(js_new(global.TypeError, 'Unknown encoding: ' + encoding));
end
Expand Down Expand Up @@ -493,56 +485,62 @@ function _of_buffer (this, buf, length)
end

--- Buffer constructor: builds a byte buffer from a size, an array-like
-- object, or a string decoded according to `encoding`.
-- @param this      receiver passed by the caller; replaced by a fresh table
-- @param arg       number (buffer size), string (converted per `encoding`),
--                  array-like object exposing `length` and 0-based items,
--                  or nil (yields an empty buffer)
-- @param encoding  optional encoding name, case-insensitive; defaults to 'utf8'
-- @return the newly created buffer object
-- Raises a TypeError (via js_new) for an odd-length hex string or an
-- unrecognised encoding name.
local function Buffer (this, arg, encoding)
  if encoding == nil then
    encoding = 'utf8'
  end
  encoding = string.lower(encoding)

  -- Exactly one of these describes the payload once the chain below is done.
  local raw, arr, hex, size
  if type(arg) == 'number' then
    size = arg
  elseif arg == nil then
    -- no argument: create an empty buffer instead of failing on #raw below
    size = 0
  elseif type(arg) ~= 'string' then
    -- assume an array
    arr = arg
  elseif encoding == 'binary' then
    raw = tm.str_to_binary(arg)
  elseif encoding == 'ascii' then
    raw = tm.str_to_ascii(arg)
  elseif encoding == 'utf8' or encoding == 'utf-8' then
    raw = tm.str_to_utf8(arg)
  elseif encoding == 'ucs2' or encoding == 'ucs-2'
      or encoding == 'utf16le' or encoding == 'utf-16le' then
    raw = tm.str_to_utf16le(arg)
  elseif encoding == 'base64' then
    raw = from_base64(tm.str_to_binary(arg))
  elseif encoding == 'hex' then
    if string.len(arg) % 2 ~= 0 then
      error(js_new(global.TypeError, 'Invalid hex string.'))
    end
    -- Remove everything from the first invalid character to the end of the
    -- string. gsub returns (string, count); the extra parentheses keep only
    -- the string.
    hex = string.lower((string.gsub(arg, '[^a-fA-F0-9].*', '')))
    -- Stripping can leave a dangling nibble; keep whole bytes only so the
    -- size below is an integer.
    hex = string.sub(hex, 1, #hex - #hex % 2)
  else
    -- `..` is Lua's concatenation operator; `+` is arithmetic.
    error(js_new(global.TypeError, 'Unknown encoding: ' .. encoding))
  end

  if type(size) == 'number' then
    -- all set
  elseif arr then
    size = arr.length or 0
  elseif hex then
    size = #hex / 2
  else
    size = #raw
  end

  this = {}
  local buf = tm.buffer_create(size)
  _of_buffer(this, buf, size)

  -- Buffer indices are 0-based, Lua strings are 1-based.
  if arr then
    for i = 1, size do
      this[i - 1] = arr[i - 1]
    end
  elseif hex then
    for i = 1, #hex, 2 do
      this[(i - 1) / 2] = tonumber(string.sub(hex, i, i + 1), 16)
    end
  elseif raw then
    for i = 1, size do
      this[i - 1] = string.byte(raw, i)
    end
  end

  return this
end

Expand Down
8 changes: 4 additions & 4 deletions src/colony/lua_http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ static int lhttp_parser_on_url(http_parser *p, const char *at, size_t length) {
return 0;
};
/* Push the string argument */
lua_pushlstring(L, at, length);
colony_pushbuffer(L, (const uint8_t*) at, length);

lua_call(L, 1, 1);

Expand All @@ -132,7 +132,7 @@ static int lhttp_parser_on_header_field(http_parser *p, const char *at, size_t l
return 0;
};
/* Push the string argument */
lua_pushlstring(L, at, length);
colony_pushbuffer(L, (const uint8_t*) at, length);

lua_call(L, 1, 1);

Expand All @@ -154,7 +154,7 @@ static int lhttp_parser_on_header_value(http_parser *p, const char *at, size_t l
return 0;
};
/* Push the string argument */
lua_pushlstring(L, at, length);
colony_pushbuffer(L, (const uint8_t*) at, length);

lua_call(L, 1, 1);

Expand All @@ -176,7 +176,7 @@ static int lhttp_parser_on_body(http_parser *p, const char *at, size_t length) {
return 0;
};
/* Push the string argument */
lua_pushlstring(L, at, length);
colony_pushbuffer(L, (const uint8_t*) at, length);

lua_call(L, 1, 1);

Expand Down
91 changes: 81 additions & 10 deletions src/colony/lua_tm.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ const char* colony_tolutf8 (lua_State* L, int index, size_t* res_len)
return lua_tolstring(L, index, res_len);
}

/*
 * Convenience wrapper around colony_tolutf8 for callers that do not need the
 * length: forwards `L` and `index` and passes NULL for the length output.
 *
 * Deliberately a plain external definition. A bare `inline` definition in
 * C99 does not provide an external definition, which can leave callers in
 * other translation units with an unresolved symbol.
 */
const char* colony_toutf8 (lua_State* L, int index)
{
  return colony_tolutf8(L, index, NULL);
}
Expand All @@ -85,7 +85,7 @@ void colony_pushlutf8 (lua_State* L, const char* utf8, size_t utf8_len)
if (str != utf8) free((char*) str);
}

/*
 * Convenience wrapper around colony_pushlutf8 for NUL-terminated input: the
 * length is taken with strlen, so `utf8` must not be NULL.
 *
 * Deliberately a plain external definition. A bare `inline` definition in
 * C99 does not provide an external definition, which can leave callers in
 * other translation units with an unresolved symbol.
 */
void colony_pushutf8 (lua_State* L, const char* utf8)
{
  colony_pushlutf8(L, utf8, strlen(utf8));
}
Expand Down Expand Up @@ -922,20 +922,85 @@ static int l_tm_fs_dir_close (lua_State* L)
return 1;
}

/*
 * Lua binding: runs the string at stack index 1 through tm_str_to_ascii and
 * pushes the converted bytes as a Lua string. Returns 1 (one result).
 */
static int l_tm_str_to_ascii (lua_State* L)
{
  size_t in_len;
  const char* in = lua_tolstring(L, 1, &in_len);
  const uint8_t* out;

  /* The trailing NUL byte is handed to the converter as part of the input,
     so one byte is dropped from the length it reports. */
  size_t out_len = tm_str_to_ascii((const uint8_t*) in, in_len + 1, &out);
  out_len = out_len - 1;

  lua_pushlstring(L, (const char*) out, out_len);

  /* Release the result only when it is not the input pointer itself. */
  if ((void*) out != (void*) in) {
    free((uint8_t*) out);
  }
  return 1;
}

/*
 * Lua binding: runs the byte string at stack index 1 through
 * tm_str_from_ascii and pushes the result as a Lua string.
 * Returns 1 (one result).
 */
static int l_tm_str_from_ascii (lua_State* L)
{
  size_t in_len;
  const char* in = lua_tolstring(L, 1, &in_len);
  const char* out;

  size_t out_len = tm_str_from_ascii((const uint8_t*) in, in_len, (const uint8_t**) &out);
  lua_pushlstring(L, out, out_len);

  /* Release the result only when it is not the input pointer itself. */
  if ((void*) out != (void*) in) {
    free((char*) out);
  }
  return 1;
}

/*
 * Lua binding: runs the string at stack index 1 through tm_str_to_binary and
 * pushes the converted bytes as a Lua string. Returns 1 (one result).
 */
static int l_tm_str_to_binary (lua_State* L)
{
  size_t in_len;
  const char* in = lua_tolstring(L, 1, &in_len);
  const uint8_t* out;

  /* The trailing NUL byte is handed to the converter as part of the input,
     so one byte is dropped from the length it reports. */
  size_t out_len = tm_str_to_binary((const uint8_t*) in, in_len + 1, &out);
  out_len = out_len - 1;

  lua_pushlstring(L, (const char*) out, out_len);

  /* Release the result only when it is not the input pointer itself. */
  if ((void*) out != (void*) in) {
    free((uint8_t*) out);
  }
  return 1;
}

/*
 * Lua binding: runs the byte string at stack index 1 through
 * tm_str_from_binary and pushes the result as a Lua string.
 * Returns 1 (one result).
 */
static int l_tm_str_from_binary (lua_State* L)
{
  size_t in_len;
  const char* in = lua_tolstring(L, 1, &in_len);
  const char* out;

  size_t out_len = tm_str_from_binary((const uint8_t*) in, in_len, (const uint8_t**) &out);
  lua_pushlstring(L, out, out_len);

  /* Release the result only when it is not the input pointer itself. */
  if ((void*) out != (void*) in) {
    free((char*) out);
  }
  return 1;
}

/*
 * Lua binding: runs the string at stack index 1 through tm_str_to_utf16 with
 * little-endian byte order and pushes the converted bytes as a Lua string.
 * Returns 1 (one result).
 */
static int l_tm_str_to_utf16le (lua_State* L)
{
  size_t in_len;
  const char* in = lua_tolstring(L, 1, &in_len);
  const uint8_t* out;

  /* The trailing NUL byte is handed to the converter as part of the input,
     so one byte is dropped from the length it reports. */
  size_t out_len = tm_str_to_utf16((const uint8_t*) in, in_len + 1, &out, LE);
  out_len = out_len - 1;

  lua_pushlstring(L, (const char*) out, out_len);

  /* Release the result only when it is not the input pointer itself. */
  if ((void*) out != (void*) in) {
    free((uint8_t*) out);
  }
  return 1;
}

/*
 * Lua binding: runs the byte string at stack index 1 through
 * tm_str_from_utf16 with little-endian byte order and pushes the result as a
 * Lua string. Returns 1 (one result).
 */
static int l_tm_str_from_utf16le (lua_State* L)
{
  size_t in_len;
  const char* in = lua_tolstring(L, 1, &in_len);
  const char* out;

  size_t out_len = tm_str_from_utf16((const uint8_t*) in, in_len, (const uint8_t**) &out, LE);
  lua_pushlstring(L, out, out_len);

  /* Release the result only when it is not the input pointer itself. */
  if ((void*) out != (void*) in) {
    free((char*) out);
  }
  return 1;
}

/*
 * Lua binding: obtains the UTF-8 form of the value at stack index 1 through
 * colony_tolutf8 and pushes those bytes as a Lua string.
 * Returns 1 (one result).
 *
 * The conversion and push happen exactly once; the earlier body performed
 * both twice and left a surplus string on the stack.
 */
static int l_tm_str_to_utf8 (lua_State* L)
{
  size_t raw_len;
  const char* raw = colony_tolutf8(L, 1, &raw_len);
  lua_pushlstring(L, raw, raw_len);
  return 1;
}

/*
 * Lua binding: reads the byte string at stack index 1 and pushes it through
 * colony_pushlutf8. Returns 1 (one result).
 *
 * The read and push happen exactly once; the earlier body performed both
 * twice and left a surplus string on the stack.
 */
static int l_tm_str_from_utf8 (lua_State* L)
{
  size_t raw_len;
  const char* raw = lua_tolstring(L, 1, &raw_len);
  colony_pushlutf8(L, raw, raw_len);
  return 1;
}

Expand Down Expand Up @@ -1389,10 +1454,16 @@ LUALIB_API int luaopen_tm (lua_State *L)
{ "fs_dir_read", l_tm_fs_dir_read },
{ "fs_dir_close", l_tm_fs_dir_close },

// unicode
// encodings
{ "str_to_utf8", l_tm_str_to_utf8 },
{ "str_from_utf8", l_tm_str_from_utf8 },

{ "str_to_utf16le", l_tm_str_to_utf16le },
{ "str_from_utf16le", l_tm_str_from_utf16le },
{ "str_to_binary", l_tm_str_to_binary },
{ "str_from_binary", l_tm_str_from_binary },
{ "str_to_ascii", l_tm_str_to_ascii },
{ "str_from_ascii", l_tm_str_from_ascii },

// internal string manipulation
{ "str_codeat", l_tm_str_codeat },
{ "str_fromcode", l_tm_str_fromcode },
Expand Down
2 changes: 2 additions & 0 deletions src/colony/modules/http.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,13 @@ function IncomingMessage (type, socket) {
self.url = url;
}),
onHeaderField: parserCallback(function (field) {
field = field.toString();
var arr = (self._headersComplete) ? self.rawTrailers : self.rawHeaders;
if (arr.length + 1 > self._maxRawHeaders) return;
arr.push(field);
}),
onHeaderValue: parserCallback(function (value) {
value = value.toString();
var arr = (self._headersComplete) ? self.rawTrailers : self.rawHeaders,
key = arr[arr.length - 1].toLowerCase();
if (arr.length + 1 > self._maxRawHeaders) return;
Expand Down
27 changes: 25 additions & 2 deletions src/tm.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,21 +196,44 @@ uint32_t tm_uptime_micro ();
double tm_timestamp ();
int tm_timestamp_update (double millis);

// BUFFER

// ENDIANNESS

#include "order32.h"

// Byte-order selector passed to the endianness-aware tm_* routines.
typedef enum {
  BE = 0,  // big-endian
  LE = 1   // little-endian
} tm_endian_t;

// UNICODE
// Host byte order expressed as a tm_endian_t value (O32_* presumably come
// from order32.h — confirm).
#define TM_ENDIAN_HOST (O32_HOST_ORDER == O32_BIG_ENDIAN ? BE : LE)
// Yield x byte-swapped when endianness e differs from the host's, otherwise
// x unchanged. Arguments are parenthesized so that expression arguments
// (e.g. `cond ? BE : LE`, `a | b`) expand with the intended precedence.
#define TM_ENDIAN_SWAP64(e, x) (((e) != TM_ENDIAN_HOST) ? __builtin_bswap64(x) : (x))
#define TM_ENDIAN_SWAP32(e, x) (((e) != TM_ENDIAN_HOST) ? __builtin_bswap32(x) : (x))
#define TM_ENDIAN_SWAP16(e, x) (((e) != TM_ENDIAN_HOST) ? __builtin_bswap16(x) : (x))

// BUFFER

void tm_buffer_float_write (uint8_t* buf, size_t index, float value, tm_endian_t endianness);
void tm_buffer_double_write (uint8_t* buf, size_t index, double value, tm_endian_t endianness);

// ENCODINGS (UNICODE / ASCII / BINARY)

// Sentinel code point value (UINT32_MAX). NOTE(review): presumably what
// tm_utf8_decode stores in *uc on malformed input — confirm in tm_utf8.c.
#define TM_UTF8_DECODE_ERROR UINT32_MAX
// Single code point decode / encode over a byte buffer of buf_len bytes.
// NOTE(review): the size_t result looks like a byte count — confirm.
size_t tm_utf8_decode(const uint8_t* buf, size_t buf_len, uint32_t* uc);
size_t tm_utf8_encode(uint8_t* buf, size_t buf_len, uint32_t uc);
// Whole-string conversions. The tm_str_to_* / tm_str_from_* functions below
// share one shape: input bytes in (buf, buf_len), result pointer stored in
// *dstptr, size_t return value. The Lua bindings in lua_tm.c use the return
// value as the result length in bytes and free *dstptr when it differs from
// the input pointer.
size_t tm_str_to_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
size_t tm_str_from_utf8 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);

// UTF-16 variants take the byte order to use as an extra argument.
size_t tm_str_to_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, tm_endian_t endianness);
size_t tm_str_from_utf16 (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr, tm_endian_t endianness);

// ASCII variants.
size_t tm_str_to_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
size_t tm_str_from_ascii (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);

// Binary variants.
size_t tm_str_to_binary (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);
size_t tm_str_from_binary (const uint8_t* buf, size_t buf_len, const uint8_t **dstptr);


// INTERNAL STRING MANIPULATION

uint32_t tm_str_codeat (const uint8_t* buf, size_t buf_len, size_t index);
Expand Down
Loading