Skip to content

Commit

Permalink
More UTF handling for streams
Browse files Browse the repository at this point in the history
  • Loading branch information
LunaTheFoxgirl committed Oct 9, 2024
1 parent c73e820 commit 225a693
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 23 deletions.
26 changes: 23 additions & 3 deletions source/numem/io/endian.d
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
module numem.io.endian;
import numem.core;
import numem.collections.vector;
import numem.string;
import std.traits : isNumeric, isIntegral, isBasicType;
import std.traits : Unqual;
import std.bitmanip;

@nogc nothrow:

Expand Down Expand Up @@ -53,13 +53,33 @@ private {
}
}

/**
    Produces a copy of a string in the requested endianness.

    For strings whose code units are a single byte wide there is nothing
    to reorder, so the input is copied as-is. For wider code units the
    input is likewise copied as-is when the requested endianness already
    matches NATIVE_ENDIAN; otherwise every code unit is passed through
    toEndianReinterpret and appended to a fresh string.

    Params:
        str =       the string to convert.
        endianess = the byte order the result should be in.

    Returns:
        A new basic_string with the same character type as the input.
*/
@trusted
basic_string!(StringCharType!T) toEndian(T)(T str, Endianess endianess) if (isSomeSafeString!T) {
    alias Result = basic_string!(StringCharType!T);

    // Only multi-byte code units can have a byte order at all;
    // for single-byte strings this branch is compiled out entirely.
    static if (StringCharSize!T > 1) {
        if (endianess != NATIVE_ENDIAN) {

            // Rebuild the string one code unit at a time.
            // NOTE(review): toEndianReinterpret is declared outside this
            // view; presumably it byte-swaps a single unit - confirm.
            Result converted;
            foreach(idx, ref unit; str)
                converted ~= unit.toEndianReinterpret(endianess);

            return converted;
        }
    }

    // Nothing to reorder; hand back a plain copy.
    return Result(str);
}

/**
Converts a value to an array of the specified endianness.
Is no-op if provided endianness is the same as the system's
*/
@trusted
ubyte[T.sizeof] toEndian(T)(T value, Endianess endianness) {
ubyte[T.sizeof] toEndian(T)(T value, Endianess endianess) if (isBasicType!T) {
union tmp {
Unqual!T value;
ubyte[T.sizeof] bytes;
Expand All @@ -69,7 +89,7 @@ ubyte[T.sizeof] toEndian(T)(T value, Endianess endianness) {
tmp_.value = value;

// Swap endianness if necessary
if (endianness != NATIVE_ENDIAN) {
if (endianess != NATIVE_ENDIAN) {
ubyte[] slice = tmp_.bytes[0..$];
swapEndian(slice);
}
Expand Down
42 changes: 31 additions & 11 deletions source/numem/io/stream/reader.d
Original file line number Diff line number Diff line change
Expand Up @@ -53,31 +53,51 @@ public:

/// Ditto
@trusted
int read(T)(ref T val, size_t length) if (isSomeSafeString!T && StringCharSize!T == 1) {
if (length > val.length)
return -1;
int read(T)(ref T val, size_t length = 0) if (isSomeSafeString!T) {
import numem.text.unicode;

alias type = StringCharType!(T)*;
alias CharType = StringCharType!(T);

// User wants to read it all
if (length == 0)
length = val.length;

// TODO: Handle encoding schemes other than UTF8
// No out of bounds reads allowed.
if (length > val.length)
return -1;

// Size of a single unit
vector!ubyte tmp = vector!ubyte(length*StringCharSize!T);
// Vector to read bytes into.
vector!ubyte tmp = vector!ubyte(CharType.sizeof*length);

// Attempt reading data
int r = cast(int)stream.read(tmp, 0, length);
int r = cast(int)stream.read(tmp, 0, tmp.length);
if (r < 0)
return r;

// Reinterpret the data.
CharType[] reinterpreted = (cast(CharType*)tmp.ptr)[0..length];

static if (CharType.sizeof > 1) {

// If there's a BOM that will take precedence.
val = reinterpreted.toEndian(endian);
} else {

(cast(CharType*)val.ptr)[0..length] = reinterpreted[0..length];
}

// "Convert" the data via type punning.
(cast(type)val.ptr)[0..length] = (cast(type)tmp.ptr)[0..length];
return r;
}

/// Ditto
@trusted
int read(T)(ref T val, size_t length) if (isSomeVector!T) {
int read(T)(ref T val, size_t length = 0) if (isSomeVector!T) {

// User wants to read it all
if (length == 0)
length = val.length;

// No out of bounds reads allowed.
if (length > val.length)
return -1;

Expand Down
29 changes: 28 additions & 1 deletion source/numem/io/stream/writer.d
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,12 @@ public:
/// Ditto
@trusted
void write(T)(T val) if (isSomeSafeString!T) {
stream.write(cast(ubyte[])val[0..$]);
static if (StringCharSize!T > 1) {
auto data = val.toEndian(endian);
stream.write((cast(ubyte*)data.ptr)[0..data.length*StringCharSize!T]);
} else {
stream.write(cast(ubyte[])val[0..$]);
}
}

/// Ditto
Expand Down Expand Up @@ -129,4 +134,26 @@ unittest {
ulong val;
reader.read!ulong(val);
assert(val == MAGIC);
}

@("RW: UTF-32")
unittest {
    import numem.io.stream.memstream : MemoryStream;
    import numem.io.stream.reader : StreamReader;

    // Writer and reader are instantiated with the same (big-endian) byte
    // order so that a UTF-32 string written by one is read back by the other.
    alias BigEndianWriter = StreamWriter!(Endianess.bigEndian);
    alias BigEndianReader = StreamReader!(Endianess.bigEndian);

    // UTF-32 payload used for the round trip.
    enum EXPECTED = "Hello, world!"d;

    // Fixed in-memory backing store shared by the writer and the reader.
    ubyte[128] storage;
    auto memory = new MemoryStream(storage.ptr, storage.length);
    auto output = new BigEndianWriter(memory);
    auto input = new BigEndianReader(memory);

    // Destination string constructed from the payload's length.
    // NOTE(review): presumably this pre-sizes the string so that read()
    // knows how many code units to fetch - confirm against ndstring.
    ndstring decoded = ndstring(EXPECTED.length);

    output.write(EXPECTED);

    // Rewind so the reader starts at the first written byte.
    memory.seek(0);

    input.read(decoded);
    assert(decoded == EXPECTED);
}
10 changes: 6 additions & 4 deletions source/numem/text/unicode/utf16.d
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,19 @@ unittest {
/**
Returns a string which is [str] converted to machine order.
If the string has no BOM it is assumed it's already in
machine order.
If the string has no BOM the specified fallback endian will be used.
*/
nwstring toMachineOrder(inout(wchar)[] str) {
nwstring toMachineOrder(inout(wchar)[] str, Endianess fallbackEndian = NATIVE_ENDIAN) {

if (str.length == 0)
return nwstring.init;

codepoint bom = getBOM(str);
Endianess endian = getEndianFromBOM(bom);
if (bom != 0 && endian != NATIVE_ENDIAN) {
if (bom == 0)
endian = fallbackEndian;

if (endian != NATIVE_ENDIAN) {

// Flip all the bytes around.
nwstring tmp;
Expand Down
10 changes: 6 additions & 4 deletions source/numem/text/unicode/utf32.d
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,20 @@ codepoint getBOM(inout(dchar)[] str) {
/**
Returns a string which is [str] converted to machine order.
If the string has no BOM it is assumed it's already in
machine order.
If the string has no BOM the specified fallback endian will be used.
*/
ndstring toMachineOrder(inout(dchar)[] str) {
ndstring toMachineOrder(inout(dchar)[] str, Endianess fallbackEndian = NATIVE_ENDIAN) {

// Empty string early escape.
if (str.length == 0)
return ndstring.init;

codepoint bom = getBOM(str);
Endianess endian = getEndianFromBOM(bom);
if (bom != 0 && endian != NATIVE_ENDIAN) {
if (bom == 0)
endian = fallbackEndian;

if (endian != NATIVE_ENDIAN) {

// Flip all the bytes around
ndstring tmp;
Expand Down

0 comments on commit 225a693

Please sign in to comment.