From 001e5a6b0f282196208ba908fa0850ca8b5778ff Mon Sep 17 00:00:00 2001
From: Luna <cli-luna@protonmail.com>
Date: Wed, 9 Oct 2024 11:01:11 +0200
Subject: [PATCH] Add UTF-32, BOM and string conversion support

---
 source/numem/conv.d                 |   8 +-
 source/numem/format.d               |  19 +--
 source/numem/io/endian.d            |  22 +++-
 source/numem/string.d               |  67 +++++++++-
 source/numem/text/encoding.d        | 101 +++++++++++++++
 source/numem/text/package.d         |   5 +-
 source/numem/text/unicode/package.d |  73 ++++++++++-
 source/numem/text/unicode/utf16.d   | 184 ++++++++++++++++++++++++++--
 source/numem/text/unicode/utf32.d   | 122 ++++++++++++++++++
 source/numem/text/unicode/utf8.d    |  35 ++++--
 10 files changed, 591 insertions(+), 45 deletions(-)
 create mode 100644 source/numem/text/encoding.d
 create mode 100644 source/numem/text/unicode/utf32.d

diff --git a/source/numem/conv.d b/source/numem/conv.d
index 76651de..a18432a 100644
--- a/source/numem/conv.d
+++ b/source/numem/conv.d
@@ -9,11 +9,9 @@
     Utilities for converting between some basic types
 */
 module numem.conv;
-import numem.all;
 import core.stdc.stdlib;
 import std.traits;
-import numem.core.exception;
-import numem.format;
+import numem.all;
 
 @nogc:
 
@@ -148,6 +146,10 @@ nstring toString(T)(T item) if (__traits(hasMember, T, "toNString")) {
     return item.toNString();
 }
 
+nwstring toUTF16(T)(ref auto T str) if (isSomeString!T) {
+    // NOTE(review): the body is empty although an nwstring has to be returned; the conversion is not implemented yet.
+}
+
 @("toString")
 unittest {
     assert((32u).toString() == "32");
diff --git a/source/numem/format.d b/source/numem/format.d
index 1f8e422..4acc4f7 100644
--- a/source/numem/format.d
+++ b/source/numem/format.d
@@ -4,27 +4,14 @@ import numem.text.ascii;
 import numem.conv;
 import numem.collections;
 
-import std.traits;
+import std.traits : isBasicType;
 
 private {
-    enum CanConvertToNString(T) =
-        __traits(hasMember, T, "toNString") &&
-        is(T.toNString : nstring function()) &&
-        hasUDA(T.toNString, nogc);
-
-    enum CanConvertToDString(T) =
-        __traits(hasMember, T, "toString") &&
-        is(T.toNString : string function()) &&
-        hasUDA(T.toNString, nogc);
-
     nstring _formatSingle(T)(T element) {
-        static if(CanConvertToNString!T) {
-
-            return element.toNString();
-        } else static if(CanConvertToDString!T) {
+        static if(isStringable!T) {
 
             return nstring(element.toString());
-        } else static if (is(T : string)) {
+        } else static if (isSomeString!T) {
 
             return nstring(element);
         } else static if (isBasicType!T) {
diff --git a/source/numem/io/endian.d b/source/numem/io/endian.d
index 8b33564..45206ed 100644
--- a/source/numem/io/endian.d
+++ b/source/numem/io/endian.d
@@ -56,7 +56,7 @@ private {
 
     Is no-op if provided endianness is the same as the system's
 */
-ubyte[T.sizeof] toEndian(T)(T value, Endianess endianness) if (isNumeric!T) {
+ubyte[T.sizeof] toEndian(T)(T value, Endianess endianness) {
 
     // Get bytes from value
     ubyte[T.sizeof] output;
@@ -74,6 +74,26 @@ ubyte[T.sizeof] toEndian(T)(T value, Endianess endianness) if (isNumeric!T) {
     return output;
 }
 
+/**
+    Returns [in_] with its bytes arranged for the specified endianness,
+    read back as a value of the same type.
+    Hands [in_] back untouched if that endianness is the system's own.
+*/
+T toEndianReinterpret(T)(T in_, Endianess endianness) {
+    // Same order as the machine: nothing to rearrange.
+    if (endianness == NATIVE_ENDIAN)
+        return in_;
+
+    // Overlay the value with its raw bytes so the array from toEndian can be read back as a T.
+    union Overlay {
+        T value;
+        ubyte[T.sizeof] bytes;
+    }
+    Overlay swapped;
+    swapped.bytes = toEndian!T(in_, endianness);
+    return swapped.value;
+}
+
 /**
     Gets a value from a different endianness.
 
diff --git a/source/numem/string.d b/source/numem/string.d
index d5c884e..36fbc2b 100644
--- a/source/numem/string.d
+++ b/source/numem/string.d
@@ -8,23 +8,67 @@ module numem.string;
 import numem.collections.vector;
 import numem.core;
 import std.string;
+import std.traits;
+import core.stdcpp.string;
+
+/// Gets whether the provided type is any string type: a length denoted one or a null terminated C string.
+enum isSomeString(T) =
+    isSomeSafeString!T ||
+    isSomeCString!T;
+
+/**
+    Gets whether the provided type is a length denoted (and
+    therefore "safe") string: some nstring or some D string slice.
+*/
+enum isSomeSafeString(T) =
+    isSomeNString!T ||
+    isSomeDString!T;
+
 
 /// Gets whether the provided type is some type of nstring.
 enum isSomeNString(T) = 
-    is(T == nstring) || is (T == nwstring) || is(T == ndstring);
+    is(inout(T) == inout(basic_string!C), C) && isSomeChar!C;
 
 /// Gets whether the provided type is some type of null terminated C string.
 enum isSomeCString(T) =
-    is(T : inout(char)*) || is(T : inout(wchar)*)|| is(T : inout(dchar)*);
+    is(inout(T) == inout(C)*, C) && isSomeChar!C;
 
 /// Gets whether the provided type is some type of D string slice.
 enum isSomeDString(T) =
-    is(T : inout(char)[]) || is(T : inout(wchar)[])|| is(T : inout(dchar)[]);
+    is(immutable(T) == immutable(C[]), C) && isSomeChar!C;
 
 /// Gets whether the provided type is a character
 enum isSomeChar(T) =
     is(T == char) || is(T == wchar) || is(T == dchar);
 
+/// Gets whether [T] has a @nogc `toString` member that returns some string type.
+template isStringable(T) {
+    static if (__traits(hasMember, T, "toString"))
+        enum isStringable = isSomeString!(ReturnType!(T.toString)) && hasFunctionAttributes!(T.toString, "@nogc");
+    else
+        enum isStringable = false;
+}
+
+/**
+    Gets the size in bytes of the element type that StringCharType resolves to for [T].
+*/
+enum StringCharSize(T) =
+    StringCharType!T.sizeof;
+
+/**
+    Resolves to the element type of a string-ish type, or to void for anything else.
+*/
+template StringCharType(T) {
+    // nstrings publish their element type directly.
+    static if (isSomeNString!T)
+        alias StringCharType = T.valueType;
+    // Other string types: take the type of an indexed element.
+    else static if (isSomeString!T)
+        alias StringCharType = typeof(T.init[0].init);
+    else
+        alias StringCharType = void;
+}
+
 /**
     Basic string type.
 
@@ -98,6 +142,15 @@ public:
         this.set_(text);
     }
 
+    /**
+        Creates a string from a length denoted string of any
+        character width by decoding it and re-encoding it as selfType.
+    */
+    this(T)(ref auto T rhs) if (isSomeSafeString!T) {
+        import numem.text.unicode : decode, encode;
+        this = encode!selfType(decode!T(rhs));
+    }
+
     /**
         Makes a copy of a string
     */
@@ -521,4 +574,12 @@ unittest {
     assert(struct_[1].str == "b");
 
     assert(copy.size() == 0);
+}
+
+@("string: encoding-conversion")
+unittest {
+    nwstring wstr = "Hello, world!"w;
+    nstring str = wstr;
+    // The nstring built from the wide string must equal the same text written as a narrow literal.
+    assert(str == "Hello, world!");
 }
\ No newline at end of file
diff --git a/source/numem/text/encoding.d b/source/numem/text/encoding.d
new file mode 100644
index 0000000..1d56dc6
--- /dev/null
+++ b/source/numem/text/encoding.d
@@ -0,0 +1,101 @@
+module numem.text.encoding;
+import numem.string;
+import numem.text.ascii;
+import numem.text.unicode;
+import numem.text.unicode.utf8;
+import numem.text.unicode.utf16;
+
+/**
+    Encodings that getEncoding can report
+*/
+enum Encoding {
+
+    /**
+        Unknown encoding
+    */
+    unknown,
+
+    /**
+        ASCII
+    */
+    ascii,
+    
+    /**
+        UTF-8
+    */
+    utf8,
+
+    /**
+        UTF-16 without BOM
+    */
+    utf16,
+
+    /**
+        UTF-16 w/ little endian BOM
+    */
+    utf16LE,
+
+    /**
+        UTF-16 w/ big endian BOM
+    */
+    utf16BE,
+
+    /**
+        UTF-32
+    */
+    utf32
+}
+
+/**
+    Gets the encoding of a run of text, picked by the character size of [T].
+*/
+Encoding getEncoding(T)(auto ref T str) @nogc if (isSomeString!T) {
+    static if (StringCharSize!T == 1) {
+        nstring nstr = str;
+        // ASCII until a non-ASCII character shows up; from there the whole text must validate as UTF-8.
+        foreach(char c; str[]) {
+            if (!isASCII(c)) {
+                if (validate(nstr))
+                    return Encoding.utf8;
+                else
+                    return Encoding.unknown;
+            }
+        }
+        return Encoding.ascii;
+
+    } else static if (StringCharSize!T == 2) {
+        // A BOM settles the answer by itself; only BOM-less text is validated.
+        nwstring nstr = str;
+        auto bom = getBOM(nstr);
+        if (bom != 0) {
+            return bom == 0x0000FEFF ? 
+                Encoding.utf16BE : 
+                Encoding.utf16LE;
+        } else if (validate(nstr)) {
+
+            return Encoding.utf16;
+        }
+        return Encoding.unknown;
+
+    } else static if (StringCharSize!T == 4) {
+        // 32-bit characters: UTF-32 if the text validates.
+        return validate(str) ? 
+            Encoding.utf32 : 
+            Encoding.unknown;
+    } else {
+
+        return Encoding.unknown;
+    }
+} 
+
+@("Get encoding")
+unittest {
+    import std.stdio : writeln;
+    // 8-bit text: plain ASCII, then text with non-ASCII characters.
+    assert("Hello, world!".getEncoding() == Encoding.ascii);
+    assert("あえおう".getEncoding() == Encoding.utf8);
+    // 16-bit text: without a BOM, then with each of the two BOM values.
+    assert("Hello, world!"w.getEncoding() == Encoding.utf16);
+    assert("\uFEFFHello, world!"w.getEncoding() == Encoding.utf16BE);
+    assert("\uFFFEHello, world!"w.getEncoding() == Encoding.utf16LE);
+}
\ No newline at end of file
diff --git a/source/numem/text/package.d b/source/numem/text/package.d
index a7bf380..967ecdf 100644
--- a/source/numem/text/package.d
+++ b/source/numem/text/package.d
@@ -8,4 +8,7 @@
 /**
     Numem text transformation utilities
 */
-module numem.text;
\ No newline at end of file
+module numem.text;
+
+public import numem.text.encoding;
+public import numem.text.ascii;
\ No newline at end of file
diff --git a/source/numem/text/unicode/package.d b/source/numem/text/unicode/package.d
index 96f9b48..5d6e2b6 100644
--- a/source/numem/text/unicode/package.d
+++ b/source/numem/text/unicode/package.d
@@ -5,8 +5,19 @@
     Authors: Luna the Foxgirl
 */
 
-module numem.unicode;
+module numem.text.unicode;
 import numem.collections.vector;
+import numem.io.endian;
+import numem.string;
+
+public import numem.text.unicode.utf8;
+public import numem.text.unicode.utf16;
+public import numem.text.unicode.utf32;
+
+// For encoding dispatch
+import utf8 = numem.text.unicode.utf8;
+import utf16 = numem.text.unicode.utf16;
+import utf32 = numem.text.unicode.utf32;
 
 @nogc nothrow:
 
@@ -29,6 +40,66 @@ bool hasSurrogatePairs(codepoint code) {
     return (code >= 0x0000D800 && code <= 0x0000DFFF);
 }
 
+/**
+    Gets whether the character is a BOM, be it the little endian or the big endian one
+*/
+bool isBOM(codepoint c) {
+    return isLittleEndianBOM(c) || isBigEndianBOM(c); 
+}
+
+/**
+    Gets whether [c] is 0xFFFE0000 or 0x0000FFFE. NOTE(review): which byte order these mean depends on the host that read them; confirm.
+*/
+pragma(inline, true)
+bool isLittleEndianBOM(codepoint c) {
+    return (c == 0xFFFE0000 || c == 0x0000FFFE);
+}
+
+/**
+    Gets whether [c] is 0xFEFF0000 or 0x0000FEFF. NOTE(review): which byte order these mean depends on the host that read them; confirm.
+*/
+pragma(inline, true)
+bool isBigEndianBOM(codepoint c) {
+    return (c == 0xFEFF0000 || c == 0x0000FEFF);
+}
+
+/**
+    Gets the endianess from a BOM read in machine order: U+FEFF means native order, anything else the opposite one
+*/
+Endianess getEndianFromBOM(codepoint c) {
+    if (c == 0x0000FEFF) return NATIVE_ENDIAN; // BOM read intact, so no swap is needed
+    return NATIVE_ENDIAN == Endianess.bigEndian ?
+        Endianess.littleEndian : Endianess.bigEndian;
+}
+
+/**
+    Decodes a length denoted string of any character width to codepoints
+*/
+UnicodeSequence decode(T)(ref auto T str) if (isSomeSafeString!T) {
+    static if (StringCharSize!T == 1)
+        return utf8.decode(str);
+    else static if (StringCharSize!T == 2)
+        return utf16.decode(str);
+    else static if (StringCharSize!T == 4)
+        return utf32.decode(str);
+    else
+        assert(0, "String type not supported.");
+}
+
+/**
+    Encodes a codepoint sequence as the nstring type [T]
+*/
+T encode(T)(ref auto UnicodeSequence seq) if (isSomeNString!T) {
+    static if (StringCharSize!T == 1)
+        return utf8.encode(seq);
+    else static if (StringCharSize!T == 2)
+        return utf16.encode(seq);
+    else static if (StringCharSize!T == 4)
+        return utf32.encode(seq);
+    else
+        assert(0, "String type not supported.");
+}
+
 /**
     Validates whether the codepoint is within spec
 */
diff --git a/source/numem/text/unicode/utf16.d b/source/numem/text/unicode/utf16.d
index 0740545..6afb964 100644
--- a/source/numem/text/unicode/utf16.d
+++ b/source/numem/text/unicode/utf16.d
@@ -5,10 +5,12 @@
     Authors: Luna the Foxgirl
 */
 
-module numem.unicode.utf16;
-import numem.unicode;
+module numem.text.unicode.utf16;
+import numem.text.unicode.utf32;
+import numem.text.unicode;
 import numem.collections.vector;
 import numem.string;
+import numem.io.endian;
 
 nothrow @nogc:
 
@@ -36,6 +38,82 @@ bool validate(wchar[2] c) {
         ((c[0] & utf16_smask) == utf16_lead && ((c[1] & utf16_smask) == utf16_trail));
 }
 
+/**
+    Validates whether the given nwstring is a valid UTF-16 string.
+
+    Forwards the contents of the string to the slice overload
+    of validate, which performs the actual checks.
+*/
+bool validate(nwstring str) {
+    return validate(str[]);
+}
+
+
+/**
+    Validates whether the given slice is a valid UTF-16 string.
+
+    A leading BOM that does not report the machine's byte order
+    causes the text to be converted with toMachineOrder first.
+*/
+bool validate(inout(wchar)[] str) {
+    nwstring tmp = str;
+
+    // Handle endianess.
+    codepoint bom = getBOM(str);
+    if (bom != 0 && getEndianFromBOM(bom) != NATIVE_ENDIAN) {
+        tmp = toMachineOrder(str);
+    }
+
+    size_t i = 0;
+    while(i < tmp.length) {
+        wchar[2] txt;
+
+        // Validate length: it must be known and fit in the rest of the string.
+        size_t clen = getLength(tmp[i]);
+        if (clen == 0) return false;
+        if (i+clen > tmp.length) return false;
+
+        txt[0..clen] = tmp[i..i+clen];
+        if (!validate(txt)) return false;
+
+        i += clen;
+    }
+
+    return true;
+}
+
+/**
+    Gets the BOM of the UTF-16 slice if it starts with one.
+
+    Otherwise returns a NUL character.
+*/
+codepoint getBOM(inout(wchar)[] str) {
+
+    // An empty slice cannot start with a BOM.
+    if (str.length == 0)
+        return 0;
+
+    // Only the first code unit may be a BOM.
+    // It is widened to a codepoint exactly as
+    // it is stored, without any byte swapping,
+    // so that it can be checked with isBOM and
+    // handed back to the caller unchanged.
+    codepoint first = cast(codepoint)str[0];
+    if (!isBOM(first))
+        return 0;
+
+    return first;
+}
+
+/**
+    Gets the BOM of the nwstring if it has one, by forwarding to the slice overload.
+
+    Otherwise returns a NUL character.
+*/
+codepoint getBOM(nwstring str) {
+    return getBOM(str[]);
+}
+
 /**
     Gets how many utf-16 units are in the specified character
 */
@@ -71,10 +149,52 @@ unittest {
 }
 
 /**
-    Decodes a single utf-16 character
+    Returns a string which is [str] converted to machine order.
+
+    If the string has no BOM it is assumed it's already in
+    machine order.
+*/
+nwstring toMachineOrder(inout(wchar)[] str) {
+
+    // Nothing to convert in an empty string.
+    if (str.length == 0)
+        return nwstring.init;
+
+    // No BOM, or a BOM reporting the machine's own order:
+    // hand back a plain copy.
+    codepoint mark = getBOM(str);
+    Endianess order = getEndianFromBOM(mark);
+    if (mark == 0 || order == NATIVE_ENDIAN)
+        return nwstring(str);
+
+    // Otherwise reinterpret every code unit, BOM included.
+    nwstring swapped;
+    foreach(ref const(wchar) unit; str) {
+        swapped ~= unit.toEndianReinterpret(order);
+    }
+    return swapped;
+}
+
+/**
+    Returns a string which is [str] converted to machine order.
+
+    If the string has no BOM it is assumed to already be in
+    machine order. Forwards to the slice overload.
+*/
+nwstring toMachineOrder(nwstring str) {
+    return toMachineOrder(str[]);
+}
+
+/**
+    Decodes a single utf-16 character,
+
+    Character is assumed to be in the same
+    endianness as the system!
 */
 codepoint decode(wchar[2] chr, ref size_t read) {
+    // Handle endianness
     read = chr[0].getLength();
+    
     switch(read) {
         default:
             read = 1;
@@ -93,18 +213,59 @@ codepoint decode(wchar[2] chr, ref size_t read) {
 }
 
 /**
-    Decodes a utf-16 string
+    Decodes a single utf-16 character from a 
+    nwstring.
 */
-UnicodeSequence decode(nwstring str) {
+codepoint decodeOne(nwstring str, size_t offset = 0) {
+    if (str.length == 0) 
+        return unicodeReplacementCharacter;
+
+    // Gets the string in the current machine order.
+    str = str.toMachineOrder();
+
+    // Skip [offset] characters. NOTE(review): offset is taken as a character count, not a code unit index.
+    size_t pos = 0;
+    foreach(_; 0..offset) {
+        size_t skip = getLength(str[pos]);
+        pos += skip > 0 ? skip : 1; // step over units of unknown length one at a time
+        // We're out of characters to read.
+        if (pos >= str.length)
+            return unicodeReplacementCharacter;
+    }
+
+    // A pair cut off by the end of the string cannot be decoded.
+    size_t read = getLength(str[pos]);
+    if (pos+read > str.length)
+        return unicodeReplacementCharacter;
+    wchar[2] tmp;
+    tmp[0..read] = str[pos..pos+read];
+    return decode(tmp, read);
+}
+
+/**
+    Decodes a UTF-16 string.
+
+    This function will automatically detect BOMs
+    and handle endianness where applicable.
+*/
+UnicodeSequence decode(nwstring str, bool stripBOM = false) {
     UnicodeSequence code;
 
+    // Gets the string in the current machine order.
+    nwstring tmp = str.toMachineOrder();
     size_t i = 0;
-    while(i < str.size()) {
+
+    // Strip BOM if there is one.
+    if (stripBOM && getBOM(tmp)) {
+        i++;
+    }
+
+    while(i < tmp.size()) {
         wchar[2] txt;
 
         // Validate length, add FFFD if invalid.
-        size_t clen = str[i].getLength();
-        if (clen >= i+str.size() || clen == 0) {
+        size_t clen = tmp[i].getLength();
+        if (clen >= i+tmp.size() || clen == 0) {
             code ~= unicodeReplacementCharacter;
             i++;
         }
@@ -128,9 +289,14 @@ unittest {
 /**
     Encodes a unicode sequence to UTF-16
 */
-nwstring encode(UnicodeSlice slice) {
+nwstring encode(UnicodeSlice slice, bool addBOM = false) {
     nwstring out_;
 
+    // Add BOM if requested.
+    if (addBOM) {
+        out_ ~= cast(wchar)0xFEFF;
+    }
+
     size_t i = 0;
     while(i < slice.length) {
         wchar[2] txt;
diff --git a/source/numem/text/unicode/utf32.d b/source/numem/text/unicode/utf32.d
new file mode 100644
index 0000000..fc521f3
--- /dev/null
+++ b/source/numem/text/unicode/utf32.d
@@ -0,0 +1,122 @@
+/*
+    Copyright © 2024, Inochi2D Project
+    Distributed under the 2-Clause BSD License, see LICENSE file.
+
+    Authors: Luna the Foxgirl
+*/
+
+module numem.text.unicode.utf32;
+import numem.text.unicode;
+import numem.string;
+import numem.io.endian;
+
+@nogc nothrow:
+
+/**
+    Validates a UTF32 codepoint using the package-level codepoint check
+*/
+bool validate(dchar c) {
+    return numem.text.unicode.validate(cast(codepoint)c);
+}
+
+/**
+    Validates a UTF32 string by forwarding its contents to the slice overload
+*/
+bool validate(ndstring str) {
+    return validate(str[]);
+}
+
+/**
+    Validates a UTF32 string: every character in it has to pass validate
+*/
+bool validate(inout(dchar)[] str) {
+    ndstring tmp = str;
+
+    // Handle endianess: convert when a BOM reports a non-native order.
+    codepoint bom = getBOM(str);
+    if (bom != 0 && getEndianFromBOM(bom) != NATIVE_ENDIAN) {
+        tmp = toMachineOrder(str);
+    }
+    // One failing character makes the whole string invalid.
+    foreach(dchar c; tmp) {
+        if (!validate(c)) 
+            return false;
+    }
+
+    return true;
+}
+
+/**
+    Gets the BOM of the UTF-32 slice if its first character is one, otherwise 0
+*/
+codepoint getBOM(inout(dchar)[] str) {
+    if (str.length == 0)
+        return 0;
+    
+    // This is UTF32, so the first element can be checked as it is.
+    if (isBOM(str[0]))
+        return str[0];
+
+    return 0;
+}
+
+/**
+    Produces a copy of [str] in machine byte order.
+
+    Text without a BOM is assumed to be in machine order
+    already and is copied as-is.
+*/
+ndstring toMachineOrder(inout(dchar)[] str) {
+
+    // Nothing to convert in an empty string.
+    if (str.length == 0)
+        return ndstring.init;
+
+    // No BOM, or a BOM reporting the machine's own order:
+    // hand back a plain copy.
+    codepoint mark = getBOM(str);
+    Endianess order = getEndianFromBOM(mark);
+    if (mark == 0 || order == NATIVE_ENDIAN)
+        return ndstring(str);
+
+    // Otherwise reinterpret every character, BOM included.
+    ndstring swapped;
+    foreach(ref const(dchar) unit; str) {
+        swapped ~= unit.toEndianReinterpret(order);
+    }
+
+    return swapped;
+}
+
+/**
+    Decodes a single UTF-32 character; yields unicodeReplacementCharacter if it fails validate
+*/
+codepoint decode(dchar c) {
+    if (!validate(c))
+        return unicodeReplacementCharacter;
+    return c;
+}
+
+/**
+    Decodes a UTF-32 string to a sequence of codepoints
+*/
+UnicodeSequence decode(inout(dchar)[] str) {
+    UnicodeSequence tmp;
+
+    foreach(ref c; str) {
+        tmp ~= decode(c);
+    }
+
+    return tmp;
+}
+
+/**
+    Encodes a UTF-32 string.
+
+    Since UnicodeSequence is already technically
+    UTF-32 this doesn't do much other than
+    throw the data into a ndstring.
+*/
+ndstring encode(UnicodeSequence sequence) {
+    return ndstring(cast(dchar[])sequence[0..$]);
+}
\ No newline at end of file
diff --git a/source/numem/text/unicode/utf8.d b/source/numem/text/unicode/utf8.d
index fadc298..cd55e8d 100644
--- a/source/numem/text/unicode/utf8.d
+++ b/source/numem/text/unicode/utf8.d
@@ -5,13 +5,11 @@
     Authors: Luna the Foxgirl
 */
 
-module numem.unicode.utf8;
-import numem.unicode;
+module numem.text.unicode.utf8;
+import numem.text.unicode;
 import numem.collections.vector;
 import numem.string;
-
-// For some reason D really wants this import.
-import numem.unicode : validate;
+import numem.text.unicode : validate;
 
 @nogc nothrow:
 
@@ -112,16 +110,23 @@ unittest {
 }
 
 /**
-    Returns whether the specified string is a valid UTF-8 string
+    Returns whether the given nstring is a valid UTF-8 string
 */
 bool validate(nstring str) {
+    return validate(str[]);
+}   
+
+/**
+    Returns whether the given nstring is a valid UTF-8 string
+*/
+bool validate(inout(char)[] str) {
     size_t i = 0;
-    while(i < str.size) {
+    while(i < str.length) {
         char[4] txt;
 
         // Validate length
         size_t clen = getLength(str[i]);
-        if (clen >= i+str.size()) return false;
+        if (clen >= i+str.length) return false;
         if (clen == 0) return false;
         
         // Validate sequence
@@ -276,16 +281,16 @@ unittest {
     Decodes a string to a vector of codepoints.
     Invalid codes will be replaced with unicodeReplacementCharacter
 */
-UnicodeSequence decode(nstring str) {
+UnicodeSequence decode(inout(char)[] str) {
     UnicodeSequence code;
 
     size_t i = 0;
-    while(i < str.size()) {
+    while(i < str.length) {
         char[4] txt;
 
         // Validate length, add FFFD if invalid.
         size_t clen = str[i].getLength();
-        if (clen >= i+str.size() || clen == 0) {
+        if (clen >= i+str.length || clen == 0) {
             code ~= unicodeReplacementCharacter;
             i++;
         }
@@ -298,6 +303,14 @@ UnicodeSequence decode(nstring str) {
     return code;
 }
 
+/**
+    Decodes an nstring to a vector of codepoints by forwarding to the slice overload.
+    Invalid codes will be replaced with unicodeReplacementCharacter
+*/
+UnicodeSequence decode(nstring str) {
+    return decode(str[]);
+}
+
 @("decode: UTF-8 string")
 unittest {
     import std.stdio : writeln;