-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add UTF-32, BOM and string conversion support
- Loading branch information
1 parent
5821b1a
commit 001e5a6
Showing
10 changed files
with
591 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
module numem.text.encoding; | ||
import numem.string; | ||
import numem.text.ascii; | ||
import numem.text.unicode; | ||
import numem.text.unicode.utf8; | ||
import numem.text.unicode.utf16; | ||
|
||
/** | ||
Currently supported encodings | ||
*/ | ||
enum Encoding { | ||
|
||
/** | ||
Unknown encoding | ||
*/ | ||
unknown, | ||
|
||
/** | ||
ASCII | ||
*/ | ||
ascii, | ||
|
||
/** | ||
UTF-8 | ||
*/ | ||
utf8, | ||
|
||
/** | ||
UTF-16 | ||
*/ | ||
utf16, | ||
|
||
/** | ||
UTF-16 w/ BOM | ||
*/ | ||
utf16LE, | ||
|
||
/** | ||
UTF-16 w/ BOM | ||
*/ | ||
utf16BE, | ||
|
||
/** | ||
*/ | ||
utf32 | ||
} | ||
|
||
/** | ||
Gets the encoding of a run of text. | ||
*/ | ||
Encoding getEncoding(T)(auto ref T str) @nogc if (isSomeString!T) { | ||
static if (StringCharSize!T == 1) { | ||
nstring nstr = str; | ||
|
||
foreach(char c; str[]) { | ||
if (!isASCII(c)) { | ||
if (validate(nstr)) | ||
return Encoding.utf8; | ||
else | ||
return Encoding.unknown; | ||
} | ||
} | ||
return Encoding.ascii; | ||
|
||
} else static if (StringCharSize!T == 2) { | ||
|
||
nwstring nstr = str; | ||
auto bom = getBOM(nstr); | ||
if (bom != 0) { | ||
return bom == 0x0000FEFF ? | ||
Encoding.utf16BE : | ||
Encoding.utf16LE; | ||
} else if (validate(nstr)) { | ||
|
||
return Encoding.utf16; | ||
} | ||
return Encoding.unknown; | ||
|
||
} else static if (StringCharSize!T == 4) { | ||
|
||
return validate(str) ? | ||
Encoding.utf32 : | ||
Encoding.unknown; | ||
} else { | ||
|
||
return Encoding.unknown; | ||
} | ||
} | ||
|
||
@("Get encoding") | ||
unittest { | ||
import std.stdio : writeln; | ||
|
||
assert("Hello, world!".getEncoding() == Encoding.ascii); | ||
assert("あえおう".getEncoding() == Encoding.utf8); | ||
|
||
assert("Hello, world!"w.getEncoding() == Encoding.utf16); | ||
assert("\uFEFFHello, world!"w.getEncoding() == Encoding.utf16BE); | ||
assert("\uFFFEHello, world!"w.getEncoding() == Encoding.utf16LE); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.