-
Notifications
You must be signed in to change notification settings - Fork 1
/
README.scala
45 lines (40 loc) · 1.42 KB
/
README.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/**
 * These classes represent individual Unicode characters.
 *
 * `CodePoint` is the common supertype of the representations declared in the
 * `CodePoint` companion object (`Utf8`, `Utf16` and `Utf32`). The class is
 * `sealed`, so every direct subclass has to be defined in this source file.
 */
sealed abstract class CodePoint {
/** Returns a `CodePoint.Utf32`; presumably this same character in its UTF-32 form -- TODO confirm. */
def boxed: CodePoint.Utf32
/** Returns an `Int` for this code point; presumably its Unicode scalar value -- TODO confirm. */
def asInt: Int
/**
 * String form of this code point.
 *
 * NOTE(review): `AnyRef` already supplies a concrete `toString`, so this
 * abstract re-declaration may not actually force subclasses to override it --
 * confirm that each subclass provides its own implementation.
 */
def toString: java.lang.String
}
/**
 * Companion object holding the three concrete `CodePoint` representations, one
 * per backing type. The class bodies are elided in this listing.
 *
 * NOTE(review): compiler-generated case-class `equals`/`hashCode` compare
 * `Array` fields by reference, not by content. Unless the elided bodies
 * override them, two `Utf8` (or `Utf16`) values wrapping equal but distinct
 * arrays will not be `==` -- confirm against the full implementation.
 */
object CodePoint {
/** Code point held in an `Array[Byte]`; presumably its UTF-8 byte sequence -- TODO confirm. */
case class Utf8(private val underlying: Array[Byte]) extends CodePoint { /* ... */ }
/** Code point held in an `Array[Char]`; presumably its UTF-16 code unit(s) -- TODO confirm. */
case class Utf16(private val underlying: Array[Char]) extends CodePoint { /* ... */ }
/** Code point held in a single `Int`. This is the type returned by `CodePoint#boxed`. */
case class Utf32(private val underlying: Int) extends CodePoint { /* ... */ }
}
/**
 * These classes represent Unicode strings, and conversions between encodings.
 *
 * Whatever the backing encoding, a `UtfString` is a `Seq[CodePoint.Utf32]`, so
 * the standard `Seq` operations are expressed in terms of `Utf32` code points.
 *
 * @tparam U element type of the backing array (`Int`, `Char` or `Byte` in the
 *           subclasses declared in this file)
 */
abstract class UtfString[U] extends Seq[CodePoint.Utf32] {
/** The encoded data of this string. */
val underlying: Array[U] /* Backed by Arrays of values, not refs */
/**
 * Length of this sequence. As a `Seq[CodePoint.Utf32]` this should be the
 * number of code points, which may differ from `underlying.length` --
 * TODO confirm against the implementations.
 */
def length: Int
/** Returns this string as a `Utf32String`. */
def toUtf32String: Utf32String
/** Returns this string as a `Utf16String`. */
def toUtf16String: Utf16String
/** Returns this string as a `Utf8String`. */
def toUtf8String: Utf8String
// ... (further members are elided in this listing)
}
/**
 * Unicode string backed by an `Array[Int]`.
 *
 * Utf32 supports O(1) lookup.
 *
 * The type arguments of `IndexedSeqOptimized` and the class body are elided
 * in this listing.
 */
class Utf32String extends UtfString[Int] with IndexedSeqOptimized[/*...*/] { /* ... */ }
/**
 * Unicode string backed by an `Array[Char]`.
 *
 * Utf8 and Utf16 support O(n) lookup (presumably because a code point may
 * occupy a variable number of array elements -- TODO confirm).
 *
 * The class body is elided in this listing.
 */
class Utf16String extends UtfString[Char] { /* ... */ }
/**
 * Unicode string backed by an `Array[Byte]`.
 *
 * Supports O(n) lookup (presumably because a code point may occupy a variable
 * number of bytes -- TODO confirm).
 *
 * The class body is elided in this listing.
 */
class Utf8String extends UtfString[Byte] { /* ... */ }
/**
* Here is a little test.
*/
import net.arya.utf.Implicits._
/**
 * Small demo: converts a `String` into `Utf8String` / `Utf16String` and then
 * uses a plain `Seq` method on the result.
 *
 * The `String` -> `Utf8String` / `Utf16String` assignments rely on implicit
 * conversions, presumably the ones brought in by the `net.arya.utf.Implicits._`
 * import that precedes this object -- confirm.
 *
 * This object declares an explicit `main` instead of extending
 * `scala.Application`: that trait was deprecated in Scala 2.9 and removed in
 * 2.11, so the original declaration no longer compiles on later versions.
 * The vals are kept as public members, exactly as before, and are still
 * evaluated when the object is initialised.
 */
object Test1 {
  val singles = "«küßî»"
  val doubles = "“ЌύБЇ”"

  // Two 3-byte quotation marks plus four 2-byte letters.
  val doubles8: Utf8String = doubles // UTF-8 uses 14 Bytes
  val doubles16: Utf16String = doubles // UTF-16 uses 6 Chars = 12 bytes

  val foo: String = doubles8.mkString(":") // .mkString is defined on Seq...

  /**
   * Program entry point.
   *
   * The body is intentionally empty: invoking `main` forces initialisation of
   * this object, which evaluates the vals above. That is the only work the
   * demo performs.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = ()
}