-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathw1251.erl
100 lines (91 loc) · 2.12 KB
/
w1251.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
-module(w1251).
-export([encode/1, decode/1]).
%% Run-length description of the upper half (bytes 16#80..16#FF) of the
%% Windows-1251 code page.
%%
%% Every {FirstCodepoint, Length} entry stands for Length consecutive bytes
%% that map to Length consecutive Unicode code points beginning at
%% FirstCodepoint. Entries appear in byte order, the first one starting at
%% byte 16#80, so a byte's position is found by summing the lengths before it.
%% The {16#0000, 1} entry is only a filler that keeps byte 16#98 in the
%% sequence.
mapping() ->
    [
        %% 16#80..16#87
        {16#0402, 2}, {16#201A, 1}, {16#0453, 1}, {16#201E, 1},
        {16#2026, 1}, {16#2020, 2},
        %% 16#88..16#8F
        {16#20AC, 1}, {16#2030, 1}, {16#0409, 1}, {16#2039, 1},
        {16#040A, 1}, {16#040C, 1}, {16#040B, 1}, {16#040F, 1},
        %% 16#90..16#97
        {16#0452, 1}, {16#2018, 2}, {16#201C, 2}, {16#2022, 1},
        {16#2013, 2},
        %% 16#98..16#9F
        {16#0000, 1}, {16#2122, 1}, {16#0459, 1}, {16#203A, 1},
        {16#045A, 1}, {16#045C, 1}, {16#045B, 1}, {16#045F, 1},
        %% 16#A0..16#A7
        {16#00A0, 1}, {16#040E, 1}, {16#045E, 1}, {16#0408, 1},
        {16#00A4, 1}, {16#0490, 1}, {16#00A6, 2},
        %% 16#A8..16#AF
        {16#0401, 1}, {16#00A9, 1}, {16#0404, 1}, {16#00AB, 4},
        {16#0407, 1},
        %% 16#B0..16#B7
        {16#00B0, 2}, {16#0406, 1}, {16#0456, 1}, {16#0491, 1},
        {16#00B5, 3},
        %% 16#B8..16#BF
        {16#0451, 1}, {16#2116, 1}, {16#0454, 1}, {16#00BB, 1},
        {16#0458, 1}, {16#0405, 1}, {16#0455, 1}, {16#0457, 1},
        %% 16#C0..16#FF
        {16#0410, 64}
    ].
%% @doc Maps one Unicode code point to its single-byte Windows-1251 value.
%%
%% Code points 0..16#7F are returned unchanged because the ASCII range is
%% shared with Windows-1251. Any other code point is looked up in mapping();
%% when no run contains it, the substitution byte $? is returned.
encode_single(Codepoint) when is_integer(Codepoint), Codepoint >= 0, Codepoint < 16#80 ->
    %% ASCII must not reach the table walk: U+0000 would match the
    %% {16#0000, 1} filler run and come back as the unassigned byte 16#98,
    %% and U+007F (DEL) would fall through to the $? substitute.
    Codepoint;
encode_single(Codepoint) ->
    encode_single(Codepoint, 16#80, mapping()).

%% Walks the run table. Byte is the Windows-1251 value of the first code point
%% in the run at the head of the list and advances by each skipped run's length.
encode_single(_Codepoint, _Byte, []) ->
    $?;
encode_single(Codepoint, Byte, [{First, Length} | _])
  when First =< Codepoint, Codepoint < First + Length ->
    Byte + (Codepoint - First);
encode_single(Codepoint, Byte, [{_First, Length} | Rest]) ->
    encode_single(Codepoint, Byte + Length, Rest).
%% @doc Maps one Windows-1251 byte to its Unicode code point.
%%
%% Bytes 0..16#7F are ASCII and are returned unchanged. Bytes from 16#80 up
%% are resolved through mapping(). Byte 16#98, which has no character assigned
%% in Windows-1251, and any offset past the end of the table decode to U+FFFD.
decode_single(Byte) when is_integer(Byte), Byte >= 0, Byte < 16#80 ->
    %% Without this clause the offset computed below would be negative, the
    %% first table run would match, and e.g. 16#7F would decode to U+0401.
    Byte;
decode_single(Byte) ->
    decode_single(Byte - 16#80, mapping()).

%% Walks the run table. Offset is the distance of the wanted byte from the
%% first byte of the run at the head of the list.
decode_single(_Offset, []) ->
    16#FFFD; % U+FFFD REPLACEMENT CHARACTER
decode_single(Offset, [{16#0000, Length} | _]) when Offset < Length ->
    %% mapping() uses code point 0 purely as a filler for the unassigned byte;
    %% report it as undecodable rather than emitting a NUL character.
    16#FFFD;
decode_single(Offset, [{First, Length} | _]) when Offset < Length ->
    First + Offset;
decode_single(Offset, [{_First, Length} | Rest]) ->
    decode_single(Offset - Length, Rest).
%% @doc Converts UTF-8 text to Windows-1251.
%%
%% UTF8 is either a binary or a list that list_to_binary/1 accepts, holding
%% UTF-8 encoded bytes. The result has the same form as the input: a binary
%% for a binary, a list of bytes for a list. Each code point becomes exactly
%% one output byte; code points outside ASCII are translated by
%% encode_single/1. Input that is not valid UTF-8 matches no clause and fails
%% with function_clause.
encode(UTF8) when is_list(UTF8) ->
    binary_to_list(encode(list_to_binary(UTF8)));
encode(UTF8) ->
    encode(UTF8, <<>>).

%% Consumes one code point per step and appends one byte to Acc.
encode(<<>>, Acc) ->
    Acc;
encode(<<Codepoint/utf8, Rest/binary>>, Acc) when Codepoint < 16#80 ->
    %% The whole ASCII range, 16#7F (DEL) included, is identical in
    %% Windows-1251 and is copied through untouched.
    encode(Rest, <<Acc/binary, Codepoint:8>>);
encode(<<Codepoint/utf8, Rest/binary>>, Acc) ->
    Byte = encode_single(Codepoint),
    encode(Rest, <<Acc/binary, Byte:8>>).
%% @doc Converts Windows-1251 text to UTF-8.
%%
%% W1251 is either a binary or a list that list_to_binary/1 accepts, holding
%% Windows-1251 bytes. The result has the same form as the input: a UTF-8
%% binary for a binary, a list of UTF-8 bytes for a list. Bytes outside ASCII
%% are translated by decode_single/1.
decode(W1251) when is_list(W1251) ->
    binary_to_list(decode(list_to_binary(W1251)));
decode(W1251) ->
    decode(W1251, <<>>).

%% Consumes one input byte per step and appends its UTF-8 encoding to Acc.
decode(<<>>, Acc) ->
    Acc;
decode(<<Byte:8, Rest/binary>>, Acc) when Byte < 16#80 ->
    %% The whole ASCII range, 16#7F (DEL) included, maps to itself. Sending
    %% 16#7F to the table lookup would produce a negative offset there.
    decode(Rest, <<Acc/binary, Byte/utf8>>);
decode(<<Byte:8, Rest/binary>>, Acc) ->
    Codepoint = decode_single(Byte),
    decode(Rest, <<Acc/binary, Codepoint/utf8>>).