Skip to content

Commit eabb530

Browse files
committed
poc: uint8array-based encoding
1 parent d11ab4b commit eabb530

File tree

1 file changed

+148
-4
lines changed

1 file changed

+148
-4
lines changed

index.ts

Lines changed: 148 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,65 @@ function alphabet(letters: string | string[]): Coder<number[], string[]> {
118118
};
119119
}
120120

121+
/**
 * Maps radix digits (indexes into `letters`) to the char codes of the matching
 * alphabet letters, and back. Both directions operate on Uint8Array.
 *
 * @param letters - alphabet string; every character must be a distinct ASCII character.
 * @returns coder whose `encode` turns digit indexes into letter char codes and whose
 *   `decode` turns letter char codes back into digit indexes.
 * @throws Error when the alphabet contains a non-ASCII or a duplicate character;
 *   the returned coder throws on a digit outside the alphabet (`encode`) or on an
 *   unknown letter (`decode`).
 * @__NO_SIDE_EFFECTS__
 */
function alphabet32(letters: string): Coder<Uint8Array, Uint8Array> {
  astr('alphabet', letters);
  const lettersA = new TextEncoder().encode(letters);
  const len = lettersA.length;

  // Reverse lookup table: char code -> digit index (e.g. "b" -> 1), -1 when absent.
  // Int8Array is enough: a duplicate-free ASCII alphabet has at most 128 letters,
  // so the stored indexes never exceed 127.
  const indexes = new Int8Array(256).fill(-1);
  for (let i = 0; i < len; i++) {
    const letter = lettersA[i]!;
    // ASCII covers 0..127; non-ASCII characters are encoded as UTF-8 bytes >= 128.
    if (letter > 127) throw new Error('alphabet: non-ascii digit');
    if (indexes[letter] !== -1) throw new Error('alphabet: duplicate digit');
    indexes[letter] = i;
  }

  return {
    encode: (digits: Uint8Array): Uint8Array => {
      abytes(digits);
      // Elements of a Uint8Array are never negative, so only the upper bound is checked.
      return digits.map((i) => {
        if (i >= len)
          throw new Error(
            `alphabet.encode: digit index outside alphabet "${i}". Allowed: ${letters}`
          );
        return lettersA[i]!;
      });
    },
    decode: (input: Uint8Array): Uint8Array => {
      abytes(input);
      return input.map((letter) => {
        // `letter` is a byte (0..255), so the lookup always hits the 256-entry table.
        const i = indexes[letter]!;
        if (i === -1) throw new Error(`Unknown letter: "${letter}". Allowed: ${letters}`);
        return i;
      });
    },
  };
}
161+
162+
/**
 * Converts between a Uint8Array of ASCII char codes and the equivalent string
 * (one byte per character).
 *
 * `encode` turns bytes into a string and `decode` turns a string into bytes.
 * Both directions reject input containing anything outside ASCII, so that
 * `decode(encode(x))` always reproduces `x`.
 *
 * @returns coder between char-code bytes and strings.
 * @throws Error('Invalid characters in input') from either direction when the
 *   input contains non-ASCII data.
 * @__NO_SIDE_EFFECTS__
 */
function charcodes(): Coder<Uint8Array, string> {
  return {
    encode: (from) => {
      abytes(from);
      let str: string;
      try {
        // `fatal` makes malformed UTF-8 throw instead of being replaced by U+FFFD.
        str = new TextDecoder('utf-8', { fatal: true }).decode(from);
      } catch {
        throw new Error('Invalid characters in input');
      }
      // A well-formed multi-byte sequence (or a stripped BOM) always yields fewer
      // UTF-16 code units than bytes, so equal lengths mean the input was pure ASCII.
      if (str.length !== from.length) throw new Error('Invalid characters in input');
      return str;
    },
    decode: (to) => {
      astr('charcodes.decode', to);
      const bytes = new TextEncoder().encode(to);
      // Any non-ASCII character takes more than one UTF-8 byte per UTF-16 code unit.
      if (bytes.length !== to.length) throw new Error('Invalid characters in input');
      return bytes;
    },
  };
}
179+
121180
/**
122181
* @__NO_SIDE_EFFECTS__
123182
*/
@@ -135,6 +194,34 @@ function join(separator = ''): Coder<string[], string> {
135194
};
136195
}
137196

197+
/**
 * Pads a string of `bits`-wide digits with `chr` so that the total bit length
 * (`length * bits`) is a whole number of bytes, and strips that padding again.
 *
 * @param bits - number of bits each character of the string represents.
 * @param chr - padding character; must be exactly one character long.
 * @returns coder whose `encode` appends padding and whose `decode` validates and removes it.
 * @throws Error when `chr` is not a single character; `decode` throws when the input
 *   is not byte-aligned or carries more padding than needed.
 * @__NO_SIDE_EFFECTS__
 */
function strpadding(bits: number, chr = '='): Coder<string, string> {
  anumber(bits);
  astr('strpadding', chr);
  // Padding is counted and stripped one character at a time: an empty `chr` would
  // never reach alignment (endless loop) and a longer one would never be matched.
  if (chr.length !== 1) throw new Error('strpadding: padding character should be a single character');
  return {
    encode(data: string): string {
      astr('strpadding.encode', data);
      // Count the missing characters first, then append them in one go.
      let padCount = 0;
      while (((data.length + padCount) * bits) % 8) padCount++;
      return data + chr.repeat(padCount);
    },
    decode(input: string): string {
      astr('strpadding.decode', input);
      let end = input.length;
      if ((end * bits) % 8)
        throw new Error('padding: invalid, string should have whole number of bytes');
      // Walk back over trailing padding characters.
      for (; end > 0 && input[end - 1] === chr; end--) {
        const last = end - 1;
        const byte = last * bits;
        // If the data before this padding character is already byte-aligned,
        // the character was not needed.
        if (byte % 8 === 0) throw new Error('padding: invalid, string has too much padding');
      }
      return input.slice(0, end);
    },
  };
}
224+
138225
/**
139226
* Pad strings array so it has integer number of bits
140227
* @__NO_SIDE_EFFECTS__
@@ -262,6 +349,40 @@ function convertRadix2(data: number[], from: number, to: number, padding: boolea
262349
return res;
263350
}
264351

352+
/**
 * Regroups a stream of `from`-bit words into `to`-bit words, reading from and
 * writing to Uint8Array.
 *
 * @param data - input words; each must be smaller than `powers[from]`.
 * @param from - bit width of the input words, in (0..32].
 * @param to - bit width of the output words, in (0..8]: every output word is
 *   stored in a single Uint8Array element.
 * @param padding - when true, leftover bits are emitted as one final word, shifted
 *   left so they occupy its high bits; when false, leftover bits must be fewer than
 *   `from` and all zero, otherwise an error is thrown.
 * @returns the converted words (a view over the buffer allocated for the result).
 * @throws Error on invalid widths, carry overflow, an out-of-range input word, or
 *   (with `padding === false`) excess / non-zero trailing bits.
 */
function convertRadix2ua(data: Uint8Array, from: number, to: number, padding: boolean): Uint8Array {
  abytes(data);
  if (from <= 0 || from > 32) throw new Error(`convertRadix2: wrong from=${from}`);
  // A wider output word would be silently truncated to 8 bits by the Uint8Array store.
  if (to <= 0 || to > 8) throw new Error(`convertRadix2: wrong to=${to}`);
  const carryBits = radix2carry(from, to);
  if (carryBits > 32) {
    throw new Error(
      `convertRadix2: carry overflow from=${from} to=${to} carryBits=${carryBits}`
    );
  }
  let carry = 0;
  let pos = 0; // bitwise position in current element
  // NOTE(review): `powers[i]` is presumably 2**i (it is used as an exclusive bound
  // and, minus one, as a bit mask) - confirm against its definition.
  const max = powers[from]!;
  const mask = powers[to]! - 1;
  const dataLength = data.length;
  // Upper bound on the number of output words: ceil(total input bits / to).
  const res = new Uint8Array(Math.ceil((dataLength * from) / to));
  let out = 0;
  for (let i = 0; i < dataLength; i++) {
    const n = data[i]!;
    if (n >= max) throw new Error(`convertRadix2: invalid data word=${n} from=${from}`);
    carry = (carry << from) | n;
    if (pos + from > 32) throw new Error(`convertRadix2: carry overflow pos=${pos} from=${from}`);
    pos += from;
    // Emit every complete `to`-bit word currently held in the carry.
    for (; pos >= to; pos -= to) res[out++] = ((carry >> (pos - to)) & mask) >>> 0;
    const pow = powers[pos];
    if (pow === undefined) throw new Error('invalid carry');
    carry &= pow - 1; // clean carry, otherwise it will cause overflow
  }
  // Left-align the remaining `pos` bits inside one final output word.
  carry = (carry << (to - pos)) & mask;
  if (!padding && pos >= from) throw new Error('Excess padding');
  if (!padding && carry > 0) throw new Error(`Non-zero padding: ${carry}`);
  if (padding && pos > 0) res[out++] = carry >>> 0;
  return res.subarray(0, out);
}
385+
265386
/**
266387
* @__NO_SIDE_EFFECTS__
267388
*/
@@ -302,6 +423,29 @@ function radix2(bits: number, revPadding = false): Coder<Uint8Array, number[]> {
302423
};
303424
}
304425

426+
/**
 * Coder between bytes and `bits`-wide digits, with both sides held in Uint8Array.
 * If both bases are a power of the same number (like `2**8 <-> 2**64`) there is a
 * linear algorithm; this implementation covers power-of-two bases only.
 *
 * @param bits - width of one digit in bits, in (0..8]: each digit is stored in a
 *   single Uint8Array element, so wider digits cannot be represented.
 * @param revPadding - when false (default) `encode` pads leftover bits and `decode`
 *   rejects excess / non-zero padding; when true the two roles are swapped.
 * @returns coder whose `encode` splits bytes into digits and whose `decode` joins
 *   digits back into bytes.
 * @throws Error when `bits` is out of range or the conversion would overflow the carry.
 * @__NO_SIDE_EFFECTS__
 */
function radix2ua(bits: number, revPadding = false): Coder<Uint8Array, Uint8Array> {
  anumber(bits);
  // Digits wider than 8 bits would be silently truncated when stored in a Uint8Array.
  if (bits <= 0 || bits > 8) throw new Error('radix2: bits should be in (0..8]');
  if (radix2carry(8, bits) > 32 || radix2carry(bits, 8) > 32)
    throw new Error('radix2: carry overflow');
  return {
    encode: (bytes: Uint8Array) => {
      if (!isBytes(bytes)) throw new Error('radix2.encode input should be Uint8Array');
      return convertRadix2ua(bytes, 8, bits, !revPadding);
    },
    decode: (digits: Uint8Array) => {
      abytes(digits);
      return convertRadix2ua(digits, bits, 8, revPadding);
    },
  };
}
447+
448+
305449
// Extracts the parameter tuple type of function type `F`; resolves to `never`
// when `F` does not match a callable signature.
type ArgumentTypes<F extends Function> = F extends (...args: infer A) => any ? A : never;
306450
function unsafeWrapper<T extends (...args: any) => any>(fn: T) {
307451
afn(fn);
@@ -477,10 +621,10 @@ export const base64: BytesCoder = hasBase64Builtin ? {
477621
encode(b) { abytes(b); return (b as any).toBase64(); },
478622
decode(s) { return decodeBase64Builtin(s, false); },
479623
} : chain(
480-
radix2(6),
481-
alphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'),
482-
padding(6),
483-
join('')
624+
radix2ua(6),
625+
alphabet32('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'),
626+
charcodes(),
627+
strpadding(6),
484628
);
485629
/**
486630
* base64 from RFC 4648. No padding.

0 commit comments

Comments
 (0)