Skip to content

Commit

Permalink
feat: Add iterators over character units and codes
Browse files Browse the repository at this point in the history
  • Loading branch information
jajaperson committed Aug 12, 2022
1 parent 1eef987 commit 47bd89c
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,21 @@ and this project adheres to

## [Unreleased]

### Added

- `iter.create.fromChars()` and `iter.create.fromCharCodes()` for iterating over UTF-16 character units and codes respectively.

## [2.4.2] - 2020-07-30

### Changed

- Another attempt to fix the NPM release
(debugging actions can be awful sometimes).

## [2.4.1] - 2020-07-30

### Changed

- Fixed NPM release action.

## [2.4.0] - 2020-07-30
Expand Down
93 changes: 93 additions & 0 deletions lib/generators.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,96 @@ export function range(
},
};
}

/**
 * Creates an iterable over a string's char codes (UTF-16 code units).
 *
 * Note that, in contrast to [`String.prototype[@@iterator]`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/@@iterator),
 * this does not treat astral codepoints as single characters,
 * but rather as the constituent surrogate pair.
 * Each char code is therefore between `0x0000` and `0xffff = 2¹⁶ - 1`.
 * An empty string produces an empty iterable.
 * See the example below.
 * @param str - A string to extract char codes from.
 * @returns An iterable over the char codes, in string order.
 * @example
 * ```ts
 * import * as iter from "https://deno.land/x/iter/mod.ts";
 *
 * const str = "🦀💦🥱";
 * const chars = Uint16Array.from(iter.create.fromCharCodes(str));
 *
 * console.log(chars.length); // -> 6
 * console.log(chars[0].toString(16)); // -> d83e
 * console.log(chars[1].toString(16)); // -> dd80
 * console.log(chars[2].toString(16)); // -> d83d
 * console.log(chars[3].toString(16)); // -> dca6
 * console.log(chars[4].toString(16)); // -> d83e
 * console.log(chars[5].toString(16)); // -> dd71
 * console.log("\ud83e\udd80\ud83d\udca6\ud83e\udd71"); // -> 🦀💦🥱
 * ```
 */
export function fromCharCodes(str: string): IterableCircular<number> {
  return {
    *[Symbol.iterator]() {
      // Bound the loop on the string length rather than relying on
      // `charCodeAt` returning NaN once the index runs past the end.
      for (let i = 0; i < str.length; i++) {
        yield str.charCodeAt(i);
      }
    },
  };
}

/**
 * Creates an iterable over a string's chars (UTF-16 code units).
 *
 * Note that, in contrast to [`String.prototype[@@iterator]`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/@@iterator),
 * this does not treat astral codepoints as single characters,
 * but rather as the constituent surrogate pair.
 * Each yielded value is therefore a string of length 1 whose char code is
 * between `0x0000` and `0xffff = 2¹⁶ - 1`.
 * The iterable ends after the last code unit; an empty string produces an
 * empty iterable.
 * See the example below.
 * @param str - A string to extract chars from.
 * @returns An iterable over the chars, in string order.
 * @example
 * ```ts
 * import * as iter from "https://deno.land/x/iter/mod.ts";
 *
 * const str = "🦀💦🥱";
 * const chars = iter.create.fromChars(str)[Symbol.iterator]();
 *
 * console.log(chars.next().value === "\ud83e"); // -> true
 * console.log(chars.next().value === "\udd80"); // -> true
 * console.log(chars.next().value === "\ud83d"); // -> true
 * console.log(chars.next().value === "\udca6"); // -> true
 * console.log(chars.next().value === "\ud83e"); // -> true
 * console.log(chars.next().value === "\udd71"); // -> true
 * console.log(chars.next().done); // -> true
 * console.log("\ud83e\udd80\ud83d\udca6\ud83e\udd71"); // -> 🦀💦🥱
 * ```
 */
export function fromChars(str: string): IterableCircular<string> {
  return {
    *[Symbol.iterator]() {
      // `charAt` returns "" (not NaN) for an out-of-range index, so a NaN
      // check can never detect the end of the string. Bound the loop on the
      // length instead so the iterator terminates.
      for (let i = 0; i < str.length; i++) {
        yield str.charAt(i);
      }
    },
  };
}
25 changes: 25 additions & 0 deletions lib/generators_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,28 @@ Deno.test("fromResults", () => {
done = r1.done || false;
}
});

// Checks that `fromCharCodes` yields each UTF-16 code unit in order,
// including both halves of a surrogate pair.
Deno.test("fromCharCodes", () => {
  const codes = generators
    .fromCharCodes("\u0077\u006f\u0074\u0020\ud83e\udee5\u003f")
    [Symbol.iterator]();
  const expectedCodes = [
    0x0077,
    0x006f,
    0x0074,
    0x0020,
    0xd83e,
    0xdee5,
    0x003f,
  ];

  for (const expected of expectedCodes) {
    assertEquals(codes.next().value, expected);
  }
});

// Checks that `fromChars` yields each astral character as its two
// surrogate halves, one single-unit string at a time.
Deno.test("fromChars", () => {
  const units = generators.fromChars("🦀💦🥱")[Symbol.iterator]();
  const expectedUnits = [
    "\ud83e",
    "\udd80",
    "\ud83d",
    "\udca6",
    "\ud83e",
    "\udd71",
  ];

  for (const expected of expectedUnits) {
    assertEquals(units.next().value, expected);
  }
});

0 comments on commit 47bd89c

Please sign in to comment.