Skip to content

Commit 046a8d9

Browse files
committed
Add major improvements to the importer
* Add items previously elided due to name conflicts (e.g. UpButton2) * Add group and subgroup properties to `SingleEmoji` (see #7) * Automate code generation (supporting both web and headless node)
1 parent 5ed63b8 commit 046a8d9

File tree

10 files changed

+27974
-16877
lines changed

10 files changed

+27974
-16877
lines changed

importers/emoji-importer.html

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@
2121
let code = generator.generate();
2222

2323
let results = document.getElementById("results");
24-
for (const emoji of code.emoji) {
25-
results.append(document.createTextNode(emoji));
26-
}
24+
results.append(document.createTextNode(code.emoji));
2725
})();
2826
</script>
2927
</body>

importers/importer.js

Lines changed: 78 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@ const firstRegex = /\b1st/;
44
const secondRegex = /\b2nd/;
55
const thirdRegex = /\b3rd/;
66

7+
const intro = `namespace NeoSmart.Unicode
8+
{
9+
// This file is machine-generated from the official Unicode Consortium UTR51 publication
10+
// See the \`importers\` folder for the generators.
11+
`;
12+
13+
const extro = `
14+
}`;
15+
716
// Implementation of Lazy derived from the code at
817
// https://dev.to/nestedsoftware/lazy-evaluation-in-javascript-with-generators-map-filter-and-reduce--36h5
918
class Lazy {
@@ -168,17 +177,30 @@ function makeStringArray(keywords) {
168177
.join(", ");
169178
}
170179

171-
function makeSortedSet(name, emoji) {
172-
result = `public static readonly SortedSet<SingleEmoji> ${name} = new SortedSet<SingleEmoji>() {
180+
function makeSortedSet(name, emoji, summary = "") {
181+
result = `using System.Collections.Generic;
182+
183+
${intro}
184+
public static partial class Emoji
185+
{
186+
/// <summary>
187+
/// ${summary}
188+
/// </summary>
189+
#if NET20 || NET30 || NET35
190+
public static readonly List<SingleEmoji> ${name} = new List<SingleEmoji>() {
191+
#else
192+
public static readonly SortedSet<SingleEmoji> ${name} = new SortedSet<SingleEmoji>() {
193+
#endif
173194
`;
174195

175196
for (const e of emoji) {
176-
result += `\t/* ${e.symbol} */ ${CamelCase(e.name)},
197+
result += ` /* ${e.symbol} */ ${CamelCase(e.name)},
177198
`;
178199
}
179-
result += `};
200+
result += ` };
201+
}`;
180202

181-
`;
203+
result += extro;
182204

183205
return result;
184206
}
@@ -196,16 +218,16 @@ function isUngenderedEmoji(emoji) {
196218
}
197219

198220
function emojiToCSharp(emoji) {
199-
return `/* ${emoji.symbol} */
200-
public static readonly SingleEmoji ${CamelCase(emoji.name)} = new SingleEmoji(
201-
sequence: new UnicodeSequence("${emoji.sequence}"),
202-
name: "${emoji.name}",
203-
group: "${emoji.group}",
204-
subgroup: "${emoji.subgroup}",
205-
searchTerms: new [] { ${makeStringArray(emoji.name)} },
206-
sortOrder: ${emoji.index},
207-
);
208-
221+
return `
222+
/* ${emoji.symbol} */
223+
public static readonly SingleEmoji ${CamelCase(emoji.name)} = new SingleEmoji(
224+
sequence: new UnicodeSequence("${emoji.sequence}"),
225+
name: "${emoji.name}",
226+
group: "${emoji.group}",
227+
subgroup: "${emoji.subgroup}",
228+
searchTerms: new [] { ${makeStringArray(emoji.name)} },
229+
sortOrder: ${emoji.index}
230+
);
209231
`;
210232
}
211233

@@ -242,8 +264,10 @@ function *parse(data) {
242264
const groupRegex = /\bgroup: \s*(\S.+?)\s*$/;
243265
const subgroupRegex = /subgroup: \s*(\S.+?)\s*$/;
244266

267+
let deduplicator = new Set();
245268
let group = "";
246269
let subgroup = "";
270+
let sortIndex = 0;
247271
for (let i = 0; i < lines.length; ++i) {
248272
const line = lines[i];
249273
if (line.startsWith("#") || !line.includes("fully-qualified")) {
@@ -257,19 +281,33 @@ function *parse(data) {
257281

258282
let results = line.match(parser);
259283

260-
yield {
284+
const emoji = {
261285
"sequence": results[1],
262286
"symbol": results[2],
263287
"name": results[3],
264-
"index": i++,
288+
"index": sortIndex++,
265289
"group": group,
266290
"subgroup": subgroup,
267291
};
292+
293+
if (deduplicator.has(emoji.name)) {
294+
continue;
295+
}
296+
297+
let oldName = emoji.name;
298+
let version = 2;
299+
while (deduplicator.has(CamelCase(emoji.name))) {
300+
emoji.name = oldName + version++;
301+
}
302+
deduplicator.add(emoji.name);
303+
deduplicator.add(CamelCase(emoji.name));
304+
305+
yield emoji;
268306
}
269307
}
270308

271309
function parseEmoji(data) {
272-
return new Lazy(parse(data), () => true);
310+
return new Lazy(parse(data));
273311
}
274312

275313
const manWomanRegex = /^(man|woman)/i;
@@ -295,15 +333,22 @@ class CodeGenerator {
295333
let emoji = Array.from(parseEmoji(this.data));
296334

297335
let csharp = {
298-
emoji: [],
336+
emoji: "",
299337
lists: {},
300338
};
301339

302340
// Dump actual emoji objects.
303341
// All other operations print only references to these.
342+
let code = [];
343+
code.push(intro);
344+
code.push(" public static partial class Emoji\n");
345+
code.push(" {");
304346
for (const e of emoji) {
305-
csharp.emoji.push(emojiToCSharp(e));
347+
code.push(emojiToCSharp(e));
306348
}
349+
code.push(" }");
350+
code.push(extro);
351+
csharp.emoji = code.join("");
307352

308353
// Dump all emoji list
309354
csharp.lists.all = makeSortedSet("All", emoji);
@@ -315,11 +360,22 @@ class CodeGenerator {
315360
// Narrow it down to emoji supported by Segoe UI Emoji
316361
let supportedEmoji = basicUngenderedEmoji
317362
.filter(isBasicEmoji)
318-
.filter(e => fontSupportsEmoji(this.font, e));
363+
.filter(e => fontSupportsEmoji(this.font, e))
319364

320365
// Dump list of ungendered emoji
321-
csharp.lists.basic = makeSortedSet("Basic", supportedEmoji);
366+
csharp.lists.basic = makeSortedSet("Basic", supportedEmoji,
367+
"A (sorted) enumeration of all emoji without skin variations and no duplicate " +
368+
"gendered vs gender-neutral emoji, ideal for displaying. " +
369+
"Emoji without supported glyphs in Segoe UI Emoji are also omitted from this list.");
322370

323371
return csharp;
324372
}
325373
}
374+
375+
if (this.module == undefined) {
376+
this.module = {};
377+
}
378+
379+
module.exports = {
380+
CodeGenerator: CodeGenerator,
381+
};

importers/index.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
const fs = require("fs").promises;
2+
const path = require("path");
3+
const fontkit = require("fontkit");
4+
const importer = require("./importer.js");
5+
6+
const codeRoot = "../unicode";
7+
(async function() {
8+
let text = await fs.readFile("emoji-test.txt", { encoding: "utf-8" });
9+
let font = fontkit.create(await fs.readFile("seguiemj.ttf"));
10+
11+
let generator = new importer.CodeGenerator(font, text);
12+
13+
let code = generator.generate();
14+
15+
await fs.writeFile(codeRoot + "/Emoji-Emojis.cs", code.emoji, { encoding: "utf-8" });
16+
await fs.writeFile(codeRoot + "/Emoji-All.cs", code.lists.all, { encoding: "utf-8" });
17+
await fs.writeFile(codeRoot + "/Emoji-Basic.cs", code.lists.basic, { encoding: "utf-8" });
18+
})();

importers/package.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"name": "importers",
3+
"version": "1.0.0",
4+
"main": "index.js",
5+
"license": "MIT",
6+
"dependencies": {
7+
"fontkit": "^1.8.0"
8+
}
9+
}

0 commit comments

Comments
 (0)