@@ -4,6 +4,15 @@ const firstRegex = /\b1st/;
4
4
// Matches the ordinal "2nd" when it begins at a word boundary.
const secondRegex = /\b2nd/;
// Matches the ordinal "3rd" when it begins at a word boundary.
const thirdRegex = /\b3rd/;

// Boilerplate placed at the top of each generated C# file: opens the
// NeoSmart.Unicode namespace and adds a "machine-generated" notice.
// The backticks around `importers` are escaped because this is a template literal.
const intro = `namespace NeoSmart.Unicode
{
    // This file is machine-generated from the official Unicode Consortium UTR51 publication
    // See the \`importers\` folder for the generators.
`;

// Boilerplate placed at the end of each generated C# file: a newline followed
// by the brace that closes the namespace opened by `intro`.
const extro = `
}`;
15
+
7
16
// Implementation of Lazy derived from the code at
8
17
// https://dev.to/nestedsoftware/lazy-evaluation-in-javascript-with-generators-map-filter-and-reduce--36h5
9
18
class Lazy {
@@ -168,17 +177,30 @@ function makeStringArray(keywords) {
168
177
. join ( ", " ) ;
169
178
}
170
179
171
/**
 * Renders a complete C# source file that declares one static collection of
 * emoji references inside `public static partial class Emoji`.
 *
 * The generated member is a `List<SingleEmoji>` when compiled for
 * NET20/NET30/NET35 and a `SortedSet<SingleEmoji>` otherwise; the choice is
 * made by the C# preprocessor directives written into the output.
 *
 * @param {string} name - Identifier of the generated static member.
 * @param {Iterable<{symbol: string, name: string}>} emoji - Emoji to list;
 *     each is emitted as a reference to `CamelCase(e.name)`, preceded by a
 *     comment containing its symbol.
 * @param {string} [summary=""] - Text placed in the member's XML `<summary>`.
 * @returns {string} The C# source text, wrapped in `intro` and `extro`.
 */
function makeSortedSet(name, emoji, summary = "") {
    // Declared locally: the previous bare `result = ...` created an implicit
    // global (and is a ReferenceError in strict mode / ES modules).
    let result = `using System.Collections.Generic;

${intro}
    public static partial class Emoji
    {
        /// <summary>
        /// ${summary}
        /// </summary>
#if NET20 || NET30 || NET35
        public static readonly List<SingleEmoji> ${name} = new List<SingleEmoji>() {
#else
        public static readonly SortedSet<SingleEmoji> ${name} = new SortedSet<SingleEmoji>() {
#endif
`;

    // One collection-initializer entry per emoji.
    for (const e of emoji) {
        result += `            /* ${e.symbol} */ ${CamelCase(e.name)},
`;
    }

    // Close the collection initializer, then the class.
    result += `        };
    }`;

    // Close the namespace opened by `intro`.
    result += extro;

    return result;
}
@@ -196,16 +218,16 @@ function isUngenderedEmoji(emoji) {
196
218
}
197
219
198
220
/**
 * Renders the C# declaration of a single emoji as a
 * `public static readonly SingleEmoji` member.
 *
 * @param {{symbol: string, sequence: string, name: string, group: string,
 *          subgroup: string, index: number}} emoji - The emoji record to render.
 * @returns {string} C# source for the member, starting and ending with a newline.
 */
function emojiToCSharp(emoji) {
    const { symbol, sequence, name, group, subgroup, index } = emoji;

    // Indentation of the member itself and of its constructor arguments.
    const memberIndent = "        ";
    const argIndent = "            ";

    const identifier = CamelCase(name);
    const searchTerms = makeStringArray(name);

    const lines = [
        "",
        `${memberIndent}/* ${symbol} */`,
        `${memberIndent}public static readonly SingleEmoji ${identifier} = new SingleEmoji(`,
        `${argIndent}sequence: new UnicodeSequence("${sequence}"),`,
        `${argIndent}name: "${name}",`,
        `${argIndent}group: "${group}",`,
        `${argIndent}subgroup: "${subgroup}",`,
        `${argIndent}searchTerms: new [] { ${searchTerms} },`,
        // Last argument: no trailing comma.
        `${argIndent}sortOrder: ${index}`,
        `${memberIndent});`,
        "",
    ];

    return lines.join("\n");
}
211
233
@@ -242,8 +264,10 @@ function *parse(data) {
242
264
const groupRegex = / \b g r o u p : \s * ( \S .+ ?) \s * $ / ;
243
265
const subgroupRegex = / s u b g r o u p : \s * ( \S .+ ?) \s * $ / ;
244
266
267
+ let deduplicator = new Set ( ) ;
245
268
let group = "" ;
246
269
let subgroup = "" ;
270
+ let sortIndex = 0 ;
247
271
for ( let i = 0 ; i < lines . length ; ++ i ) {
248
272
const line = lines [ i ] ;
249
273
if ( line . startsWith ( "#" ) || ! line . includes ( "fully-qualified" ) ) {
@@ -257,19 +281,33 @@ function *parse(data) {
257
281
258
282
let results = line . match ( parser ) ;
259
283
260
- yield {
284
+ const emoji = {
261
285
"sequence" : results [ 1 ] ,
262
286
"symbol" : results [ 2 ] ,
263
287
"name" : results [ 3 ] ,
264
- "index" : i ++ ,
288
+ "index" : sortIndex ++ ,
265
289
"group" : group ,
266
290
"subgroup" : subgroup ,
267
291
} ;
292
+
293
+ if ( deduplicator . has ( emoji . name ) ) {
294
+ continue ;
295
+ }
296
+
297
+ let oldName = emoji . name ;
298
+ let version = 2 ;
299
+ while ( deduplicator . has ( CamelCase ( emoji . name ) ) ) {
300
+ emoji . name = oldName + version ++ ;
301
+ }
302
+ deduplicator . add ( emoji . name ) ;
303
+ deduplicator . add ( CamelCase ( emoji . name ) ) ;
304
+
305
+ yield emoji ;
268
306
}
269
307
}
270
308
271
309
/**
 * Parses the raw emoji data and exposes the records as a `Lazy` sequence.
 *
 * @param {*} data - Raw input, passed unchanged to the `parse` generator.
 * @returns {Lazy} A `Lazy` wrapper around the generator returned by `parse`.
 */
function parseEmoji(data) {
    const records = parse(data);
    return new Lazy(records);
}
274
312
275
313
const manWomanRegex = / ^ ( m a n | w o m a n ) / i;
@@ -295,15 +333,22 @@ class CodeGenerator {
295
333
let emoji = Array . from ( parseEmoji ( this . data ) ) ;
296
334
297
335
let csharp = {
298
- emoji : [ ] ,
336
+ emoji : "" ,
299
337
lists : { } ,
300
338
} ;
301
339
302
340
// Dump actual emoji objects.
303
341
// All other operations print only references to these.
342
+ let code = [ ] ;
343
+ code . push ( intro ) ;
344
+ code . push ( " public static partial class Emoji\n" ) ;
345
+ code . push ( " {" ) ;
304
346
for ( const e of emoji ) {
305
- csharp . emoji . push ( emojiToCSharp ( e ) ) ;
347
+ code . push ( emojiToCSharp ( e ) ) ;
306
348
}
349
+ code . push ( " }" ) ;
350
+ code . push ( extro ) ;
351
+ csharp . emoji = code . join ( "" ) ;
307
352
308
353
// Dump all emoji list
309
354
csharp . lists . all = makeSortedSet ( "All" , emoji ) ;
@@ -315,11 +360,22 @@ class CodeGenerator {
315
360
// Narrow it down to emoji supported by Segoe UI Emoji
316
361
let supportedEmoji = basicUngenderedEmoji
317
362
. filter ( isBasicEmoji )
318
- . filter ( e => fontSupportsEmoji ( this . font , e ) ) ;
363
+ . filter ( e => fontSupportsEmoji ( this . font , e ) )
319
364
320
365
// Dump list of ungendered emoji
321
- csharp . lists . basic = makeSortedSet ( "Basic" , supportedEmoji ) ;
366
+ csharp . lists . basic = makeSortedSet ( "Basic" , supportedEmoji ,
367
+ "A (sorted) enumeration of all emoji without skin variations and no duplicate " +
368
+ "gendered vs gender-neutral emoji, ideal for displaying. " +
369
+ "Emoji without supported glyphs in Segoe UI Emoji are also omitted from this list." ) ;
322
370
323
371
return csharp ;
324
372
}
325
373
}
374
+
375
+ if ( this . module == undefined ) {
376
+ this . module = { } ;
377
+ }
378
+
379
+ module . exports = {
380
+ CodeGenerator : CodeGenerator ,
381
+ } ;
0 commit comments