99import java .util .Set ;
1010import java .util .TreeMap ;
1111import java .util .TreeSet ;
12+ import java .util .concurrent .ConcurrentHashMap ;
13+ import java .util .concurrent .atomic .AtomicInteger ;
14+ import java .util .logging .Logger ;
1215import java .util .regex .Pattern ;
1316
1417import com .ibm .icu .dev .util .CollectionUtilities ;
2427 * @author markdavis
2528 */
2629public class ScriptTester {
30+ static Logger logger = Logger .getLogger (ScriptTester .class .getName ());
2731 private final UnicodeMap <BitSet > character_compatibleScripts ;
2832
2933
3034 public enum CompatibilityLevel {Highly_Restrictive , Moderately_Restrictive }
3135 public enum ScriptSpecials {on , off }
3236
37+
38+ /**
39+ * Space reserved for script codes not in ICU
40+ */
41+ public static final int EXTRA_COUNT = 16 ; // should be enough, hard working as UTC is!
42+ public static final Map <String ,Integer > extraScripts = new ConcurrentHashMap <>(EXTRA_COUNT );
3343 /**
3444 * Extended scripts; note that they do not have stable numbers, and should not be persisted.
3545 */
36- public static final int
46+ public static final int
3747 //HANT = UScript.CODE_LIMIT,
3848 //HANS = HANT + 1,
39- LIMIT = UScript .CODE_LIMIT ; // HANS + 1;
40-
41- private static String [][] EXTENDED_NAME = {{"Hant" , "Han Traditional" }, {"Hans" , "Han Simplified" }};
49+ LIMIT = UScript .CODE_LIMIT + EXTRA_COUNT ; // HANS + 1;
50+
51+ private static String [][] EXTENDED_NAME = {
52+ // Scripts without stable numbers
53+ {"Hant" , "Han Traditional" }, {"Hans" , "Han Simplified" },
54+ };
55+
56+ static AtomicInteger scriptCounter = new AtomicInteger (UScript .CODE_LIMIT );
57+
58+ static int getScriptCode (String script ) {
59+ try {
60+ // If ICU has it, great
61+ return UCharacter .getPropertyValueEnum (UProperty .SCRIPT , script );
62+ } catch (com .ibm .icu .impl .IllegalIcuArgumentException iiae ) {
63+ // Make something up
64+ int newCode = extraScripts .computeIfAbsent (script , script2 -> {
65+ int i = scriptCounter .getAndIncrement ();
66+ logger .warning ("Synthesized scriptCode " + i + " for unrecognized script extension '" +script +"'" );
67+ return i ;
68+ });
69+ // Verify we didn't run over
70+ if (newCode >= LIMIT ) {
71+ throw new RuntimeException ("computed script code of " + newCode + " for '" +script +"' overflows: have " + extraScripts .size () +
72+ " scripts but EXTRA_COUNT=" + EXTRA_COUNT );
73+ }
74+ return newCode ;
75+ }
76+ }
4277
4378 public static String getScriptName (int extendedScriptCode , int choice ) {
4479 if (extendedScriptCode >= UScript .CODE_LIMIT ) {
45- return EXTENDED_NAME [extendedScriptCode - UScript .CODE_LIMIT ][choice ];
80+ if (extendedScriptCode >= LIMIT ) {
81+ return EXTENDED_NAME [extendedScriptCode - LIMIT ][choice ];
82+ } else {
83+ for (Map .Entry <String , Integer > e : extraScripts .entrySet ()) {
84+ if (e .getValue () == extendedScriptCode ) {
85+ if (choice == 0 ) {
86+ return e .getKey ();
87+ } else {
88+ return "New Script '" + e .getKey () + "'" ;
89+ }
90+ }
91+ }
92+ throw new IllegalArgumentException ("Unknown extended script code " + extendedScriptCode );
93+ }
4694 }
4795 return UCharacter .getPropertyValueName (UProperty .SCRIPT , extendedScriptCode , choice );
4896 }
@@ -128,12 +176,12 @@ public boolean isOk(CharSequence input) {
128176 // check numbers
129177 return true ;
130178 }
131-
132-
179+
180+
133181
134182 // TODO, cache results
135183 private BitSet getActualScripts (int cp ) {
136- BitSet actualScripts = scriptSpecials .get (cp );
184+ BitSet actualScripts = getScriptSpecials () .get (cp );
137185 if (actualScripts == null ) {
138186 actualScripts = new BitSet (LIMIT );
139187 int script = UCharacter .getIntPropertyValue (cp , UProperty .SCRIPT );
@@ -143,7 +191,7 @@ private BitSet getActualScripts(int cp) {
143191 }
144192
145193 public boolean filterTable (List <Set <String >> table ) {
146-
194+
147195 // We make one pass forward and one backward, finding if each characters scripts
148196 // are compatible with the ones before.
149197 // We then make a second pass for the ones after.
@@ -248,7 +296,7 @@ private boolean contains(BitSet set1, BitSet set2) {
248296 }
249297
250298 public static class ScriptExtensions {
251-
299+
252300 public static final Comparator <BitSet > COMPARATOR = new Comparator <BitSet >() {
253301
254302 public int compare (BitSet o1 , BitSet o2 ) {
@@ -260,13 +308,13 @@ public int compare(BitSet o1, BitSet o2) {
260308 return n1 .compareToIgnoreCase (n2 );
261309 }
262310 };
263-
311+
264312 private UnicodeMap <BitSet > scriptSpecials ;
265-
313+
266314 public Collection <BitSet > getAvailableValues () {
267315 return scriptSpecials .getAvailableValues ();
268316 }
269-
317+
270318 public UnicodeSet getSet (BitSet value ) {
271319 return scriptSpecials .getSet (value );
272320 }
@@ -279,21 +327,21 @@ private static class MyHandler extends FileUtilities.SemiFileReader {
279327 public boolean handleLine (int start , int end , String [] items ) {
280328 BitSet bitSet = new BitSet (LIMIT );
281329 for (String script : SPACES .split (items [1 ])) {
282- int scriptCode = UCharacter . getPropertyValueEnum ( UProperty . SCRIPT , script );
330+ int scriptCode = getScriptCode ( script );
283331 bitSet .set (scriptCode );
284332 }
285333 map .putAll (start , end , bitSet );
286334 return true ;
287335 }
288336 }
289-
337+
290338 public static ScriptExtensions make (String directory , String filename ) {
291339 ScriptExtensions result = new ScriptExtensions ();
292340 result .scriptSpecials = ((MyHandler ) new MyHandler ()
293341 .process (directory , filename )).map .freeze ();
294342 return result ;
295343 }
296-
344+
297345 public static ScriptExtensions make (Class aClass , String filename ) {
298346 ScriptExtensions result = new ScriptExtensions ();
299347 result .scriptSpecials = ((MyHandler ) new MyHandler ()
@@ -312,7 +360,7 @@ public void putAllInto(UnicodeMap<BitSet> char2scripts) {
312360 public static String getNames (BitSet value , int choice , String separator ) {
313361 return getNames (value , choice , separator , new TreeSet <String >());
314362 }
315-
363+
316364 public static String getNames (BitSet value , int choice , String separator , Set <String > names ) {
317365 names .clear ();
318366 for (int i = value .nextSetBit (0 ); i >= 0 ; i = value .nextSetBit (i +1 )) {
@@ -321,12 +369,24 @@ public static String getNames(BitSet value, int choice, String separator, Set<St
321369 return CollectionUtilities .join (names , separator ).toString ();
322370 }
323371 }
324-
325- static ScriptExtensions scriptSpecials = ScriptExtensions .make (ScriptExtensions .class , "ScriptExtensions.txt" );
372+
373+ static final class ScriptExtensionsHelper {
374+ ScriptExtensions scriptSpecials ;
375+
376+ ScriptExtensionsHelper () {
377+ scriptSpecials = ScriptExtensions .make (ScriptExtensions .class , "ScriptExtensions.txt" );
378+ }
379+
380+ static ScriptExtensionsHelper INSTANCE = new ScriptExtensionsHelper ();
381+ }
382+
383+ static final ScriptExtensions getScriptSpecials () {
384+ return ScriptExtensionsHelper .INSTANCE .scriptSpecials ;
385+ }
326386
327387 public static BitSet getScriptSpecials (int codepoint ) {
328388 BitSet output = new BitSet (LIMIT );
329- BitSet actualScripts = scriptSpecials .get (codepoint );
389+ BitSet actualScripts = getScriptSpecials () .get (codepoint );
330390 if (actualScripts != null ) {
331391 output .or (actualScripts );
332392 } else {
@@ -340,14 +400,14 @@ public static UnicodeMap<String> getScriptSpecialsNames() {
340400 UnicodeMap <String > result = new UnicodeMap <String >();
341401 Set <String > names = new TreeSet <String >(); // to alphabetize
342402
343- for (BitSet value : scriptSpecials .getAvailableValues ()) {
344- result .putAll (scriptSpecials .getSet (value ), ScriptExtensions .getNames (value , UProperty .NameChoice .LONG , "," , names ));
403+ for (BitSet value : getScriptSpecials () .getAvailableValues ()) {
404+ result .putAll (getScriptSpecials () .getSet (value ), ScriptExtensions .getNames (value , UProperty .NameChoice .LONG , "," , names ));
345405 }
346406 return result ;
347407 }
348-
408+
349409 public static String [][] getScriptSpecialsAlternates () {
350- Collection <BitSet > availableValues = scriptSpecials .getAvailableValues ();
410+ Collection <BitSet > availableValues = getScriptSpecials () .getAvailableValues ();
351411 String [][] result = new String [availableValues .size ()][];
352412 Set <String > names = new TreeSet <String >(); // to alphabetize
353413
@@ -387,7 +447,7 @@ private Builder(CompatibilityLevel level, ScriptSpecials specials) {
387447 addCompatible (UScript .LATIN , i );
388448 }
389449 // FALL THRU!
390- case Highly_Restrictive :
450+ case Highly_Restrictive :
391451 addCompatible (UScript .LATIN , UScript .HAN , UScript .HIRAGANA , UScript .KATAKANA );
392452 //addCompatible(UScript.LATIN, HANT, UScript.HIRAGANA, UScript.KATAKANA);
393453 //addCompatible(UScript.LATIN, HANS, UScript.HIRAGANA, UScript.KATAKANA);
@@ -413,7 +473,7 @@ private Builder(CompatibilityLevel level, ScriptSpecials specials) {
413473 // fix the char2scripts mapping
414474
415475 if (specials == ScriptSpecials .on ){
416- scriptSpecials .putAllInto (char2scripts );
476+ getScriptSpecials () .putAllInto (char2scripts );
417477 }
418478 }
419479
0 commit comments