Skip to content

Commit 122b9b9

Browse files
srl295 and markusicu authored
bump JSP to Unicode 14 (#93)
* jsp: to Beta 14.0.0 data (cut 1) Step 1a text files For #88 - GenerateSubtagNames - Extra{Property,PropertyValue}Aliases.txt - PropertyAliases.txt from 14.0.0 - copied props with CopyPropsToUnicodeJsp * use beta * make up scriptCodes for unrecognized scripts - leave some space between ICU and collections such as Hans - replace some synchronized and static init with Bill Pugh singletons for #88 * mkdir to fix #103 * Add UpdateJspFiles to update all JSP files. - updated docs For: #88 * re-run UpdateJspFiles for U14.0.0 For: #88 * different fix for #103 changes data * Updates per review comments - typo/tab fixes - add a unit test for Settings.UnicodeTools.DataDir - Apply other suggestions from code review Co-authored-by: Markus Scherer <markus.icu@gmail.com> Co-authored-by: Markus Scherer <markus.icu@gmail.com>
1 parent 059a726 commit 122b9b9

File tree

130 files changed

+5176
-2494
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+5176
-2494
lines changed

UnicodeJsps/pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
For ICU versions, see https://github.com/orgs/unicode-org/packages?repo_name=icu
2222
Note that we can't use the general ICU maven packages, because utilities isn't exported (yet).
2323
-->
24-
<icu.version>69.1-SNAPSHOT-cldr-2021-02-17</icu.version>
24+
<icu.version>70.0.1-SNAPSHOT-cldr-2021-06-15</icu.version>
2525

2626
<!--
2727
For CLDR versions, see https://github.com/orgs/unicode-org/packages?repo_name=cldr
@@ -71,19 +71,19 @@
7171
<scope>provided</scope>
7272
<version>${jsp.version}</version>
7373
</dependency>
74-
74+
7575
<dependency>
7676
<groupId>com.google.code.gson</groupId>
7777
<artifactId>gson</artifactId>
7878
<version>2.8.6</version>
7979
</dependency>
80-
80+
8181
<dependency>
8282
<groupId>com.google.guava</groupId>
8383
<artifactId>guava</artifactId>
8484
<version>29.0-jre</version>
8585
</dependency>
86-
86+
8787
<dependency>
8888
<groupId>xerces</groupId>
8989
<artifactId>xercesImpl</artifactId>

UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import com.ibm.icu.util.VersionInfo;
3232

3333
public class CachedProps {
34-
public static final boolean IS_BETA = false;
34+
public static final boolean IS_BETA = true;
3535

3636
public static final Splitter HASH_SPLITTER = Splitter.on('#').trimResults();
3737
public static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults();
@@ -44,7 +44,7 @@ public class CachedProps {
4444
final BiMultimap<String,String> nameToAliases = new BiMultimap<String,String>(null,null);
4545
final Map<String,BiMultimap<String,String>> nameToValueToAliases = new LinkedHashMap();
4646

47-
static CachedProps CACHED_PROPS = getInstance(VersionInfo.getInstance(12));
47+
static CachedProps CACHED_PROPS = getInstance(VersionInfo.getInstance(14));
4848

4949
static UnicodeProperty NAMES = CachedProps.CACHED_PROPS.getProperty("Name");
5050

@@ -144,8 +144,8 @@ class DelayedUnicodeProperty extends UnicodeProperty {
144144
private List<String> nameAliases;
145145
private Multimap<String,String> valueToAliases;
146146

147-
public DelayedUnicodeProperty(VersionInfo version, String propName,
148-
Collection<String> nameAliases,
147+
public DelayedUnicodeProperty(VersionInfo version, String propName,
148+
Collection<String> nameAliases,
149149
BiMultimap<String, String> biMultimap) {
150150
this.version = version;
151151
Collection<String> temp;

UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester.java

Lines changed: 86 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import java.util.Set;
1010
import java.util.TreeMap;
1111
import java.util.TreeSet;
12+
import java.util.concurrent.ConcurrentHashMap;
13+
import java.util.concurrent.atomic.AtomicInteger;
14+
import java.util.logging.Logger;
1215
import java.util.regex.Pattern;
1316

1417
import com.ibm.icu.dev.util.CollectionUtilities;
@@ -24,25 +27,70 @@
2427
* @author markdavis
2528
*/
2629
public class ScriptTester {
30+
static Logger logger = Logger.getLogger(ScriptTester.class.getName());
2731
private final UnicodeMap<BitSet> character_compatibleScripts;
2832

2933

3034
public enum CompatibilityLevel {Highly_Restrictive, Moderately_Restrictive}
3135
public enum ScriptSpecials {on, off}
3236

37+
38+
/**
39+
* Space reserved for script codes not in ICU
40+
*/
41+
public static final int EXTRA_COUNT = 16; // should be enough, hard working as UTC is!
42+
public static final Map<String,Integer> extraScripts = new ConcurrentHashMap<>(EXTRA_COUNT);
3343
/**
3444
* Extended scripts; note that they do not have stable numbers, and should not be persisted.
3545
*/
36-
public static final int
46+
public static final int
3747
//HANT = UScript.CODE_LIMIT,
3848
//HANS = HANT + 1,
39-
LIMIT = UScript.CODE_LIMIT; // HANS + 1;
40-
41-
private static String[][] EXTENDED_NAME = {{"Hant", "Han Traditional"}, {"Hans", "Han Simplified"}};
49+
LIMIT = UScript.CODE_LIMIT + EXTRA_COUNT; // HANS + 1;
50+
51+
private static String[][] EXTENDED_NAME = {
52+
// Scripts without stable numbers
53+
{"Hant", "Han Traditional"}, {"Hans", "Han Simplified"},
54+
};
55+
56+
static AtomicInteger scriptCounter = new AtomicInteger(UScript.CODE_LIMIT);
57+
58+
static int getScriptCode(String script) {
59+
try {
60+
// If ICU has it, great
61+
return UCharacter.getPropertyValueEnum(UProperty.SCRIPT, script);
62+
} catch (com.ibm.icu.impl.IllegalIcuArgumentException iiae) {
63+
// Make something up
64+
int newCode = extraScripts.computeIfAbsent(script, script2 -> {
65+
int i = scriptCounter.getAndIncrement();
66+
logger.warning("Synthesized scriptCode " + i + " for unrecognized script extension '"+script+"'");
67+
return i;
68+
});
69+
// Verify we didn't run over
70+
if (newCode >= LIMIT) {
71+
throw new RuntimeException("computed script code of " + newCode + " for '"+script+"' overflows: have " + extraScripts.size() +
72+
" scripts but EXTRA_COUNT=" + EXTRA_COUNT);
73+
}
74+
return newCode;
75+
}
76+
}
4277

4378
public static String getScriptName(int extendedScriptCode, int choice) {
4479
if (extendedScriptCode >= UScript.CODE_LIMIT) {
45-
return EXTENDED_NAME[extendedScriptCode - UScript.CODE_LIMIT][choice];
80+
if (extendedScriptCode >= LIMIT) {
81+
return EXTENDED_NAME[extendedScriptCode - LIMIT][choice];
82+
} else {
83+
for (Map.Entry<String, Integer> e : extraScripts.entrySet()) {
84+
if(e.getValue() == extendedScriptCode) {
85+
if(choice == 0) {
86+
return e.getKey();
87+
} else {
88+
return "New Script '"+ e.getKey() + "'";
89+
}
90+
}
91+
}
92+
throw new IllegalArgumentException("Unknown extended script code " + extendedScriptCode);
93+
}
4694
}
4795
return UCharacter.getPropertyValueName(UProperty.SCRIPT, extendedScriptCode, choice);
4896
}
@@ -128,12 +176,12 @@ public boolean isOk(CharSequence input) {
128176
// check numbers
129177
return true;
130178
}
131-
132-
179+
180+
133181

134182
// TODO, cache results
135183
private BitSet getActualScripts(int cp) {
136-
BitSet actualScripts = scriptSpecials.get(cp);
184+
BitSet actualScripts = getScriptSpecials().get(cp);
137185
if (actualScripts == null) {
138186
actualScripts = new BitSet(LIMIT);
139187
int script = UCharacter.getIntPropertyValue(cp, UProperty.SCRIPT);
@@ -143,7 +191,7 @@ private BitSet getActualScripts(int cp) {
143191
}
144192

145193
public boolean filterTable(List<Set<String>> table) {
146-
194+
147195
// We make one pass forward and one backward, finding if each characters scripts
148196
// are compatible with the ones before.
149197
// We then make a second pass for the ones after.
@@ -248,7 +296,7 @@ private boolean contains(BitSet set1, BitSet set2) {
248296
}
249297

250298
public static class ScriptExtensions {
251-
299+
252300
public static final Comparator<BitSet> COMPARATOR = new Comparator<BitSet>() {
253301

254302
public int compare(BitSet o1, BitSet o2) {
@@ -260,13 +308,13 @@ public int compare(BitSet o1, BitSet o2) {
260308
return n1.compareToIgnoreCase(n2);
261309
}
262310
};
263-
311+
264312
private UnicodeMap<BitSet> scriptSpecials;
265-
313+
266314
public Collection<BitSet> getAvailableValues() {
267315
return scriptSpecials.getAvailableValues();
268316
}
269-
317+
270318
public UnicodeSet getSet(BitSet value) {
271319
return scriptSpecials.getSet(value);
272320
}
@@ -279,21 +327,21 @@ private static class MyHandler extends FileUtilities.SemiFileReader {
279327
public boolean handleLine(int start, int end, String[] items) {
280328
BitSet bitSet = new BitSet(LIMIT);
281329
for (String script : SPACES.split(items[1])) {
282-
int scriptCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, script);
330+
int scriptCode = getScriptCode(script);
283331
bitSet.set(scriptCode);
284332
}
285333
map.putAll(start, end, bitSet);
286334
return true;
287335
}
288336
}
289-
337+
290338
public static ScriptExtensions make(String directory, String filename) {
291339
ScriptExtensions result = new ScriptExtensions();
292340
result.scriptSpecials = ((MyHandler) new MyHandler()
293341
.process(directory, filename)).map.freeze();
294342
return result;
295343
}
296-
344+
297345
public static ScriptExtensions make(Class aClass, String filename) {
298346
ScriptExtensions result = new ScriptExtensions();
299347
result.scriptSpecials = ((MyHandler) new MyHandler()
@@ -312,7 +360,7 @@ public void putAllInto(UnicodeMap<BitSet> char2scripts) {
312360
public static String getNames(BitSet value, int choice, String separator) {
313361
return getNames(value, choice, separator, new TreeSet<String>());
314362
}
315-
363+
316364
public static String getNames(BitSet value, int choice, String separator, Set<String> names) {
317365
names.clear();
318366
for (int i = value.nextSetBit(0); i >= 0; i = value.nextSetBit(i+1)) {
@@ -321,12 +369,24 @@ public static String getNames(BitSet value, int choice, String separator, Set<St
321369
return CollectionUtilities.join(names, separator).toString();
322370
}
323371
}
324-
325-
static ScriptExtensions scriptSpecials = ScriptExtensions.make(ScriptExtensions.class, "ScriptExtensions.txt");
372+
373+
static final class ScriptExtensionsHelper {
374+
ScriptExtensions scriptSpecials;
375+
376+
ScriptExtensionsHelper() {
377+
scriptSpecials = ScriptExtensions.make(ScriptExtensions.class, "ScriptExtensions.txt");
378+
}
379+
380+
static ScriptExtensionsHelper INSTANCE = new ScriptExtensionsHelper();
381+
}
382+
383+
static final ScriptExtensions getScriptSpecials() {
384+
return ScriptExtensionsHelper.INSTANCE.scriptSpecials;
385+
}
326386

327387
public static BitSet getScriptSpecials(int codepoint) {
328388
BitSet output = new BitSet(LIMIT);
329-
BitSet actualScripts = scriptSpecials.get(codepoint);
389+
BitSet actualScripts = getScriptSpecials().get(codepoint);
330390
if (actualScripts != null) {
331391
output.or(actualScripts);
332392
} else {
@@ -340,14 +400,14 @@ public static UnicodeMap<String> getScriptSpecialsNames() {
340400
UnicodeMap<String> result = new UnicodeMap<String>();
341401
Set<String> names = new TreeSet<String>(); // to alphabetize
342402

343-
for (BitSet value : scriptSpecials.getAvailableValues()) {
344-
result.putAll(scriptSpecials.getSet(value), ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names));
403+
for (BitSet value : getScriptSpecials().getAvailableValues()) {
404+
result.putAll(getScriptSpecials().getSet(value), ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names));
345405
}
346406
return result;
347407
}
348-
408+
349409
public static String[][] getScriptSpecialsAlternates() {
350-
Collection<BitSet> availableValues = scriptSpecials.getAvailableValues();
410+
Collection<BitSet> availableValues = getScriptSpecials().getAvailableValues();
351411
String[][] result = new String[availableValues.size()][];
352412
Set<String> names = new TreeSet<String>(); // to alphabetize
353413

@@ -387,7 +447,7 @@ private Builder(CompatibilityLevel level, ScriptSpecials specials) {
387447
addCompatible(UScript.LATIN, i);
388448
}
389449
// FALL THRU!
390-
case Highly_Restrictive:
450+
case Highly_Restrictive:
391451
addCompatible(UScript.LATIN, UScript.HAN, UScript.HIRAGANA, UScript.KATAKANA);
392452
//addCompatible(UScript.LATIN, HANT, UScript.HIRAGANA, UScript.KATAKANA);
393453
//addCompatible(UScript.LATIN, HANS, UScript.HIRAGANA, UScript.KATAKANA);
@@ -413,7 +473,7 @@ private Builder(CompatibilityLevel level, ScriptSpecials specials) {
413473
// fix the char2scripts mapping
414474

415475
if (specials == ScriptSpecials.on){
416-
scriptSpecials.putAllInto(char2scripts);
476+
getScriptSpecials().putAllInto(char2scripts);
417477
}
418478
}
419479

0 commit comments

Comments
 (0)