Skip to content

Commit 7631d20

Browse files
authored
Fix remaining issues with multivalued (#618)
* Fix remaining issues with multivalued properties
* Fix some review issues
1 parent 8846d0d commit 7631d20

File tree

5 files changed

+110
-18
lines changed

5 files changed

+110
-18
lines changed

UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester.java

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import com.ibm.icu.lang.UScript;
88
import com.ibm.icu.text.Normalizer;
99
import com.ibm.icu.text.UnicodeSet;
10+
import java.util.ArrayList;
1011
import java.util.BitSet;
1112
import java.util.Collection;
1213
import java.util.Comparator;
@@ -20,6 +21,7 @@
2021
import java.util.concurrent.atomic.AtomicInteger;
2122
import java.util.logging.Logger;
2223
import java.util.regex.Pattern;
24+
import org.unicode.props.UnicodeProperty;
2325

2426
/**
2527
* Class for testing whether strings have allowed combinations of multiple scripts.
@@ -319,6 +321,7 @@ public static class ScriptExtensions {
319321
public static final Comparator<BitSet> COMPARATOR =
320322
new Comparator<BitSet>() {
321323

324+
@Override
322325
public int compare(BitSet o1, BitSet o2) {
323326
int diff = o1.cardinality() - o2.cardinality();
324327
if (diff != 0) return diff;
@@ -344,6 +347,7 @@ private static class MyHandler extends FileUtilities.SemiFileReader {
344347

345348
UnicodeMap<BitSet> map = new UnicodeMap<BitSet>();
346349

350+
@Override
347351
public boolean handleLine(int start, int end, String[] items) {
348352
BitSet bitSet = new BitSet(LIMIT);
349353
for (String script : SPACES.split(items[1])) {
@@ -429,21 +433,39 @@ public static UnicodeMap<String> getScriptSpecialsNames() {
429433
return result;
430434
}
431435

432-
public static String[][] getScriptSpecialsAlternates() {
436+
public static String[][] getScriptSpecialsAlternates(UnicodeProperty scriptProp) {
433437
Collection<BitSet> availableValues = getScriptSpecials().getAvailableValues();
434-
String[][] result = new String[availableValues.size()][];
438+
List<String[]> result = new ArrayList<>();
435439
Set<String> names = new TreeSet<String>(); // to alphabetize
436440

437-
int i = 0;
438441
for (BitSet value : availableValues) {
439442
String baseName =
440443
ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names);
441444
String altName =
442445
ScriptExtensions.getNames(value, UProperty.NameChoice.SHORT, ",", names);
443446
String[] row = {baseName, altName};
444-
result[i++] = row;
447+
result.add(row);
445448
}
446-
return result;
449+
450+
// Get the single values, and build alternate values for the property, for isValidValue
451+
// of a single script (e.g., Arab)
452+
List<String> values = scriptProp.getAvailableValues();
453+
for (String value : values) {
454+
List<String> row = new ArrayList<>();
455+
row.add(value);
456+
for (String alias : scriptProp.getValueAliases(value)) {
457+
if (!alias.equals(value)) {
458+
row.add(alias);
459+
}
460+
}
461+
// duplicate the value when it has no distinct alias, because the tooling expects at least 2 values (ugh)
462+
if (row.size() == 1) {
463+
row.add(value);
464+
}
465+
result.add(row.toArray(new String[row.size()]));
466+
}
467+
468+
return result.toArray(new String[result.size()][]);
447469
}
448470

449471
private ScriptTester(UnicodeMap<BitSet> character_scripts) {

UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ public MySymbolTable() {
150150
// return null;
151151
// }
152152

153+
@Override
153154
public boolean applyPropertyAlias(
154155
String propertyName, String propertyValue, UnicodeSet result) {
155156
boolean status = false;
@@ -201,9 +202,11 @@ public boolean applyPropertyAlias(
201202
}
202203
;
203204
if (!status) {
204-
try {
205-
status = applyPropertyAlias0(prop, "No", result, !invert);
206-
} catch (Exception e) {
205+
if (prop.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) {
206+
try {
207+
status = applyPropertyAlias0(prop, "No", result, !invert);
208+
} catch (Exception e) {
209+
}
207210
}
208211
;
209212
if (!status) {
@@ -336,6 +339,7 @@ public ComparisonMatcher(String pattern, Relation comparator) {
336339
this.pattern = pattern;
337340
}
338341

342+
@Override
339343
public boolean test(String value) {
340344
int comp = comparator.compare(pattern, value.toString());
341345
switch (relation) {
@@ -352,6 +356,7 @@ public boolean test(String value) {
352356
}
353357
}
354358

359+
@Override
355360
public PatternMatcher set(String pattern) {
356361
this.pattern = pattern;
357362
return this;

UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -269,15 +269,18 @@ public String transform(Integer source) {
269269

270270
// set up the special script property
271271
UnicodeProperty scriptProp = base.getProperty("sc");
272+
273+
// Compose the function and add
272274
UnicodeMap<String> specialMap = new UnicodeMap<String>();
273-
specialMap.putAll(scriptProp.getUnicodeMap());
275+
specialMap.putAll(
276+
scriptProp.getUnicodeMap()); // if there is no value, use the script property
274277
specialMap.putAll(ScriptTester.getScriptSpecialsNames());
275278
add(
276279
new UnicodeProperty.UnicodeMapProperty()
277280
.set(specialMap)
278281
.setMain("Script_Extensions", "scx", UnicodeProperty.ENUMERATED, "1.1")
279282
.addValueAliases(
280-
ScriptTester.getScriptSpecialsAlternates(),
283+
ScriptTester.getScriptSpecialsAlternates(scriptProp),
281284
AliasAddAction.IGNORE_IF_MISSING)
282285
.setMultivalued(true));
283286

@@ -359,6 +362,7 @@ private void addExamplarProperty(
359362

360363
// convert to UnicodeMap
361364
UnicodeMap<String> unicodeMap = new UnicodeMap<>();
365+
unicodeMap.putAll(0, 0x10FFFF, ""); // default is empty string
362366
for (Entry<Integer, Collection<String>> entry : data.asMap().entrySet()) {
363367
String value = JOIN_COMMAS.join(entry.getValue()).intern();
364368
unicodeMap.put(entry.getKey(), value);
@@ -383,11 +387,7 @@ private void addExamplarProperty(
383387
add(
384388
new UnicodeProperty.UnicodeMapProperty()
385389
.set(unicodeMap)
386-
.setMain(
387-
propertyName,
388-
propertyAbbreviation,
389-
UnicodeProperty.ENUMERATED,
390-
"1.1")
390+
.setMain(propertyName, propertyAbbreviation, UnicodeProperty.STRING, "1.1")
391391
.addValueAliases(locales, AliasAddAction.ADD_MAIN_ALIAS)
392392
.setMultivalued(true));
393393
}

UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,23 @@
33
import com.ibm.icu.text.UnicodeSet;
44
import org.junit.jupiter.api.Test;
55
import org.unicode.jsp.UnicodeSetUtilities;
6+
import org.unicode.jsp.XPropertyFactory;
7+
import org.unicode.props.UnicodeProperty;
68
import org.unicode.unittest.TestFmwkMinusMinus;
79

810
public class TestMultivalued extends TestFmwkMinusMinus {
11+
12+
private static final boolean DEBUG = false;
13+
14+
UnicodeProperty exemplarProp = XPropertyFactory.make().getProperty("exemplar");
15+
UnicodeProperty scxProp = XPropertyFactory.make().getProperty("scx");
16+
917
@Test
1018
public void TestScx1Script() {
19+
if (DEBUG) {
20+
String x = scxProp.getValue('।');
21+
}
22+
1123
String unicodeSetString = "\\p{scx=deva}";
1224
UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
1325

@@ -20,6 +32,20 @@ public void TestScx1Script() {
2032
parsed.containsAll(mustNotContain));
2133
}
2234

35+
@Test
36+
public void TestScx1ScriptB() {
37+
String unicodeSetString = "\\p{scx=Arab}";
38+
UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
39+
40+
UnicodeSet mustContain = new UnicodeSet("[،ء]"); // one character single script, one multi
41+
assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain));
42+
43+
UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bangla character
44+
assertFalse(
45+
unicodeSetString + " !contains " + mustNotContain,
46+
parsed.containsAll(mustNotContain));
47+
}
48+
2349
@Test
2450
public void TestScxMulti() {
2551
String unicodeSetString = "\\p{scx=beng,deva}";
@@ -37,6 +63,10 @@ public void TestScxMulti() {
3763

3864
@Test
3965
public void TestExemplars() {
66+
if (DEBUG) {
67+
String x = exemplarProp.getValue('æ');
68+
}
69+
4070
String unicodeSetString = "\\p{exem=da}";
4171
UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
4272

@@ -48,4 +78,31 @@ public void TestExemplars() {
4878
unicodeSetString + " !contains " + mustNotContain,
4979
parsed.containsAll(mustNotContain));
5080
}
81+
82+
@Test
83+
public void TestEmpty() {
84+
assertEquals("exemplar(0x0000)", "", exemplarProp.getValue(0x0000));
85+
assertEquals("exemplar(α)", "el", exemplarProp.getValue('α'));
86+
87+
UnicodeSet exem = UnicodeSetUtilities.parseUnicodeSet("\\p{exem}");
88+
assertTrue("\\p{exem} contains 0", exem.contains(0x0000));
89+
assertFalse("\\p{exem} contains α", exem.contains('α'));
90+
UnicodeSet exem3 = UnicodeSetUtilities.parseUnicodeSet("\\p{exem=el}");
91+
assertFalse("\\p{exem=el} contains 0", exem3.contains(0x0000));
92+
assertTrue("\\p{exem=el} contains α", exem3.contains('α'));
93+
94+
String unicodeSetString = "[\\p{Greek}&\\p{exem}]";
95+
UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
96+
97+
String first = parsed.iterator().next();
98+
String firstValue = exemplarProp.getValue(first.codePointAt(0));
99+
assertEquals(unicodeSetString, "", firstValue);
100+
101+
String unicodeSetString2 = "[\\p{Greek}&\\P{exem}]";
102+
UnicodeSet parsed2 = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString2);
103+
104+
String first2 = parsed2.iterator().next();
105+
String firstValue2 = exemplarProp.getValue(first2.codePointAt(0));
106+
assertEquals(unicodeSetString2, "el", firstValue2);
107+
}
51108
}

unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,14 @@ public UnicodeProperty setMultivalued(boolean value) {
190190
(1 << ENUMERATED)
191191
| (1 << EXTENDED_ENUMERATED)
192192
| (1 << CATALOG)
193-
| (1 << EXTENDED_CATALOG);
193+
| (1 << EXTENDED_CATALOG),
194+
BINARY_OR_ENUMERATED_OR_CATALOG_MASK =
195+
(1 << ENUMERATED)
196+
| (1 << EXTENDED_ENUMERATED)
197+
| (1 << CATALOG)
198+
| (1 << EXTENDED_CATALOG)
199+
| (1 << BINARY)
200+
| (1 << EXTENDED_BINARY);
194201

195202
private static final String[] TYPE_NAMES = {
196203
"Unknown",
@@ -405,7 +412,7 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
405412
public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
406413
if (result == null) result = new UnicodeSet();
407414
boolean uniformUnassigned = hasUniformUnassigned();
408-
if (isType(STRING_OR_MISC_MASK)) {
415+
if (isType(STRING_OR_MISC_MASK) && !isMultivalued) {
409416
for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned);
410417
usi.next(); ) { // int i = 0; i <= 0x10FFFF; ++i
411418
int i = usi.codepoint;
@@ -423,7 +430,8 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
423430
while (it.hasNext()) {
424431
String value = it.next();
425432
temp.clear();
426-
Iterator<String> it2 = getValueAliases(value, temp).iterator();
433+
final List<String> valueAliases = getValueAliases(value, temp);
434+
Iterator<String> it2 = valueAliases.iterator();
427435
while (it2.hasNext()) {
428436
String value2 = it2.next();
429437
// System.out.println("Values:" + value2);

0 commit comments

Comments
 (0)