Skip to content

Commit da29127

Browse files
authored
CLDR-19098 Check for digits in main exemplars; fix Konkani (#5184)
1 parent 6dac2e4 commit da29127

File tree

4 files changed

+25
-28
lines changed

4 files changed

+25
-28
lines changed

common/main/kok.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -996,7 +996,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
996996
</codePatterns>
997997
</localeDisplayNames>
998998
<characters>
999-
<exemplarCharacters>[़ ० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ं ँ ः अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क{क़} ख{ख़} ग{ग़} घ ङ च छ ज{ज़} झ ञ ट ठ ड{ड़} ढ{ढ़} ण त थ द ध न प फ{फ़} ब भ म य{य़} र ल व श ष स ह ळ ऽ ा ि ी ु ू ृ ॅ े ै ॉ ो ौ ्]</exemplarCharacters>
999+
<exemplarCharacters>[़ ॐ ं ँ ः अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क{क़} ख{ख़} ग{ग़} घ ङ च छ ज{ज़} झ ञ ट ठ ड{ड़} ढ{ढ़} ण त थ द ध न प फ{फ़} ब भ म य{य़} र ल व श ष स ह ळ ऽ ा ि ी ु ू ृ ॅ े ै ॉ ो ौ ्]</exemplarCharacters>
10001000
<exemplarCharacters type="auxiliary">[\u200C\u200D ॄ]</exemplarCharacters>
10011001
<exemplarCharacters type="index" draft="contributed">[अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ]</exemplarCharacters>
10021002
<exemplarCharacters type="numbers">[\- ‑ , . % ‰ + 0० 1१ 2२ 3३ 4४ 5५ 6६ 7७ 8८ 9९]</exemplarCharacters>

common/main/mn_Mong.xml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,10 @@ CLDR data files are interpreted according to the LDML specification (http://unic
2525
</orientation>
2626
</layout>
2727
<characters>
28-
<exemplarCharacters>[᠐ ᠑ ᠒ ᠓ ᠔ ᠕ ᠖ ᠗ ᠘ ᠙ ᠠ ᠡ ᠢ ᠣ ᠤ ᠥ ᠦ ᠧ ᠨ ᠩ ᠪ ᠫ ᠬ ᠭ ᠮ ᠯ ᠰ ᠱ ᠲ ᠳ ᠴ ᠵ ᠶ ᠷ ᠸ ᠹ ᠺ ᠻ ᠼ ᠽ ᠾ ᠿ ᡀ ᡁ ᡂ]</exemplarCharacters>
28+
<exemplarCharacters>[ᠠ ᠡ ᠢ ᠣ ᠤ ᠥ ᠦ ᠧ ᠨ ᠩ ᠪ ᠫ ᠬ ᠭ ᠮ ᠯ ᠰ ᠱ ᠲ ᠳ ᠴ ᠵ ᠶ ᠷ ᠸ ᠹ ᠺ ᠻ ᠼ ᠽ ᠾ ᠿ ᡀ ᡁ ᡂ]</exemplarCharacters>
2929
<exemplarCharacters type="auxiliary">↑↑↑</exemplarCharacters>
3030
<exemplarCharacters type="index" draft="unconfirmed">[ᠠ ᠡ ᠢ ᠣ ᠤ ᠥ ᠦ ᠧ ᠨ ᠩ ᠪ ᠫ ᠬ ᠭ ᠮ ᠯ ᠰ ᠱ ᠲ ᠳ ᠴ ᠵ ᠶ ᠷ ᠸ ᠹ ᠺ ᠻ ᠼ ᠽ ᠾ ᠿ ᡀ ᡁ ᡂ]</exemplarCharacters>
31+
<exemplarCharacters type="numbers" draft="unconfirmed">[᠐ ᠑ ᠒ ᠓ ᠔ ᠕ ᠖ ᠗ ᠘ ᠙]</exemplarCharacters>
3132
</characters>
3233
<numbers>
3334
<defaultNumberingSystem>↑↑↑</defaultNumberingSystem>

common/main/pa.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1030,7 +1030,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/
10301030
</codePatterns>
10311031
</localeDisplayNames>
10321032
<characters>
1033-
<exemplarCharacters>[ੱ {ੰਂ} ਼ ੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ੴ ੳ ਉ ਊ ਓ ਅ ਆ ਐ ਔ ੲ ਇ ਈ ਏ ਸ{ਸ਼} ਹ ਕ ਖ{ਖ਼} ਗ{ਗ਼} ਘ ਙ ਚ ਛ ਜ{ਜ਼} ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ{ਫ਼} ਬ ਭ ਮ ਯ ਰ ਲ{ਲ਼} ਵ ੜ ੍ ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ]</exemplarCharacters>
1033+
<exemplarCharacters>[ੱ {ੰਂ} ਼ ੴ ੳ ਉ ਊ ਓ ਅ ਆ ਐ ਔ ੲ ਇ ਈ ਏ ਸ{ਸ਼} ਹ ਕ ਖ{ਖ਼} ਗ{ਗ਼} ਘ ਙ ਚ ਛ ਜ{ਜ਼} ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ{ਫ਼} ਬ ਭ ਮ ਯ ਰ ਲ{ਲ਼} ਵ ੜ ੍ ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ]</exemplarCharacters>
10341034
<exemplarCharacters type="auxiliary">[\u200C\u200Dਃ]</exemplarCharacters>
10351035
<exemplarCharacters type="index">[ੳ ਅ ੲ ਸ{ਸ਼} ਹ ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ੜ]</exemplarCharacters>
10361036
<exemplarCharacters type="numbers">[\- ‑ , . % ‰ + 0੦ 1੧ 2੨ 3੩ 4੪ 5੫ 6੬ 7੭ 8੮ 9੯]</exemplarCharacters>

tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckExemplars.java

Lines changed: 21 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,10 @@
1313
import java.util.List;
1414
import java.util.Locale;
1515
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
16-
import org.unicode.cldr.util.CLDRConfig;
1716
import org.unicode.cldr.util.CLDRFile;
1817
import org.unicode.cldr.util.ComparatorUtilities;
1918
import org.unicode.cldr.util.Factory;
2019
import org.unicode.cldr.util.SimpleUnicodeSetFormatter;
21-
import org.unicode.cldr.util.SupplementalDataInfo;
2220
import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
2321
import org.unicode.cldr.util.XPathParts;
2422

@@ -30,7 +28,6 @@ public class CheckExemplars extends FactoryCheckCLDR {
3028
"quotationStart", "quotationEnd",
3129
"alternateQuotationStart", "alternateQuotationEnd"
3230
};
33-
static final SupplementalDataInfo SUP = CLDRConfig.getInstance().getSupplementalDataInfo();
3431

3532
Collator col;
3633
boolean isRoot;
@@ -88,6 +85,12 @@ public class CheckExemplars extends FactoryCheckCLDR {
8885
.removeAll(new UnicodeSet("[[:Uppercase:]-[\u0130]]"))
8986
.freeze();
9087

88+
private static final UnicodeSet ALLOWED_IN_NUMBERS_NOT_IN_MAIN =
89+
new UnicodeSet("[[:Numeric_Type=Decimal:]]").freeze();
90+
91+
private static final UnicodeSet ALLOWED_IN_MAIN =
92+
new UnicodeSet(AllowedInExemplars).removeAll(ALLOWED_IN_NUMBERS_NOT_IN_MAIN).freeze();
93+
9194
public static final UnicodeSet ALLOWED_IN_PUNCTUATION =
9295
new UnicodeSet("[[:P:][:S:]-[:Sc:]]").freeze();
9396

@@ -102,7 +105,10 @@ public class CheckExemplars extends FactoryCheckCLDR {
102105
.freeze();
103106

104107
public enum ExemplarType {
105-
main(AllowedInExemplars, "(specific-script - uppercase - invisibles + \u0130)", true),
108+
main(
109+
ALLOWED_IN_MAIN,
110+
"(specific-script - uppercase - invisibles - numbers + \u0130)",
111+
true),
106112
auxiliary(ALLOWED_IN_AUX, "(specific-script - uppercase - invisibles + \u0130)", true),
107113
punctuation(ALLOWED_IN_PUNCTUATION, "punctuation", false),
108114
punctuation_auxiliary(ALLOWED_IN_PUNCTUATION, "punctuation-auxiliary", false),
@@ -178,9 +184,9 @@ public CheckCLDR handleSetCldrFileToCheck(
178184
public CheckCLDR handleCheck(
179185
String path, String fullPath, String value, Options options, List<CheckStatus> result) {
180186
if (fullPath == null) return this; // skip paths that we don't have
181-
if (path.indexOf("/exemplarCharacters") < 0) {
187+
if (!path.contains("/exemplarCharacters")) {
182188
if (path.contains("parseLenient")) {
183-
checkParse(path, fullPath, value, options, result);
189+
checkParse(path, value, result);
184190
}
185191
return this;
186192
}
@@ -205,7 +211,7 @@ public CheckCLDR handleCheck(
205211
new UnicodeSet(mainSet)
206212
.retainAll(auxiliarySet)
207213
.removeAll(HangulSyllables);
208-
if (overlap.size() != 0) {
214+
if (!overlap.isEmpty()) {
209215
String fixedExemplar1 = rawFormatter.format(overlap);
210216
result.add(
211217
new CheckStatus()
@@ -278,7 +284,7 @@ public CheckCLDR handleCheck(
278284

279285
// check for consistency with RTL
280286

281-
Boolean localeIsRTL = false;
287+
boolean localeIsRTL = false;
282288
String charOrientation =
283289
getResolvedCldrFileToCheck()
284290
.getStringValue("//ldml/layout/orientation/characterOrder");
@@ -310,8 +316,7 @@ public CheckCLDR handleCheck(
310316
return this;
311317
}
312318

313-
private void checkParse(
314-
String path, String fullPath, String value, Options options, List<CheckStatus> result) {
319+
private void checkParse(String path, String value, List<CheckStatus> result) {
315320
if (value == null) {
316321
CheckStatus message =
317322
new CheckStatus()
@@ -351,11 +356,7 @@ private void checkParse(
351356
.setCause(this)
352357
.setMainType(CheckStatus.errorType)
353358
.setSubtype(Subtype.badParseLenient)
354-
.setMessage(
355-
e.toString()
356-
+ (e.getMessage() == null
357-
? ""
358-
: ": " + e.getMessage()));
359+
.setMessage(e + (e.getMessage() == null ? "" : ": " + e.getMessage()));
359360
result.add(message);
360361
}
361362
}
@@ -418,10 +419,7 @@ private void checkMixedScripts(String title, UnicodeSet set, List<CheckStatus> r
418419
.setCause(this)
419420
.setMainType(CheckStatus.errorType)
420421
.setSubtype(Subtype.illegalExemplarSet)
421-
.setMessage(
422-
"{0} exemplars contain multiple scripts: {1}",
423-
new Object[] {title, scripts}));
424-
return;
422+
.setMessage("{0} exemplars contain multiple scripts: {1}", title, scripts));
425423
}
426424

427425
private void checkExemplar(String v, List<CheckStatus> result, ExemplarType exemplarType) {
@@ -472,15 +470,13 @@ private void checkExemplar(String v, List<CheckStatus> result, ExemplarType exem
472470
for (String s : remainder0) {
473471
if (Character.codePointCount(s, 0, s.length()) == 1) {
474472
remainder.add(s);
475-
} else {
476-
// just check normalization
477473
}
474+
// else just check normalization
478475
}
479476

480477
// after a first check, we check again in case we flattened
481478

482-
if (remainder.size() != 0) {
483-
fixedExemplar1 = displayFormatter.format(exemplar1);
479+
if (!remainder.isEmpty()) {
484480
result.add(
485481
new CheckStatus()
486482
.setCause(this)
@@ -490,13 +486,13 @@ private void checkExemplar(String v, List<CheckStatus> result, ExemplarType exem
490486
"Should be limited to "
491487
+ exemplarType.message
492488
+ "; thus not contain: \u200E{0}\u200E",
493-
new Object[] {remainder}));
489+
remainder));
494490
}
495491
}
496492

497493
// now check for empty
498494

499-
if (!isRoot && exemplar1.size() == 0) {
495+
if (!isRoot && exemplar1.isEmpty()) {
500496
switch (exemplarType) {
501497
// case currencySymbol: // ok if empty
502498
// break;

0 commit comments

Comments
 (0)