From 2ccc2bbff590964a37090dd84b084dba13cdd838 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sat, 8 Nov 2025 18:40:59 -0800 Subject: [PATCH 1/4] CLDR-19110 New Growth Chart --- common/dtd/ldmlSupplemental.dtd | 1 - .../org/unicode/cldr/tool/ChartDelta.java | 7 +- .../org/unicode/cldr/tool/ChartDtdDelta.java | 43 ++++-- .../cldr/tool/GenerateFullCldrGrowth.java | 136 ++++++++++++++++++ 4 files changed, 175 insertions(+), 12 deletions(-) create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java diff --git a/common/dtd/ldmlSupplemental.dtd b/common/dtd/ldmlSupplemental.dtd index e968cae5a7a..003d24306c2 100644 --- a/common/dtd/ldmlSupplemental.dtd +++ b/common/dtd/ldmlSupplemental.dtd @@ -1229,7 +1229,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic - diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java index b6f3382967f..57fd5b671c9 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java @@ -298,8 +298,11 @@ private void showTotals() { for (Entry> entry : fileCounters.entrySet()) { showTotal(pw, entry.getKey(), entry.getValue()); } - for (String s : badHeaders) { - pw.println(s); + if (!badHeaders.isEmpty()) { + System.out.println("Bad headers: PageId.Unknown with PathHeader"); + for (String s : badHeaders) { + System.out.println(s); + } } // pw.println("# EOF"); } catch (IOException e) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDtdDelta.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDtdDelta.java index 079b896d475..9532ffe19c0 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDtdDelta.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDtdDelta.java @@ -6,6 +6,7 @@ import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; import com.ibm.icu.impl.Utility; import com.ibm.icu.util.VersionInfo; import java.io.FileNotFoundException; @@ -21,6 +22,7 @@ import java.util.Map; import java.util.Set; import java.util.regex.Matcher; +import java.util.stream.Collectors; import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.tool.ToolConstants.ChartStatus; import org.unicode.cldr.util.CLDRConfig; @@ -31,6 +33,7 @@ import org.unicode.cldr.util.DtdData.AttributeStatus; import org.unicode.cldr.util.DtdData.Element; import org.unicode.cldr.util.DtdType; +import org.unicode.cldr.util.Joiners; import org.unicode.cldr.util.SupplementalDataInfo; /** @@ -42,10 +45,10 @@ public class ChartDtdDelta extends Chart { private static final Splitter SPLITTER_SPACE = Splitter.on(' '); - private static final String NEW_PREFIX = "+"; + private static final String NEW_PREFIX = "⨁"; private static final String DEPRECATED_PREFIX = "⊖"; - private static final String UNDEPRECATED_PREFIX = "⊙"; // no occurances yet + private static final String UNDEPRECATED_PREFIX = "⊙"; // no occurrences yet private static final String ORDERED_SIGN = "⇣"; private static final String UNORDERED_SIGN = "⇟"; @@ -207,7 +210,15 @@ public void writeContents(FormattedFileWriter pw) throws IOException { try (PrintWriter tsvFile = FileUtilities.openUTF8Writer( CLDRPaths.CHART_DIRECTORY + "/tsv/", "dtd_deltas.tsv")) { - tablePrinter.toTsv(tsvFile); + for (DiffElement datum : data) { + tsvFile.println( + Joiners.TAB.join( + datum.getVersionString(), + datum.dtdType, + datum.newPath, + datum.newElement, + Joiners.SP.join(datum.newAttributes))); + } } } @@ -271,7 +282,8 @@ private void checkNames( // indication if (seen.contains(element)) { if (showAnyway) { - addData(dtdCurrent, NEW_PREFIX + name, version, newPath, OMITTED_ATTRIBUTES); + Set foo = Set.of(); + addData(dtdCurrent, NEW_PREFIX + name, version, newPath, OMITTED_ATTRIBUTES, foo); } return; } @@ -302,7 +314,8 @@ private void checkNames( prefix + name + (ordered ? ORDERED_SIGN : ""), version, newPath, - attributeNames); + attributeNames, + element.getAttributes().keySet()); } else { oldElement = oldNameToElement.get(name); boolean oldOrdered = oldElement.isOrdered(); @@ -345,7 +358,10 @@ private void checkNames( deprecatedStatus + previewStatus + name + orderingStatus, version, newPath, - attributeNames); + attributeNames, + Sets.difference( + element.getAttributes().keySet(), + oldElement.getAttributes().keySet())); } } if (element.getName().equals("coordinateUnit")) { @@ -389,13 +405,19 @@ private static class DiffElement { final String newPath; final String newElement; final String attributeNames; + final Set newAttributes; public DiffElement( DtdData dtdCurrent, String version, String newPath, String newElement, - Set attributeNames2) { + Set attributeNames2, + Set newAttributes) { + this.newAttributes = + newAttributes.stream() + .map(x -> NEW_PREFIX + x.getName()) + .collect(Collectors.toSet()); isBeta = version.endsWith("β"); try { this.version = @@ -450,8 +472,11 @@ private void addData( String element, String prefix, String newPath, - Set attributeNames) { - DiffElement item = new DiffElement(dtdCurrent, prefix, newPath, element, attributeNames); + Set attributeNames, + Set newAttributes) { + DiffElement item = + new DiffElement( + dtdCurrent, prefix, newPath, element, attributeNames, newAttributes); data.add(item); } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java new file mode 100644 index 00000000000..26899b7c341 --- /dev/null +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java @@ -0,0 +1,136 @@ +package org.unicode.cldr.tool; + +import com.google.common.base.Objects; +import com.google.common.collect.Sets; +import java.io.IOException; +import java.nio.file.*; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRFile.DraftStatus; +import org.unicode.cldr.util.CLDRPaths; +import org.unicode.cldr.util.Counter; +import org.unicode.cldr.util.DtdData; +import org.unicode.cldr.util.DtdData.Attribute; +import org.unicode.cldr.util.DtdData.AttributeStatus; +import org.unicode.cldr.util.DtdType; +import org.unicode.cldr.util.Joiners; +import org.unicode.cldr.util.XPathParts; + +public class GenerateFullCldrGrowth { + + enum ChangeType { + same, + added, + deleted, + changed; + + static ChangeType getDiff(String current, String last) { + return Objects.equal(current, last) + ? ChangeType.same + : current == null + ? ChangeType.deleted + : last == null ? ChangeType.added : ChangeType.changed; + } + } + + public static void main(String[] args) throws IOException { + Path archiveDir = + Paths.get(CLDRPaths.ARCHIVE_DIRECTORY); // Replace with your directory path + + // get subdirs in alpha order + List orderedCldrDirs = + Files.list(archiveDir) + .filter( + x -> + x.getFileName().toString().startsWith("cldr-") + && x.getFileName().toString().compareTo("cldr-2") + >= 0) + .sorted(Comparator.comparing(Path::getFileName)) + .collect(Collectors.toList()); + + Path previousRelease = null; + System.out.println("Version\t" + Joiners.TAB.join(ChangeType.values())); + for (Path release : orderedCldrDirs) { + if (previousRelease != null) { + compare(release, previousRelease); + } + previousRelease = release; + } + } + + private static void compare(Path release, Path previousRelease) throws IOException { + Counter changes = new Counter<>(); + try (Stream stream = Files.walk(release.resolve("common"))) { + stream.filter( + x -> + x.getFileName().toString().endsWith(".xml") + && !x.toString().contains("/collation/")) // && + // x.toString().contains("/annotations/") + .forEach(x -> getChanges(changes, x, replaceBase(x, release, previousRelease))); + } + System.out.println( + release.getFileName() + + "\t" + + List.of(ChangeType.values()).stream() + .map(x -> String.valueOf(changes.get(x))) + .collect(Collectors.joining("\t"))); + } + + private static Path replaceBase(Path x, Path xPrefix, Path otherPrefix) { + Path relativePath = xPrefix.relativize(x); + return otherPrefix.resolve(relativePath); + } + + private static void getChanges(Counter changes, Path x, Path previousRelease) { + CLDRFile current = + CLDRFile.loadFromFile( + x.toFile(), x.getFileName().toString(), DraftStatus.contributed); + CLDRFile last = + !previousRelease.toFile().exists() + ? null + : CLDRFile.loadFromFile( + previousRelease.toFile(), + previousRelease.getFileName().toString(), + DraftStatus.contributed); + boolean mayHaveValueAttributes = current.getDtdType() != DtdType.ldml; + DtdData dtdData = current.getDtdData(); + // could optimize by finding elements with value attributes and caching + for (String currentPath : current) { + String currentValue = current.getStringValue(currentPath); + String lastValue = last == null ? null : last.getStringValue(currentPath); + ChangeType changeType = ChangeType.getDiff(currentValue, lastValue); + changes.add(changeType, 1); + if (mayHaveValueAttributes) { + XPathParts currentParts = + XPathParts.getFrozenInstance(current.getFullXPath(currentPath)); + XPathParts lastParts = + last == null + ? null + : XPathParts.getFrozenInstance(last.getFullXPath(currentPath)); + for (int i = 0; i < currentParts.size(); ++i) { + String element = currentParts.getElement(i); + Map currentAttributes = currentParts.getAttributes(i); + Map lastAttributes = + lastParts == null ? Collections.emptyMap() : lastParts.getAttributes(i); + for (String attribute : + Sets.union(currentAttributes.keySet(), lastAttributes.keySet())) { + Attribute attributeInfo = dtdData.getAttribute(element, attribute); + if (attributeInfo != null + && attributeInfo.attributeStatus == AttributeStatus.value) { + String currentAttributeValue = currentAttributes.get(attribute); + String lastAttributeValue = lastAttributes.get(attribute); + changes.add( + ChangeType.getDiff(currentAttributeValue, lastAttributeValue), + 1); + } + } + } + } + } + } +} From 3a26d446d96ead115a9911af70997fdadce66451 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 10 Nov 2025 12:06:59 -0800 Subject: [PATCH 2/4] CLDR-19110 Updates --- common/dtd/ldmlSupplemental.dtd | 7 +- .../org/unicode/cldr/tool/CldrVersion.java | 135 +++++++++++------- .../cldr/tool/GenerateFullCldrGrowth.java | 131 ++++++++++++----- 3 files changed, 182 insertions(+), 91 deletions(-) diff --git a/common/dtd/ldmlSupplemental.dtd b/common/dtd/ldmlSupplemental.dtd index 003d24306c2..477062127e5 100644 --- a/common/dtd/ldmlSupplemental.dtd +++ b/common/dtd/ldmlSupplemental.dtd @@ -7,6 +7,11 @@ CLDR data files are interpreted according to the LDML specification (http://unic + + + + + @@ -300,7 +305,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic - @@ -313,7 +317,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic - diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CldrVersion.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CldrVersion.java index ab7f4189e9f..0742d3bcd79 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CldrVersion.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CldrVersion.java @@ -5,6 +5,12 @@ import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap; import com.ibm.icu.util.VersionInfo; import java.io.File; +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.EnumSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -25,62 +31,63 @@ // TODO merge with all other copies of the CLDR version and replace with supplemental metadata, // CLDR-9149 public enum CldrVersion { - unknown, - v1_1, - v1_1_1, - v1_2, - v1_3, - v1_4, - v1_4_1, - v1_5_0_1, - v1_5_1, - v1_6_1, - v1_7_2, - v1_8_1, - v1_9_1, - v2_0_1, - v21_0, - v22_1, - v23_1, - v24_0, - v25_0, - v26_0, - v27_0, - v28_0, - v29_0, - v30_0, - v31_0, - v32_0, - v33_0, - v33_1, - v34_0, - v35_0, - v35_1, - v36_0, - v36_1, - v37_0, - v38_0, - v38_1, - v39_0, - v40_0, - v41_0, - v42_0, - v43_0, - v44_0, - v44_1, - v45_0, - v46_0, - v46_1, - v47_0, - v48_0, + unknown(""), + v1_1("2004-06-08"), + v1_1_1("2004-07-29"), + v1_2("2004-11-04"), + v1_3("2005-06-02"), + v1_4("2006-07-17"), + v1_4_1("2006-11-03"), + v1_5_0_1("2007-07-31"), + v1_5_1("2007-12-21"), + v1_6_1("2008-07-23"), + v1_7_2("2009-12-10"), + v1_8_1("2010-04-29"), + v1_9_1("2011-03-11"), + v2_0_1("2011-07-18"), + v21_0("2012-02-10"), + v22_1("2012-10-26"), + v23_1("2013-05-15"), + v24_0("2013-09-18"), + v25_0("2014-03-19"), + v26_0("2014-09-18"), + v27_0("2015-03-19"), + v28_0("2015-09-17"), + v29_0("2016-03-16"), + v30_0("2016-10-05"), + v31_0("2017-03-20"), + v32_0("2017-11-01"), + v33_0("2018-03-26"), + v33_1("2018-06-20"), + v34_0("2018-10-15"), + v35_0("2019-03-27"), + v35_1("2019-04-17"), + v36_0("2019-10-04"), + v36_1("2020-03-11"), + v37_0("2020-04-23"), + v38_0("2020-10-28"), + v38_1("2020-12-17"), + v39_0("2021-04-07"), + v40_0("2021-10-27"), + v41_0("2022-04-06"), + v42_0("2022-10-19"), + v43_0("2023-04-12"), + v44_0("2023-10-31"), + v44_1("2023-12-13"), + v45_0("2024-04-17"), + v46_0("2024-10-24"), + v46_1("2024-12-18"), + v47_0("2025-03-13"), + v48_0("2025-10-29"), /** * @see CLDRFile#GEN_VERSION */ - baseline; + baseline(""); private final String baseDirectory; private final String dotName; private final VersionInfo versionInfo; + private final Instant date; /** * Get the closest available version (successively dropping lower-significance values) We do @@ -118,6 +125,16 @@ public static CldrVersion from(String versionString) { : versionString); } + public Instant getDate() { + return date; + } + + static final ZoneId Z = ZoneId.of("GMT"); + + public int getYear() { + return ZonedDateTime.ofInstant(date, Z).getYear(); + } + public VersionInfo getVersionInfo() { return versionInfo; } @@ -135,7 +152,7 @@ public boolean isOlderThan(CldrVersion other) { return compareTo(other) < 0; } - private CldrVersion() { + private CldrVersion(String date) { String oldName = name(); if (oldName.charAt(0) == 'v') { dotName = oldName.substring(1).replace('_', '.'); @@ -147,12 +164,14 @@ private CldrVersion() { final VersionInfo cldrVersion = VersionInfo.getInstance(CLDRFile.GEN_VERSION); versionInfo = "baseline".equals(oldName) ? cldrVersion : VersionInfo.getInstance(0); } + this.date = date.isEmpty() ? null : Instant.parse(date + "T00:00:00Z"); } public static final CldrVersion LAST_RELEASE_VERSION = values()[values().length - 2]; public static final List CLDR_VERSIONS_ASCENDING; public static final List CLDR_VERSIONS_DESCENDING; private static final Map versionInfoToCldrVersion; + public static final List LAST_RELEASE_EACH_YEAR; static { EnumSet temp = EnumSet.allOf(CldrVersion.class); @@ -176,6 +195,22 @@ private CldrVersion() { } } versionInfoToCldrVersion = ImmutableMap.copyOf(temp2); + + List lastReleaseEachYear = new ArrayList<>(); + int lastYear = -1; + ArrayList descending = new ArrayList<>(Arrays.asList(CldrVersion.values())); + Collections.reverse(descending); + for (CldrVersion v : descending) { + if (v == CldrVersion.baseline || v == CldrVersion.unknown) { + continue; + } + int year = v.getYear(); + if (year != lastYear) { + lastReleaseEachYear.add(v); + lastYear = year; + } + } + LAST_RELEASE_EACH_YEAR = List.copyOf(lastReleaseEachYear); } public List getPathsForFactory() { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java index 26899b7c341..4c7144a3ca7 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java @@ -5,9 +5,10 @@ import java.io.IOException; import java.nio.file.*; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeMap; import java.util.stream.Collectors; import java.util.stream.Stream; import org.unicode.cldr.util.CLDRFile; @@ -19,6 +20,7 @@ import org.unicode.cldr.util.DtdData.AttributeStatus; import org.unicode.cldr.util.DtdType; import org.unicode.cldr.util.Joiners; +import org.unicode.cldr.util.Splitters; import org.unicode.cldr.util.XPathParts; public class GenerateFullCldrGrowth { @@ -38,47 +40,72 @@ static ChangeType getDiff(String current, String last) { } } - public static void main(String[] args) throws IOException { - Path archiveDir = - Paths.get(CLDRPaths.ARCHIVE_DIRECTORY); // Replace with your directory path + static Path archiveDir = + Paths.get(CLDRPaths.ARCHIVE_DIRECTORY); // Replace with your directory path - // get subdirs in alpha order - List orderedCldrDirs = - Files.list(archiveDir) - .filter( - x -> - x.getFileName().toString().startsWith("cldr-") - && x.getFileName().toString().compareTo("cldr-2") - >= 0) - .sorted(Comparator.comparing(Path::getFileName)) - .collect(Collectors.toList()); + public static void main(String[] args) throws IOException { + System.out.println(CldrVersion.LAST_RELEASE_EACH_YEAR); - Path previousRelease = null; - System.out.println("Version\t" + Joiners.TAB.join(ChangeType.values())); - for (Path release : orderedCldrDirs) { - if (previousRelease != null) { - compare(release, previousRelease); + System.out.println("Version\tYear\t" + Joiners.TAB.join(ChangeType.values())); + CldrVersion nextVersion = null; + for (CldrVersion previousVersion : CldrVersion.LAST_RELEASE_EACH_YEAR) { + if (nextVersion != null) { + compare(nextVersion, previousVersion); } - previousRelease = release; + nextVersion = previousVersion; } } - private static void compare(Path release, Path previousRelease) throws IOException { + static final Set SKIP_COMMON_SUBDIRS = + Set.of( + "collation", + "annotations", + "annotationsDerived", + "casing", + "subdivisions", + "supplemental-temp"); + + private static void compare(CldrVersion nextVersion, CldrVersion previousVersion) + throws IOException { + // if (nextVersion.compareTo(CldrVersion.v2_0_1) > 0) { // for debugging + // return; + // } + Path release = archiveDir.resolve("cldr-" + nextVersion + "/common"); + Path previousRelease = archiveDir.resolve("cldr-" + previousVersion + "/common"); + int commonIndex = release.getNameCount(); + Map failures = new TreeMap<>(); + Counter changes = new Counter<>(); - try (Stream stream = Files.walk(release.resolve("common"))) { + try (Stream stream = + Files.walk(release).collect(Collectors.toList()).parallelStream()) { stream.filter( x -> x.getFileName().toString().endsWith(".xml") - && !x.toString().contains("/collation/")) // && - // x.toString().contains("/annotations/") - .forEach(x -> getChanges(changes, x, replaceBase(x, release, previousRelease))); + && !SKIP_COMMON_SUBDIRS.contains( + x.getName(commonIndex).toString())) + .forEach( + x -> { + String error = + getChanges( + changes, + x, + replaceBase(x, release, previousRelease)); + if (error != null) { + failures.put(x, error); + } + }); } System.out.println( - release.getFileName() + nextVersion + + "\t" + + nextVersion.getYear() + "\t" + List.of(ChangeType.values()).stream() .map(x -> String.valueOf(changes.get(x))) .collect(Collectors.joining("\t"))); + if (!failures.isEmpty()) { + System.out.println(failures); + } } private static Path replaceBase(Path x, Path xPrefix, Path otherPrefix) { @@ -86,25 +113,44 @@ private static Path replaceBase(Path x, Path xPrefix, Path otherPrefix) { return otherPrefix.resolve(relativePath); } - private static void getChanges(Counter changes, Path x, Path previousRelease) { - CLDRFile current = - CLDRFile.loadFromFile( - x.toFile(), x.getFileName().toString(), DraftStatus.contributed); - CLDRFile last = - !previousRelease.toFile().exists() - ? null - : CLDRFile.loadFromFile( - previousRelease.toFile(), - previousRelease.getFileName().toString(), - DraftStatus.contributed); + private static String getChanges(Counter changes, Path x, Path previousRelease) { + CLDRFile current = null; + CLDRFile last = null; + try { + current = + CLDRFile.loadFromFile( + x.toFile(), x.getFileName().toString(), DraftStatus.contributed); + last = + !previousRelease.toFile().exists() + ? null + : CLDRFile.loadFromFile( + previousRelease.toFile(), + previousRelease.getFileName().toString(), + DraftStatus.contributed); + } catch (Exception e) { + return e.getMessage(); + } boolean mayHaveValueAttributes = current.getDtdType() != DtdType.ldml; DtdData dtdData = current.getDtdData(); // could optimize by finding elements with value attributes and caching for (String currentPath : current) { String currentValue = current.getStringValue(currentPath); String lastValue = last == null ? null : last.getStringValue(currentPath); - ChangeType changeType = ChangeType.getDiff(currentValue, lastValue); - changes.add(changeType, 1); + if (currentPath.contains("/annotations/")) { + Set currentSet = getVBarSet(currentValue); + Set lastSet = getVBarSet(lastValue); + int sameCount = Sets.intersection(currentSet, lastSet).size(); + changes.add(ChangeType.same, sameCount); + int addCount = currentSet.size() - sameCount; + int deleteCount = lastSet.size() - sameCount; + int changeCount = Math.min(addCount, deleteCount); + changes.add(ChangeType.changed, addCount); + changes.add(ChangeType.added, addCount - changeCount); + changes.add(ChangeType.deleted, deleteCount - changeCount); + } else { + ChangeType changeType = ChangeType.getDiff(currentValue, lastValue); + changes.add(changeType, 1); + } if (mayHaveValueAttributes) { XPathParts currentParts = XPathParts.getFrozenInstance(current.getFullXPath(currentPath)); @@ -132,5 +178,12 @@ private static void getChanges(Counter changes, Path x, Path previou } } } + return null; + } + + private static Set getVBarSet(String currentValue) { + return currentValue == null + ? Set.of() + : Set.copyOf(Splitters.VBAR.splitToList(currentValue)); } } From 740ffca5d5d445b9fb6ffb85e81248bc518b3e18 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 10 Nov 2025 13:39:42 -0800 Subject: [PATCH 3/4] CLDR-19110 Fixes for tests --- common/dtd/ldmlSupplemental.dtd | 2 +- .../resources/org/unicode/cldr/util/data/PathHeader.txt | 2 ++ .../test/java/org/unicode/cldr/unittest/TestDtdData.java | 8 +++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/common/dtd/ldmlSupplemental.dtd b/common/dtd/ldmlSupplemental.dtd index 477062127e5..9cb03a574b2 100644 --- a/common/dtd/ldmlSupplemental.dtd +++ b/common/dtd/ldmlSupplemental.dtd @@ -8,7 +8,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic - + diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt index b30482644e6..f973dec0683 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt @@ -542,7 +542,9 @@ //supplementalData/measurementData/paperSize[@type="%A"]/_territories ; Supplemental ; Measurement ; Paper ; $1 ; HIDE //supplementalData/dayPeriodRuleSet[@type="%A"]/dayPeriodRules[@locales="%A"]/dayPeriodRule[@type="%A"]/_%E ; Supplemental ; DayPeriod ; $1-$2 ; $3-$4 ; HIDE +//supplementalData/dayPeriodRuleSet[@type="%A"]/dayPeriodRules[@locales="%A"]/dayPeriodRule[@type="%A"][@from="%A"]/_%E ; Supplemental ; DayPeriod ; $1-$2 ; $3-$4-$5 ; HIDE //supplementalData/dayPeriodRuleSet/dayPeriodRules[@locales="%A"]/dayPeriodRule[@type="%A"]/_%E ; Supplemental ; DayPeriod ; $1 ; $2-$3 ; HIDE +//supplementalData/dayPeriodRuleSet/dayPeriodRules[@locales="%A"]/dayPeriodRule[@type="%A"][@from="%A"]/_%E ; Supplemental ; DayPeriod ; $1 ; $2-$3-$4 ; HIDE //supplementalData/grammaticalData/grammaticalFeatures[@targets="%A"][@locales="%A"]/grammaticalCase[@scope="%A"]/_values ; Supplemental ; Grammar ; Case ; $1-$2-$3 ; HIDE //supplementalData/grammaticalData/grammaticalFeatures[@targets="%A"][@locales="%A"]/grammaticalDefiniteness[@scope="%A"]/_values ; Supplemental ; Grammar ; Definiteness ; $1-$2-$3 ; HIDE diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java index 962d7812b6b..301f3951f57 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java @@ -991,8 +991,14 @@ public void testEmptyPcdata() { // REQUIRED "//keyboardTest3…/repertoire[@ type < name", // Status: value ≠ distinguished // Mode: OPTIONAL ≠ REQUIRED - "//keyboardTest3…/info[@ author < name" // Status: metadata ≠ distinguished + "//keyboardTest3…/info[@ author < name", // Status: metadata ≠ distinguished // Mode: OPTIONAL ≠ REQUIRED + "//supplementalData…/dayPeriodRule[@ before < from", + // Status: value ≠ distinguished Mode: OPTIONAL + "//supplementalData…/minDays[@ territories < draft", + // Status: value ≠ distinguished Mode: REQUIRED ≠ OPTIONAL + "//supplementalData…/firstDay[@ territories < draft" + // Status: value ≠ distinguished Mode: REQUIRED ≠ OPTIONAL ); public void testAttributeOrder() { From 2ba2b3f73e71b72ebd7eed357a5f3b33e821a3f5 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Thu, 13 Nov 2025 17:30:42 -0800 Subject: [PATCH 4/4] CLDR-19110 Update for charts --- .../cldr/tool/GenerateFullCldrGrowth.java | 67 ++++++++++++++----- .../unicode/cldr/unittest/TestDtdData.java | 4 +- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java index 4c7144a3ca7..4ed17b6ffde 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateFullCldrGrowth.java @@ -9,6 +9,7 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import java.util.stream.Collectors; import java.util.stream.Stream; import org.unicode.cldr.util.CLDRFile; @@ -46,7 +47,8 @@ static ChangeType getDiff(String current, String last) { public static void main(String[] args) throws IOException { System.out.println(CldrVersion.LAST_RELEASE_EACH_YEAR); - System.out.println("Version\tYear\t" + Joiners.TAB.join(ChangeType.values())); + System.out.println(Changes.header()); + CldrVersion nextVersion = null; for (CldrVersion previousVersion : CldrVersion.LAST_RELEASE_EACH_YEAR) { if (nextVersion != null) { @@ -56,6 +58,33 @@ public static void main(String[] args) throws IOException { } } + private static class Changes { + Counter changeTypes = new Counter<>(); + Set locales = new TreeSet<>(); + Counter dtdTypes = new Counter<>(); + + static String header() { + return Joiners.TAB.join( + "Version", + "Year", + Joiners.TAB.join(ChangeType.values()), + "Locales", + Joiners.TAB.join(DtdType.values())); + } + + @Override + public String toString() { + return Joiners.TAB.join( + List.of(ChangeType.values()).stream() + .map(x -> String.valueOf(changeTypes.get(x))) + .collect(Collectors.joining("\t")), + locales.size(), + List.of(DtdType.values()).stream() + .map(x -> String.valueOf(dtdTypes.get(x))) + .collect(Collectors.joining("\t"))); + } + } + static final Set SKIP_COMMON_SUBDIRS = Set.of( "collation", @@ -75,7 +104,8 @@ private static void compare(CldrVersion nextVersion, CldrVersion previousVersion int commonIndex = release.getNameCount(); Map failures = new TreeMap<>(); - Counter changes = new Counter<>(); + Changes changes = new Changes(); + try (Stream stream = Files.walk(release).collect(Collectors.toList()).parallelStream()) { stream.filter( @@ -95,14 +125,10 @@ private static void compare(CldrVersion nextVersion, CldrVersion previousVersion } }); } - System.out.println( - nextVersion - + "\t" - + nextVersion.getYear() - + "\t" - + List.of(ChangeType.values()).stream() - .map(x -> String.valueOf(changes.get(x))) - .collect(Collectors.joining("\t"))); + System.out.println(Joiners.TAB.join(nextVersion, nextVersion.getYear(), changes)); + if (nextVersion == CldrVersion.LAST_RELEASE_EACH_YEAR.get(0)) { + System.out.println(changes.locales); + } if (!failures.isEmpty()) { System.out.println(failures); } @@ -113,7 +139,7 @@ private static Path replaceBase(Path x, Path xPrefix, Path otherPrefix) { return otherPrefix.resolve(relativePath); } - private static String getChanges(Counter changes, Path x, Path previousRelease) { + private static String getChanges(Changes changes, Path x, Path previousRelease) { CLDRFile current = null; CLDRFile last = null; try { @@ -130,26 +156,31 @@ private static String getChanges(Counter changes, Path x, Path previ } catch (Exception e) { return e.getMessage(); } - boolean mayHaveValueAttributes = current.getDtdType() != DtdType.ldml; + boolean isLdml = current.getDtdType() == DtdType.ldml; + if (isLdml) { + changes.locales.add(x.getFileName().toString()); + } + boolean mayHaveValueAttributes = !isLdml; DtdData dtdData = current.getDtdData(); // could optimize by finding elements with value attributes and caching for (String currentPath : current) { + changes.dtdTypes.add(dtdData.dtdType, 1); String currentValue = current.getStringValue(currentPath); String lastValue = last == null ? null : last.getStringValue(currentPath); if (currentPath.contains("/annotations/")) { Set currentSet = getVBarSet(currentValue); Set lastSet = getVBarSet(lastValue); int sameCount = Sets.intersection(currentSet, lastSet).size(); - changes.add(ChangeType.same, sameCount); + changes.changeTypes.add(ChangeType.same, sameCount); int addCount = currentSet.size() - sameCount; int deleteCount = lastSet.size() - sameCount; int changeCount = Math.min(addCount, deleteCount); - changes.add(ChangeType.changed, addCount); - changes.add(ChangeType.added, addCount - changeCount); - changes.add(ChangeType.deleted, deleteCount - changeCount); + changes.changeTypes.add(ChangeType.changed, addCount); + changes.changeTypes.add(ChangeType.added, addCount - changeCount); + changes.changeTypes.add(ChangeType.deleted, deleteCount - changeCount); } else { ChangeType changeType = ChangeType.getDiff(currentValue, lastValue); - changes.add(changeType, 1); + changes.changeTypes.add(changeType, 1); } if (mayHaveValueAttributes) { XPathParts currentParts = @@ -170,7 +201,7 @@ private static String getChanges(Counter changes, Path x, Path previ && attributeInfo.attributeStatus == AttributeStatus.value) { String currentAttributeValue = currentAttributes.get(attribute); String lastAttributeValue = lastAttributes.get(attribute); - changes.add( + changes.changeTypes.add( ChangeType.getDiff(currentAttributeValue, lastAttributeValue), 1); } diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java index 301f3951f57..fbdd544f024 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java @@ -993,9 +993,9 @@ public void testEmptyPcdata() { // Mode: OPTIONAL ≠ REQUIRED "//keyboardTest3…/info[@ author < name", // Status: metadata ≠ distinguished // Mode: OPTIONAL ≠ REQUIRED - "//supplementalData…/dayPeriodRule[@ before < from", + "//supplementalData…/dayPeriodRule[@ before < from", // Status: value ≠ distinguished Mode: OPTIONAL - "//supplementalData…/minDays[@ territories < draft", + "//supplementalData…/minDays[@ territories < draft", // Status: value ≠ distinguished Mode: REQUIRED ≠ OPTIONAL "//supplementalData…/firstDay[@ territories < draft" // Status: value ≠ distinguished Mode: REQUIRED ≠ OPTIONAL