diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DoctypeXmlStreamWrapper.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DoctypeXmlStreamWrapper.java
index c71599f2fb3..a4f5c5ee948 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DoctypeXmlStreamWrapper.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DoctypeXmlStreamWrapper.java
@@ -8,6 +8,8 @@
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import org.unicode.cldr.icu.LDMLConstants;
 import org.xml.sax.InputSource;
 
@@ -17,6 +19,7 @@ public class DoctypeXmlStreamWrapper {
     private static final byte DOCTYPE_BYTES[] = DOCTYPE.getBytes(StandardCharsets.UTF_8);
     // the string to look for: xmlns="
     private static final String XMLNS_EQUALS = LDMLConstants.XMLNS + "=\"";
+    private static final String XMLNS_SCHEMA_BASE = XMLNS_EQUALS + CLDRURLS.CLDR_SCHEMA_BASE + "/";
 
     /**
      * Size of the input buffer, needs to be able to handle any expansion when the header is updated
@@ -84,12 +87,30 @@ private static String fixup(byte[] inbuf, String encoding) {
         }
     }
 
+    private static final Pattern numberAndType = PatternCache.get("^([0-9][0-9])\\/([a-zA-Z0-9]+)");
+
     /** Fix an input String, including DOCTYPE */
     private static String fixup(final String s) {
-        // exit if nothing matches
-        for (final DtdType d : DtdType.values()) {
-            if (s.contains(XMLNS_EQUALS + d.getNsUrl())) {
-                return fixup(s, d);
+        // Does it contain any CLDR-looking schemas?
+        final int xmlnsIndex = s.indexOf(XMLNS_SCHEMA_BASE);
+        if (xmlnsIndex != -1) {
+            final String remainder = s.substring(xmlnsIndex + XMLNS_SCHEMA_BASE.length());
+            final Matcher m = numberAndType.matcher(remainder);
+            if (m.lookingAt()) {
+                // final String ver = m.group(1); // Not currently used.
+                final String type = m.group(2);
+                // Is it a valid DtdType? Match by name: DtdType.valueOf() throws
+                // IllegalArgumentException for an unknown name and never returns null.
+                for (final DtdType d : DtdType.values()) {
+                    if (d.name().equals(type)) {
+                        // fix it up unconditionally.
+                        // Could check version # here.
+                        return fixup(s, d);
+                    }
+                }
+                // Unknown type: report it, then fall through (input may be under 100 chars).
+                final String head = s.substring(0, Math.min(s.length(), 100));
+                System.err.println("Bad DTDtype: " + type + " in : " + head);
             }
         }
         // couldn't fix it, just pass through
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java
index 5b4320a47a6..947399ceb72 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java
@@ -157,11 +157,6 @@ public String getXsdPath() {
         return dtdPath.replaceAll("\\.dtd$", ".xsd");
     }
 
-    /** The xmlns name for this dtd type */
-    public String getNsUrl() {
-        return CLDRURLS.CLDR_CURVER_BASE + "/" + name();
-    }
-
     /** The current version DTD as a URI */
     String getDtdUri() {
         return new File(CLDRPaths.BASE_DIRECTORY, dtdPath).toURI().toString();
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java b/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java
index 5199e6147c9..2829fb3014c 100644
--- a/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java
@@ -15,6 +15,7 @@
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 import org.unicode.cldr.util.CLDRConfig;
+import org.unicode.cldr.util.DoctypeXmlStreamWrapper;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 
@@ -29,7 +30,7 @@ public class TestKeyboardFlatten {
     })
     void TestBrokenImports(final String path) throws IOException {
         try (final InputStream input = TestKeyboardFlatten.class.getResourceAsStream(path); ) {
-            final InputSource source = new InputSource(input);
+            final InputSource source = DoctypeXmlStreamWrapper.wrap(new InputSource(input));
             // Expect failure.
             assertThrows(
                     IllegalArgumentException.class,
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java
index ada7c2708f2..22f7d677fee 100644
--- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java
@@ -385,8 +385,7 @@ public TimingInfo check(File systemID) {
             xmlReader.setErrorHandler(new MyErrorHandler());
             InputSource is = new InputSource(fis);
             is.setSystemId(systemID.toString());
-            DoctypeXmlStreamWrapper.wrap(is);
-            xmlReader.parse(is);
+            xmlReader.parse(DoctypeXmlStreamWrapper.wrap(is));
             // fis.close();
         } catch (SAXException | IOException e) {
             errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + e.getMessage());