From 05d2724e67653d2987d130bbc16562682de5bb38 Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Fri, 8 Jul 2022 15:04:10 +0800 Subject: [PATCH] Normalize lang codes when reading SVGs and writing translations Make sure that language codes are made lowercase and underscores are replaced with hyphens when SVGs are read and when new translations are written to them. Bug: https://phabricator.wikimedia.org/T271000 --- src/Model/Svg/SvgFile.php | 39 +++++++++++++++++++-------------- tests/Model/Svg/SvgFileTest.php | 27 +++++++++++++++++++++++ 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/src/Model/Svg/SvgFile.php b/src/Model/Svg/SvgFile.php index e0e06efe..cbf30a5f 100644 --- a/src/Model/Svg/SvgFile.php +++ b/src/Model/Svg/SvgFile.php @@ -290,6 +290,11 @@ protected function makeTranslationReady(): bool return false; } + // Normalize systemLanguage attributes to IETF standard. + if ($text->hasAttribute('systemLanguage')) { + $text->setAttribute('systemLanguage', $this->normalizeLang($text->getAttribute('systemLanguage'))); + } + // Sort out switches if ('switch' !== $text->parentNode->nodeName && 'svg:switch' !== $text->parentNode->nodeName @@ -427,8 +432,7 @@ protected function analyse(): void /** @var DOMElement $text */ $text = $actualNode->cloneNode(true); $numChildren = $text->childNodes->length; - $lang = $text->hasAttribute('systemLanguage') ? $text->getAttribute('systemLanguage') : 'fallback'; - $langCode = str_replace('_', '-', strtolower($lang)); + $langCode = $text->hasAttribute('systemLanguage') ? $text->getAttribute('systemLanguage') : 'fallback'; $counter = 1; for ($k = 0; $k < $numChildren; $k++) { @@ -489,12 +493,13 @@ public function getInFileTranslations(): array /** * Set translations for a single language. - * @param string $lang Language code of the translations being provided. + * @param string $langCode Language code of the translations being provided. * @param string[] $translations Array of tspan-IDs to message texts. * @return string[][] */ - public function setTranslations(string $lang, array $translations): array + public function setTranslations(string $langCode, array $translations): array { + $lang = $this->normalizeLang($langCode); // Load the existing translation structure, and go through it swapping the messages. $inFileTranslations = $this->getInFileTranslations(); $filteredTextNodes = $this->getFilteredTextNodes(); @@ -577,7 +582,7 @@ public function switchToTranslationSet(array $translations): array foreach ($translations[$textId] as $language => $translation) { // Sort out systemLanguage attribute if ('fallback' !== $language) { - $translation['systemLanguage'] = self::langCodeToOs($language); + $translation['systemLanguage'] = $language; } // Prepare an array of "children" (sub-messages) @@ -629,6 +634,10 @@ public function switchToTranslationSet(array $translations): array } $this->reorderTexts(); + // These will be re-populated when next requested. + $this->savedLanguages = null; + $this->inFileTranslations = null; + return [ 'started' => array_unique($started), 'expanded' => array_unique($expanded), @@ -798,10 +807,7 @@ public static function replaceIndicesRecursive( protected function reorderTexts(): void { // Move sublocales to the beginning of their switch elements - $sublocales = $this->xpath->query( - "//text[contains(@systemLanguage,'-') or contains(@systemLanguage,'_')]" - ."|//svg:text[contains(@systemLanguage,'-') or contains(@systemLanguage,'_')]" - ); + $sublocales = $this->xpath->query("//text[contains(@systemLanguage,'-')]|//svg:text[contains(@systemLanguage,'-')]"); $count = $sublocales->length; for ($i = 0; $i < $count; $i++) { $sublocale = $sublocales->item($i); @@ -823,16 +829,15 @@ protected function reorderTexts(): void } /** - * @param string $langCode + * Normalize a language code by replacing underscores with hyphens and making it all lowercase. Although the IETF + * recommends that language codes have e.g. uppercase region tags, they're actually case-insensitive and it's + * simpler to work with them if they're normalized to lowercase. + * + * @param string $lang * @return string */ - private static function langCodeToOs(string $langCode): string + private function normalizeLang(string $lang): string { - if (false === strpos($langCode, '-')) { - // No territory specified, so no change to make (fr => fr) - return $langCode; - } - [ $prefix, $suffix ] = explode('-', $langCode, 2); - return $prefix.'_'.strtoupper($suffix); + return strtolower(str_replace('_', '-', $lang)); } } diff --git a/tests/Model/Svg/SvgFileTest.php b/tests/Model/Svg/SvgFileTest.php index eb889860..a7e8cf68 100644 --- a/tests/Model/Svg/SvgFileTest.php +++ b/tests/Model/Svg/SvgFileTest.php @@ -491,6 +491,33 @@ public function testSetTranslations(): void static::assertStringNotContainsString('FooBarX', $svgContent); } + public function testRemovesUnderscoresFromLangTags() { + // Test that underscores are replaced with hyphens when loading an SVG. + $svgFile = $this->getSvgFileFromString('foobar'); + $this->assertSame( + '' . "\n" + . '' + . 'foo' + . 'bar' + . '' . "\n", + $svgFile->saveToString() + ); + $this->assertSame( ['zh-hant', 'fallback'], $svgFile->getSavedLanguages() ); + + // And also when writing new languages to the file. + $svgFile->setTranslations('en_gb', ['trsvg2' => 'baz']); + $this->assertSame( + '' . "\n" + . '' + . 'baz' + . 'foo' + . 'bar' + . '' . "\n", + $svgFile->saveToString() + ); + $this->assertSame(['en-gb', 'zh-hant', 'fallback'], $svgFile->getSavedLanguages()); + } + public function testT214717(): void { $fileName = tempnam(sys_get_temp_dir(), 'SvgFile');