Skip to content

Commit

Permalink
Merge pull request #604 from wikimedia/no-underscores
Browse files Browse the repository at this point in the history
Normalize lang codes when reading SVGs and writing translations
  • Loading branch information
theresnotime authored Aug 2, 2022
2 parents bdc3868 + 05d2724 commit f47b331
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 17 deletions.
39 changes: 22 additions & 17 deletions src/Model/Svg/SvgFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,11 @@ protected function makeTranslationReady(): bool
return false;
}

// Normalize systemLanguage attributes to IETF standard.
if ($text->hasAttribute('systemLanguage')) {
$text->setAttribute('systemLanguage', $this->normalizeLang($text->getAttribute('systemLanguage')));
}

// Sort out switches
if ('switch' !== $text->parentNode->nodeName
&& 'svg:switch' !== $text->parentNode->nodeName
Expand Down Expand Up @@ -427,8 +432,7 @@ protected function analyse(): void
/** @var DOMElement $text */
$text = $actualNode->cloneNode(true);
$numChildren = $text->childNodes->length;
$lang = $text->hasAttribute('systemLanguage') ? $text->getAttribute('systemLanguage') : 'fallback';
$langCode = str_replace('_', '-', strtolower($lang));
$langCode = $text->hasAttribute('systemLanguage') ? $text->getAttribute('systemLanguage') : 'fallback';

$counter = 1;
for ($k = 0; $k < $numChildren; $k++) {
Expand Down Expand Up @@ -489,12 +493,13 @@ public function getInFileTranslations(): array

/**
* Set translations for a single language.
* @param string $lang Language code of the translations being provided.
* @param string $langCode Language code of the translations being provided.
* @param string[] $translations Array of tspan-IDs to message texts.
* @return string[][]
*/
public function setTranslations(string $lang, array $translations): array
public function setTranslations(string $langCode, array $translations): array
{
$lang = $this->normalizeLang($langCode);
// Load the existing translation structure, and go through it swapping the messages.
$inFileTranslations = $this->getInFileTranslations();
$filteredTextNodes = $this->getFilteredTextNodes();
Expand Down Expand Up @@ -577,7 +582,7 @@ public function switchToTranslationSet(array $translations): array
foreach ($translations[$textId] as $language => $translation) {
// Sort out systemLanguage attribute
if ('fallback' !== $language) {
$translation['systemLanguage'] = self::langCodeToOs($language);
$translation['systemLanguage'] = $language;
}

// Prepare an array of "children" (sub-messages)
Expand Down Expand Up @@ -629,6 +634,10 @@ public function switchToTranslationSet(array $translations): array
}
$this->reorderTexts();

// These will be re-populated when next requested.
$this->savedLanguages = null;
$this->inFileTranslations = null;

return [
'started' => array_unique($started),
'expanded' => array_unique($expanded),
Expand Down Expand Up @@ -798,10 +807,7 @@ public static function replaceIndicesRecursive(
protected function reorderTexts(): void
{
// Move sublocales to the beginning of their switch elements
$sublocales = $this->xpath->query(
"//text[contains(@systemLanguage,'-') or contains(@systemLanguage,'_')]"
."|//svg:text[contains(@systemLanguage,'-') or contains(@systemLanguage,'_')]"
);
$sublocales = $this->xpath->query("//text[contains(@systemLanguage,'-')]|//svg:text[contains(@systemLanguage,'-')]");
$count = $sublocales->length;
for ($i = 0; $i < $count; $i++) {
$sublocale = $sublocales->item($i);
Expand All @@ -823,16 +829,15 @@ protected function reorderTexts(): void
}

/**
* @param string $langCode
* Normalize a language code by replacing underscores with hyphens and making it all lowercase. Although the IETF
* recommends that language codes have e.g. uppercase region tags, they're actually case-insensitive and it's
* simpler to work with them if they're normalized to lowercase.
*
* @param string $lang
* @return string
*/
private static function langCodeToOs(string $langCode): string
private function normalizeLang(string $lang): string
{
if (false === strpos($langCode, '-')) {
// No territory specified, so no change to make (fr => fr)
return $langCode;
}
[ $prefix, $suffix ] = explode('-', $langCode, 2);
return $prefix.'_'.strtoupper($suffix);
return strtolower(str_replace('_', '-', $lang));
}
}
27 changes: 27 additions & 0 deletions tests/Model/Svg/SvgFileTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,33 @@ public function testSetTranslations(): void
static::assertStringNotContainsString('FooBarX', $svgContent);
}

/**
 * Language codes written with underscores and/or uppercase letters must come out lowercased and hyphenated,
 * both when an existing SVG is loaded and when new translations are written into it.
 */
public function testRemovesUnderscoresFromLangTags(): void
{
    $xmlDeclaration = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";

    // Loading: systemLanguage="zh_HANT" in the input is expected to be saved as "zh-hant".
    $svgFile = $this->getSvgFileFromString(
        '<svg><switch><text systemLanguage="zh_HANT">foo</text><text>bar</text></switch></svg>'
    );
    static::assertSame(
        $xmlDeclaration
        . '<svg xmlns="http://www.w3.org/2000/svg"><switch>'
        . '<text systemLanguage="zh-hant" id="trsvg3"><tspan id="trsvg1">foo</tspan></text>'
        . '<text id="trsvg4"><tspan id="trsvg2">bar</tspan></text>'
        . '</switch></svg>' . "\n",
        $svgFile->saveToString()
    );
    static::assertSame(['zh-hant', 'fallback'], $svgFile->getSavedLanguages());

    // Writing: a translation supplied for "en_gb" is expected to be stored under "en-gb", in the
    // systemLanguage attribute as well as in the generated element IDs.
    $svgFile->setTranslations('en_gb', ['trsvg2' => 'baz']);
    static::assertSame(
        $xmlDeclaration
        . '<svg xmlns="http://www.w3.org/2000/svg"><switch>'
        . '<text id="trsvg4-en-gb" systemLanguage="en-gb"><tspan id="trsvg2-en-gb">baz</tspan></text>'
        . '<text systemLanguage="zh-hant" id="trsvg3"><tspan id="trsvg1">foo</tspan></text>'
        . '<text id="trsvg4"><tspan id="trsvg2">bar</tspan></text>'
        . '</switch></svg>' . "\n",
        $svgFile->saveToString()
    );
    // The saved-languages list must reflect the newly written language too.
    static::assertSame(['en-gb', 'zh-hant', 'fallback'], $svgFile->getSavedLanguages());
}

public function testT214717(): void
{
$fileName = tempnam(sys_get_temp_dir(), 'SvgFile');
Expand Down

0 comments on commit f47b331

Please sign in to comment.