From 5f3489c146299192d940146e4c539cea5ecba9a3 Mon Sep 17 00:00:00 2001 From: brave-builds Date: Mon, 30 Dec 2024 20:46:31 +0000 Subject: [PATCH] Uplift of #27005 (squashed) to beta --- build/commands/lib/pullL10n.js | 9 +- build/commands/lib/pushL10n.js | 2 + build/commands/scripts/commands.js | 10 +- script/lib/l10n/crowdin/__init__.py | 0 .../lib/l10n/crowdin/api_v2_client_wrapper.py | 278 ++++++++++++++ script/lib/l10n/crowdin/common.py | 135 +++++++ script/lib/l10n/crowdin/pull.py | 341 ++++++++++++++++++ script/lib/l10n/crowdin/push.py | 321 +++++++++++++++++ script/lib/l10n/grd_utils.py | 5 +- script/pull-l10n.py | 59 ++- script/push-l10n.py | 144 ++++++-- 11 files changed, 1259 insertions(+), 45 deletions(-) create mode 100644 script/lib/l10n/crowdin/__init__.py create mode 100755 script/lib/l10n/crowdin/api_v2_client_wrapper.py create mode 100755 script/lib/l10n/crowdin/common.py create mode 100755 script/lib/l10n/crowdin/pull.py create mode 100755 script/lib/l10n/crowdin/push.py mode change 100644 => 100755 script/pull-l10n.py diff --git a/build/commands/lib/pullL10n.js b/build/commands/lib/pullL10n.js index 555412f7a503..7e4accad49cf 100644 --- a/build/commands/lib/pullL10n.js +++ b/build/commands/lib/pullL10n.js @@ -21,10 +21,13 @@ const pullL10n = (options) => { l10nUtil.getBraveTopLevelPaths().forEach((sourceStringPath) => { if (!options.grd_path || sourceStringPath.endsWith(path.sep + options.grd_path)) { - let cmd_args = ['script/pull-l10n.py', '--source_string_path', sourceStringPath] + let args = ['script/pull-l10n.py', + '--service', options.service, + '--channel', options.channel, + '--source_string_path', sourceStringPath] if (options.debug) - cmd_args.push('--debug') - util.run('python3', cmd_args, cmdOptions) + args.push('--debug') + util.run('python3', args, cmdOptions) } }) } diff --git a/build/commands/lib/pushL10n.js b/build/commands/lib/pushL10n.js index df0a20f40fa3..eaa8dc2b2079 100644 --- a/build/commands/lib/pushL10n.js +++ b/build/commands/lib/pushL10n.js @@ -31,6 +31,8 @@ const pushL10n = (options) => { 'python3', [ 'script/push-l10n.py', + '--service', options.service, + '--channel', options.channel, '--source_string_path', sourceStringPath, extraScriptOptions diff --git a/build/commands/scripts/commands.js b/build/commands/scripts/commands.js index 0eac75ff1323..930417fddb14 100644 --- a/build/commands/scripts/commands.js +++ b/build/commands/scripts/commands.js @@ -188,15 +188,19 @@ program program .command('pull_l10n') + .option('--service ', 'Service to use: Transifex or Crowdin') + .option('--channel ', 'Release|Beta|Nightly for Crowdin, Release for Transifex') .option('--grd_path ', `Relative path to match end of full GRD path, e.g: 'generated_resources.grd'.`) - .option('--debug', `Dumps downloaded content for one language into TransifexCurrent.txt file in the temp directory.`) + .option('--debug', `Dumps downloaded content for one language into TransifexCurrent.txt or CrowdinCurrent.txt file in the temp directory.`) .action(pullL10n) program .command('push_l10n') + .option('--service ', 'Service to use: Transifex or Crowdin') + .option('--channel ', 'Release|Beta|Nightly for Crowdin, Release for Transifex') .option('--grd_path ', `Relative path to match end of full GRD path, e.g: 'generated_resources.grd'.`) - .option('--with_translations', 'Push local translations. 
WARNING: this will overwrite translations in Tansifex.') - .option('--with_missing_translations', 'Push local translations for strings that do not have translations in Transifex.') + .option('--with_translations', 'Push local translations. WARNING: this will overwrite translations in Transifex/Crowdin.') + .option('--with_missing_translations', 'Push local translations for strings that do not have translations in Transifex/Crowdin.') .action(pushL10n) program diff --git a/script/lib/l10n/crowdin/__init__.py b/script/lib/l10n/crowdin/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/script/lib/l10n/crowdin/api_v2_client_wrapper.py b/script/lib/l10n/crowdin/api_v2_client_wrapper.py new file mode 100755 index 000000000000..715f5314c903 --- /dev/null +++ b/script/lib/l10n/crowdin/api_v2_client_wrapper.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2024 The Brave Authors. All rights reserved. +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at https://mozilla.org/MPL/2.0/. */ + +import requests + +from lib.config import get_env_var +# pylint: disable=import-error +from crowdin_api import CrowdinClient +from crowdin_api.api_resources.source_files.enums import FileType +# pylint: enable=import-error + +# This module is a wrapper around Crowdin API v2 + + +class CrowdinClientWrapper(): + """Wrapper class for the Crowdin API (v2) python SDK from + https://github.com/crowdin/crowdin-api-client-python""" + + def __init__(self, project_id): + self._organization = 'Brave-Software' + self._project_id = project_id + self._auth_token = get_env_var('CROWDIN_API_KEY') + assert self._project_id, \ + 'CrowdinClientWrapper: project_id is not set.' + assert self._auth_token, \ + 'BRAVE_CROWDIN_API_KEY environmental var is not set.' + # Set up CrowdinClient using an API token. 
You can generate one at + # https://brave-software.crowdin.com/u/user_settings/access-tokens + self._client = CrowdinClient(organization=self._organization, + project_id=self.project_id, + token=self._auth_token) + + @property + def project_id(self): + return self._project_id + + def __get_branch(self, branch_name): + all_branches = self._client.source_files.list_project_branches( + projectId=self._project_id)['data'] + for branch_data in all_branches: + branch = branch_data['data'] + if branch['name'] == branch_name: + return branch['id'] + return 0 + + def __create_branch(self, branch_name): + branch = self._client.source_files.add_branch( + name=branch_name, projectId=self._project_id) + return branch['data']['id'] + + def __create_storage(self, resource_path): + storage_data = self._client.storages.add_storage( + open(resource_path, 'rb')) + return storage_data['data']['id'] + + def __get_resource_file(self, branch_id, resource_name): + all_files = self._client.source_files.list_files( + projectId=self._project_id, branchId=branch_id)['data'] + for file_data in all_files: + file = file_data['data'] + if file['name'] == resource_name: + return file['id'] + return 0 + + def __add_resource_file(self, branch_id, storage_id, resource_name, + file_type): + file_types_map = { + 'ANDROID': FileType.ANDROID, + 'CHROME': FileType.CHROME + } + assert file_type in file_types_map, ('Unexpected file type: ' + + f'{file_type}.') + + new_file = self._client.source_files.add_file( + storageId=storage_id, + name=resource_name, + projectId=self._project_id, + branchId=branch_id, + type=file_types_map[file_type]) + return new_file['data']['id'] + + def __update_resource_file(self, file_id, storage_id): + updated_file = self._client.source_files.update_file( + file_id, storageId=storage_id, projectId=self._project_id) + return updated_file['data']['id'] + + def __get_resource_download_url(self, file_id): + download = self._client.source_files.download_file( + fileId=file_id, projectId=self._project_id) + return download['data']['url'] + + def __get_resource_translation_download_url(self, file_id, lang_code): + download = self._client.translations.export_project_translation( + targetLanguageId=lang_code, + projectId=self._project_id, + fileIds=[file_id], + skipUntranslatedStrings=True) + return download['data']['url'] + + def __get_resource_file_strings(self, file_id): + return \ + self._client.source_strings.with_fetch_all().list_strings( + projectId=self._project_id, fileId=file_id)['data'] + + def __get_string_id_from_key(self, all_strings, string_key): + for string_data in all_strings: + string = string_data['data'] + if string['identifier'] == string_key: + return string['id'] + return 0 + + def __has_source_string_l10n(self, string_id, lang_code): + all_translations = \ + self._client.string_translations.list_string_translations( + projectId=self._project_id, stringId=string_id, + languageId=lang_code)['data'] + return len(all_translations) and \ + len(all_translations[0]['data']['text']) + + def __delete_source_string_l10n(self, string_id, lang_code): + self._client.string_translations.delete_string_translations( + projectId=self._project_id, + stringId=string_id, + languageId=lang_code) + + def __add_source_string_l10n(self, string_id, lang_code, translation): + self._client.string_translations.add_translation( + projectId=self._project_id, + stringId=string_id, + languageId=lang_code, + text=translation) + + def __upload_translation(self, file_id, storage_id, lang_code): + uploaded_file = 
self._client.translations.upload_translation( + projectId=self._project_id, + languageId=lang_code, + storageId=storage_id, + fileId=file_id, + importEqSuggestions=True, # Add l10n == source + autoApproveImported=True, + translateHidden=True) + return uploaded_file['data']['fileId'] + + # Wrapper API + + def is_supported_language(self, lang_code): + project = self._client.projects.get_project( + projectId=self._project_id)['data'] + return lang_code in project['targetLanguageIds'] + + def upload_resource_file(self, branch, upload_file_path, resource_name, + i18n_type): + """Upload resource file to Crowdin""" + # Create new storage for the file + storage_id = self.__create_storage(upload_file_path) + # Check if the branch already exists + branch_id = self.__get_branch(branch) + if branch_id: + print(f'Branch {branch} already exists') + # Check if this file already exists and if so update it + file_id = self.__get_resource_file(branch_id, resource_name) + if file_id: + print(f'Resource {resource_name} already exists. Updating...') + return self.__update_resource_file(file_id, storage_id) + else: + # Create new branch + print(f'Creating new branch {branch}') + branch_id = self.__create_branch(branch) + + print(f'Creating a new resource {resource_name}') + file_id = self.__add_resource_file(branch_id, storage_id, + resource_name, i18n_type) + return file_id + + def get_resource_source(self, branch, resource_name): + """Downloads resource source file (original language) from + Crowdin""" + branch_id = self.__get_branch(branch) + assert branch_id, ( + f'Unable to get resource {resource_name} for ' + + f'branch {branch} because the branch doesn\'t exist') + file_id = self.__get_resource_file(branch_id, resource_name) + assert file_id, ( + f'Unable to get resource {resource_name} for ' + + f'branch {branch} because the resource doesn\'t exist') + url = self.__get_resource_download_url(file_id) + r = requests.get(url, timeout=10) + assert r.status_code == 200, \ + f'Aborting. Status code {r.status_code}: {r.content}' + r.encoding = 'utf-8' + content = r.text.encode('utf-8') + return content + + def get_resource_l10n(self, branch, resource_name, lang_code, file_ext): + """Downloads resource l10n from Crowdin for the given language""" + assert file_ext in ('.grd', + '.json'), (f'Unexpected file extension {file_ext}') + if self.is_supported_language(lang_code): + branch_id = self.__get_branch(branch) + assert branch_id, ( + f'Unable to get {resource_name} l10n for ' + + f'branch {branch} because the branch doesn\'t exist') + file_id = self.__get_resource_file(branch_id, resource_name) + assert file_id, ( + f'Unable to get {resource_name} l10n for ' + + f'branch {branch} because the resource doesn\'t exist') + url = self.__get_resource_translation_download_url( + file_id, lang_code) + r = requests.get(url, timeout=10) + assert r.status_code == 200 or r.status_code == 204, \ + f'Aborting. Status code {r.status_code}: {r.content}' + if r.status_code == 200: + r.encoding = 'utf-8' + if file_ext == '.grd': + # Remove xml declaration header + second_line = r.text.find('\n') + 1 + text = r.text[second_line:] + else: + text = r.text + content = text.encode('utf-8') + return content + # Either unsupported language or status_code == 204 which means the + # file is empty. + if file_ext == '.json': + # For json files we need to have content even if untranslated, so + # get the source strings instead. 
+ return self.get_resource_source(branch, resource_name) + # For GRDs we can just return an empty content: + return ''.encode('utf-8') + + def upload_strings_l10n(self, branch, resource_name, translations, + missing_only): + """Upload translations""" + branch_id = self.__get_branch(branch) + assert branch_id, ( + f'Unable to get resource {resource_name} for ' + + f'branch {branch} because the branch doesn\'t exist') + file_id = self.__get_resource_file(branch_id, resource_name) + assert file_id, ( + f'Unable to get resource {resource_name} for ' + + f'branch {branch} because the resource doesn\'t exist') + all_strings = self.__get_resource_file_strings(file_id) + # Translation is a dictionary whose keys are the string keys and values + # are lists of tuples of language codes and translation strings. + total = len(translations.keys()) + for idx, string_key in enumerate(translations.keys()): + string_id = self.__get_string_id_from_key(all_strings, string_key) + assert string_id, (f'Unable to find string by key {string_key} ' + + f'in resource {resource_name}.') + print(f'[{idx + 1}/{total}] Uploading translations for key ' + + f'{string_key}') + + for lang_code, translation in translations[string_key]: + has_l10n = self.__has_source_string_l10n(string_id, lang_code) + if has_l10n: + if missing_only: + print(f' Skipping {lang_code}: already translated.') + continue + self.__delete_source_string_l10n(string_id, lang_code) + print(f' Uploading {lang_code}') + self.__add_source_string_l10n(string_id, lang_code, + translation) + + def upload_grd_l10n_file(self, branch, upload_file_path, resource_name, + lang): + """Upload grd l10n file to Crowdin""" + # Create new storage for the file + storage_id = self.__create_storage(upload_file_path) + # Check if the branch already exists + branch_id = self.__get_branch(branch) + assert branch_id, f'Branch {branch} doesn\'t exist.' + file_id = self.__get_resource_file(branch_id, resource_name) + assert file_id, f'Resource {resource_name} doesn\'t exists.' + return self.__upload_translation(file_id, storage_id, lang) diff --git a/script/lib/l10n/crowdin/common.py b/script/lib/l10n/crowdin/common.py new file mode 100755 index 000000000000..8e375aa08618 --- /dev/null +++ b/script/lib/l10n/crowdin/common.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2024 The Brave Authors. All rights reserved. +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at https://mozilla.org/MPL/2.0/. */ + +import json +import re +import os +import lxml.etree # pylint: disable=import-error + +from lib.l10n.grd_utils import textify + +# pylint: disable=import-error +from lib.l10n.crowdin.api_v2_client_wrapper import CrowdinClientWrapper +# pylint: enable=import-error + +brave_project_id = 6 # Brave Core (Android+Chrome) + +# This module contains functionality common to both pulling down translations +# from Crowdin and pushing source strings up to Crowdin. + +# Filenames that are fully handled by Crowdin (as opposed to files for which +# we create overrides that are then handled by Crowdin). 
+crowdin_handled_files = [ + 'android_brave_strings.xml', + 'brave_generated_resources.xml', + 'brave_components_strings.xml', + 'brave_extension.json', +] + + +def should_use_crowdin_for_file(source_string_path, filename): + """ Determines if the given file should be handled by Crowdin locally""" + name = crowdin_name_from_filename(source_string_path, filename) + return name in crowdin_handled_files + + +# pylint: disable=inconsistent-return-statements +def crowdin_name_from_filename(source_file_path, filename): + ext = os.path.splitext(source_file_path)[1] + # GRD files are uploaded in "Android XML" format. + if ext == '.grd': + return filename + '.xml' + # JSON files are uploaded as "Chrome JSON" format. + if 'brave_extension' in source_file_path: + return 'brave_extension.json' + assert False, ('JSON files should be mapped explicitly, this ' + f'one is not: {source_file_path}') + + +# pylint: enable=inconsistent-return-statements + + +def xtb_lang_to_crowdin_lang(lang): + """Reformats language code from XTB format to Crowdin format""" + # The lang code "iw" is the old code for Hebrew, Crowdin and GRDs use + # "he", but Chromium still uses "iw" inside the XTBs. + if lang == 'iw': + return 'he' + if lang == 'pt-PT': + return 'pt' + return lang + + +def json_lang_to_crowdin_lang(lang): + """Reformats language code from json format to Crowdin format""" + lang = lang.replace('_', '-') + if lang == 'pt-PT': + return 'pt' + return lang + + +def get_strings_dict_from_xml_content(xml_content): + """Obtains a dictionary mapping the string name to text from XML content""" + strings = lxml.etree.fromstring(xml_content).findall('string') + return { + string_tag.get('name'): textify_from_crowdin(string_tag) + for string_tag in strings + } + + +def fixup_string_from_crowdin(val): + """Returns the text of a node from Crowdin which also fixes up common + problems that localizers do""" + if val is None: + return val + val = (val.replace('&lt;', + '<').replace('&gt;', + '>').replace('&amp;', '&')) + return val + + +def textify_from_crowdin(tag): + """Returns the text content of a tag received from Crowdin while fixing + up common problems that localizers cause""" + return fixup_string_from_crowdin(textify(tag)) + + +def get_acceptable_json_lang_codes(langs_dir_path): + lang_codes = set(os.listdir(langs_dir_path)) + # Source language for Brave locales + lang_codes.discard('en_US') + + # Files that are not locales + lang_codes.discard('.DS_Store') + lang_codes.discard('index.json') + + return sorted(lang_codes) + + +def get_json_strings(json_file_path): + with open(json_file_path, mode='r', encoding='utf-8') as f: + data = json.load(f) + strings = [] + for key in data: + string_name = key + string_value = data[key]["message"] + string_desc = data[key]["description"] if "description" \ + in data[key] else "" + string_tuple = (string_name, string_value, string_desc) + strings.append(string_tuple) + return strings + + +# Client instance +def get_crowdin_client_wrapper(): + if get_crowdin_client_wrapper.wrapper is None: + get_crowdin_client_wrapper.wrapper = CrowdinClientWrapper( + project_id=brave_project_id) + return get_crowdin_client_wrapper.wrapper + + +get_crowdin_client_wrapper.wrapper = None diff --git a/script/lib/l10n/crowdin/pull.py b/script/lib/l10n/crowdin/pull.py new file mode 100755 index 000000000000..2e99ca0813a4 --- /dev/null +++ b/script/lib/l10n/crowdin/pull.py @@ -0,0 +1,341 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2024 The Brave Authors. All rights reserved. 
+# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at https://mozilla.org/MPL/2.0/. */ + +import html +import json +import os +import re +import lxml.etree # pylint: disable=import-error + +from lib.l10n.grd_utils import (get_grd_strings, get_override_file_path, + get_xtb_files) +from lib.l10n.crowdin.common import ( + get_acceptable_json_lang_codes, get_crowdin_client_wrapper, + get_json_strings, get_strings_dict_from_xml_content, + json_lang_to_crowdin_lang, textify_from_crowdin, + crowdin_name_from_filename, xtb_lang_to_crowdin_lang) +from lib.l10n.validation import validate_tags_in_one_string + +# This module contains functionality specific to pulling down translations +# from Crowdin. + +# API functions +# ------------- + + +def pull_source_file_from_crowdin(channel, source_file_path, filename, + dump_path): + """Downloads translations from Crowdin""" + ext = os.path.splitext(source_file_path)[1] + if ext == '.grd': + xtb_files = get_xtb_files(source_file_path) + base_path = os.path.dirname(source_file_path) + grd_strings = get_grd_strings(source_file_path) + for (lang_code, xtb_rel_path) in xtb_files: + xtb_file_path = os.path.join(base_path, xtb_rel_path) + print(f'Updating: {xtb_file_path} {lang_code}') + xml_content = get_crowdin_translation_file_content( + channel, source_file_path, filename, lang_code, dump_path) + xml_content = fixup_bad_ph_tags_from_raw_crowdin_string( + xml_content) + errors = validate_tags_in_crowdin_strings(xml_content) + assert errors is None, errors + xml_content = trim_ph_tags_in_xtb_file_content(xml_content) + translations = get_strings_dict_from_xml_content(xml_content) + xtb_content = generate_xtb_content(lang_code, grd_strings, + translations) + with open(xtb_file_path, mode='wb') as f: + f.write(xtb_content) + elif ext == '.json': + langs_dir_path = os.path.dirname(os.path.dirname(source_file_path)) + lang_codes = get_acceptable_json_lang_codes(langs_dir_path) + for lang_code in lang_codes: + print(f'getting filename {filename} for lang_code {lang_code}') + content = get_crowdin_translation_file_content( + channel, source_file_path, filename, lang_code, dump_path) + json_content = generate_json_content(content, source_file_path) + localized_translation_path = (os.path.join(langs_dir_path, + lang_code, + 'messages.json')) + dir_path = os.path.dirname(localized_translation_path) + if not os.path.exists(dir_path): + os.mkdir(dir_path) + with open(localized_translation_path, mode='wb') as f: + f.write(json_content.encode('utf-8')) + + +def combine_override_xtb_into_original(source_string_path): + """Applies XTB override file to the original""" + source_base_path = os.path.dirname(source_string_path) + override_path = get_override_file_path(source_string_path) + override_base_path = os.path.dirname(override_path) + xtb_files = get_xtb_files(source_string_path) + override_xtb_files = get_xtb_files(override_path) + assert len(xtb_files) == len(override_xtb_files) + + for (idx, _) in enumerate(xtb_files): + (lang, xtb_path) = xtb_files[idx] + (override_lang, override_xtb_path) = override_xtb_files[idx] + assert lang == override_lang + + xtb_tree = lxml.etree.parse(os.path.join(source_base_path, xtb_path)) + override_xtb_tree = lxml.etree.parse( + os.path.join(override_base_path, override_xtb_path)) + translationbundle = xtb_tree.xpath('//translationbundle')[0] + override_translations = override_xtb_tree.xpath('//translation') + translations 
= xtb_tree.xpath('//translation') + + override_translation_fps = [ + t.attrib['id'] for t in override_translations + ] + translation_fps = [t.attrib['id'] for t in translations] + + # Remove translations that we have a matching FP for + for translation in xtb_tree.xpath('//translation'): + if translation.attrib['id'] in override_translation_fps: + translation.getparent().remove(translation) + elif translation_fps.count(translation.attrib['id']) > 1: + translation.getparent().remove(translation) + translation_fps.remove(translation.attrib['id']) + + # Append the override translations into the original translation bundle + for translation in override_translations: + translationbundle.append(translation) + + xtb_content = (b'\n' + + lxml.etree.tostring(xtb_tree, + pretty_print=True, + xml_declaration=False, + encoding='utf-8').strip()) + with open(os.path.join(source_base_path, xtb_path), mode='wb') as f: + f.write(xtb_content) + # Delete the override xtb for this lang + os.remove(os.path.join(override_base_path, override_xtb_path)) + + +# Helper functions +# ---------------- + + +def crowdin_lang_to_xtb_lang(lang): + """Reformats language code from Crowdin format to XTB format""" + # The lang code "iw" is the old code for Hebrew, Crowdin and GRDs use + # "he", but Chromium still uses "iw" inside the XTBs, and it causes a + # compiling error on Windows if "he" is used. + if lang == 'he': + return 'iw' + if lang == 'pt': + return 'pt-PT' + return lang + + +def get_crowdin_translation_file_content(channel, source_file_path, filename, + lang_code, dump_path): + """Obtains a translation Android xml format and returns the string""" + ext = os.path.splitext(source_file_path)[1] + assert ext in ('.grd', '.json'), f'Unexpected extension {ext}' + crowdin_lang_code = xtb_lang_to_crowdin_lang( + lang_code) if ext == '.grd' else json_lang_to_crowdin_lang(lang_code) + resource_name = crowdin_name_from_filename(source_file_path, filename) + content = get_crowdin_client_wrapper().get_resource_l10n( + channel, resource_name, crowdin_lang_code, ext) + content = fix_crowdin_translation_file_content(content, ext) + if dump_path: + with open(dump_path, mode='wb') as f: + f.write(content) + verify_crowdin_translation_file_content(content, ext) + return content.decode('utf-8') + + +def fix_crowdin_translation_file_content(content, file_ext): + """Fixes escaped quotes in Crowdin translation file content""" + if file_ext == '.json': + # For .json files, for some reason Crowdin puts a \' + return content.replace(b"\\'", b"'") + if file_ext == '.grd': + # For .grd files, for some reason Crowdin puts a \\" and \' + return content.replace(b'\\\\"', + b'"').replace(b'\\"', + b'"').replace(b"\\'", b"'") + return None + + +def verify_crowdin_translation_file_content(content, file_ext): + """Verifies that Crowdin translation file content is parse-able""" + if file_ext == '.json': + json.loads(content) + elif file_ext == '.grd': + lxml.etree.fromstring(content) + + +def fixup_bad_ph_tags_from_raw_crowdin_string(xml_content): + """Attempts to fix improperly formatted PH tags in Crowdin translation + file content""" + begin_index = 0 + while begin_index < len(xml_content) and begin_index != -1: + string_index = xml_content.find('', string_index) + if string_index == -1: + return xml_content + string_index += 1 + string_end_index = xml_content.find('', string_index) + if string_end_index == -1: + return xml_content + before_part = xml_content[:string_index] + ending_part = xml_content[string_end_index:] + val = 
process_bad_ph_tags_for_one_string( + xml_content[string_index:string_end_index]) + xml_content = before_part + val + ending_part + begin_index = xml_content.find('', begin_index) + if begin_index != -1: + begin_index += 9 + return xml_content + + +def process_bad_ph_tags_for_one_string(val): + """Fixes common issues with PH tag formatting""" + val = (val.replace('\r\n', '\n').replace('\r', '\n')) + if val.find('<ph') == -1: + return val + val = (val.replace('<', '<').replace( + 'ph name="', + 'ph name="').replace('ph name= "', 'ph name="').replace( + 'ph name= ', 'ph name=').replace('">', '">').replace( + '>', '>').replace('> ', '> ').replace(' <', ' <')) + return val + + +def trim_ph_tags_in_xtb_file_content(xml_content): + """Removes all children of tags including text inside ph tag""" + xml = lxml.etree.fromstring(xml_content) + phs = xml.findall('.//ph') + for ph in phs: + lxml.etree.strip_elements(ph, '*') + if ph.text is not None: + ph.text = '' + return lxml.etree.tostring(xml, encoding='utf-8') + + +def generate_xtb_content(lang_code, grd_strings, translations): + """Generates an XTB file from a set of translations and GRD strings""" + # Used to make sure duplicate fingerprint strings are not made + # XTB only contains 1 entry even if multiple string names are + # different but have the same value. + all_string_fps = set() + translationbundle_tag = create_xtb_format_translationbundle_tag(lang_code) + for string in grd_strings: + if string[0] in translations: + fingerprint = string[2] + if fingerprint in all_string_fps: + continue + all_string_fps.add(fingerprint) + translation = translations[string[0]] + if len(translation) != 0: + check_plural_string_formatting(string[1], translation) + translationbundle_tag.append( + create_xtb_format_translation_tag(fingerprint, + translation)) + + xml_string = lxml.etree.tostring(translationbundle_tag, encoding='utf-8') + xml_string = html.unescape(xml_string.decode('utf-8')) + xml_string = ('\n\n' + + xml_string) + return xml_string.encode('utf-8') + + +def create_xtb_format_translationbundle_tag(lang): + """Creates the root XTB XML element""" + translationbundle_tag = lxml.etree.Element('translationbundle') + lang = crowdin_lang_to_xtb_lang(lang) + translationbundle_tag.set('lang', lang) + # Adds a newline so the first translation isn't glued to the + # translationbundle element for us weak humans. + translationbundle_tag.text = '\n' + return translationbundle_tag + + +def check_plural_string_formatting(grd_string_content, translation_content): + """Checks 'plural' string formatting in translations""" + pattern = re.compile(r"\s*{(.*,\s*plural,)(\s*offset:[0-2])?" + r"(\s*(=0|zero)\s*{(.*)})?" + r"(\s*(=1|one)\s*{(.*)})?" + r"(\s*(=2|two)\s*{(.*)})?" + r"(\s*(few)\s*{(.*)})?" + r"(\s*(many)\s*{(.*)})?" + r"(\s*other\s*{(.*)})?" 
+ r"\s*}\s*") + if pattern.match(grd_string_content) is not None: + if pattern.match(translation_content) is None: + error = ('Translation of plural string:\n' + '-----------\n' + f"{grd_string_content.encode('utf-8')}\n" + '-----------\n' + 'does not match:\n' + '-----------\n' + f"{translation_content.encode('utf-8')}\n" + '-----------\n') + raise ValueError(error) + else: + # This finds plural strings that the pattern above doesn't catch + leading_pattern = re.compile(r"\s*{.*,\s*plural,.*") + if leading_pattern.match(grd_string_content) is not None: + error = ('Uncaught plural pattern:\n' + '-----------\n' + f"{grd_string_content.encode('utf-8')}\n" + '-----------\n') + raise ValueError(error) + + +def create_xtb_format_translation_tag(fingerprint, string_value): + """Creates child XTB elements for each translation tag""" + string_tag = lxml.etree.Element('translation') + string_tag.set('id', str(fingerprint)) + if string_value.count('<') != string_value.count('>'): + assert False, \ + 'Warning: Unmatched < character, consider fixing on Crowdin, ' \ + f'force encoding the following string: {string_value}' + string_tag.text = string_value + string_tag.tail = '\n' + return string_tag + + +def validate_tags_in_crowdin_strings(xml_content): + """Validates that all child elements of all s are allowed""" + xml = lxml.etree.fromstring(xml_content) + string_tags = xml.findall('.//string') + # print(f'Validating HTML tags in {len(string_tags)} strings') + errors = None + for string_tag in string_tags: + error = validate_tags_in_one_string(string_tag, textify_from_crowdin) + if error is not None: + errors = (errors or '') + error + if errors is not None: + errors = ("\n") + errors + return errors + + +def generate_json_content(l10n_content, source_file_path): + """Creates localized json file from source file and translations downloaded + from Crowdin. Some of the translations may no longer be needed and + untranslated strings need to be pulled from the source.""" + l10n_data = json.loads(l10n_content) + source_strings = get_json_strings(source_file_path) + content = {} + for (string_name, string_value, string_desc) in source_strings: + if string_name not in l10n_data: + content[string_name] = \ + {"message": string_value, + "description": string_desc} + else: + # Fix escaped double quotes in values + content[string_name] = \ + {"message": l10n_data[string_name]["message"].replace( + '\\"', '\"'), + "description": l10n_data[string_name]["description"]} + return json.dumps(content, ensure_ascii=False, indent=2) + '\n' diff --git a/script/lib/l10n/crowdin/push.py b/script/lib/l10n/crowdin/push.py new file mode 100755 index 000000000000..03a662ed6551 --- /dev/null +++ b/script/lib/l10n/crowdin/push.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2024 The Brave Authors. All rights reserved. +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +from shutil import copyfile + +import os +import tempfile +import lxml.etree # pylint: disable=import-error + +from lib.l10n.grd_utils import (get_grd_strings, get_xtb_files, textify) +from lib.l10n.crowdin.common import ( + crowdin_name_from_filename, get_acceptable_json_lang_codes, + get_crowdin_client_wrapper, get_json_strings, + get_strings_dict_from_xml_content, json_lang_to_crowdin_lang, + xtb_lang_to_crowdin_lang) + +# This module contains functionality specific to pushing translations up +# to Crowdin. + +# API functions +# ------------- + + +def upload_source_file_to_crowdin(channel, source_file_path, filename): + uploaded = False + i18n_type = '' + ext = os.path.splitext(source_file_path)[1] + assert ext in ('.grd', '.json'), ('Unsupported source file ext ' + + f'{ext}: {source_file_path}') + # Storage API derives the storage name from the filename, so use the + # resource name for consistency. + resource_name = crowdin_name_from_filename(source_file_path, filename) + tempdir = tempfile.gettempdir() + upload_file_path = os.path.join(tempdir, resource_name) + if os.path.exists(upload_file_path): + os.remove(upload_file_path) + + if ext == '.grd': + # Generate the intermediate Android XML format for the source file. + generate_source_strings_xml_from_grd(upload_file_path, + source_file_path) + i18n_type = 'ANDROID' + elif ext == '.json': + copyfile(source_file_path, upload_file_path) + i18n_type = 'CHROME' + + uploaded = get_crowdin_client_wrapper().upload_resource_file( + channel, upload_file_path, resource_name, i18n_type) + assert uploaded, f'Could not upload file {upload_file_path}' + os.remove(upload_file_path) + + +def check_source_grd_strings_parity_with_crowdin(channel, grd_file_path): + """Compares the GRD strings to the strings on Crowdin and uploads any + missing strings.""" + source_grd_strings = get_grd_strings(grd_file_path) + if len(source_grd_strings) == 0: + return + strings_dict = get_crowdin_source_resource_strings(channel, grd_file_path) + crowdin_string_ids = set(strings_dict.keys()) + grd_string_names = {string_name for (string_name, _, _, _) \ + in source_grd_strings} + x_grd_extra_strings = grd_string_names - crowdin_string_ids + assert len(x_grd_extra_strings) == 0, \ + f'GRD has extra strings over Crowdin {list(x_grd_extra_strings)}' + x_crowdin_extra_strings = crowdin_string_ids - grd_string_names + assert len(x_crowdin_extra_strings) == 0, \ + 'Crowdin has extra strings over GRD ' \ + f'{list(x_crowdin_extra_strings)}' + + +def upload_json_translations_to_crowdin(channel, source_string_path, + missing_only): + resource_name = crowdin_name_from_filename(source_string_path, '') + missing = 'missing' if missing_only else '' + print(f'Uploading {missing} translations for {source_string_path} ' \ + f'(resource: {resource_name})', flush=True) + source_strings = get_json_strings(source_string_path) + langs_dir_path = os.path.dirname(os.path.dirname(source_string_path)) + lang_codes = get_acceptable_json_lang_codes(langs_dir_path) + translations = {} + for lang_code in lang_codes: + crowdin_lang = json_lang_to_crowdin_lang(lang_code) + if not get_crowdin_client_wrapper().is_supported_language( + crowdin_lang): + print(f'Skipping language {crowdin_lang} ({lang_code}).') + continue + print(f'Processing language {lang_code}') + l10n_path = os.path.join(langs_dir_path, lang_code, 'messages.json') + l10n_strings = get_json_strings(l10n_path) + l10n_dict = { + string_name: string_value + for (string_name, string_value, _) in l10n_strings + } + for (string_name, 
string_value, _) in source_strings: + if string_name not in l10n_dict: + continue + if len(l10n_dict[string_name]) == 0 or (lang_code != 'en_GB' and \ + l10n_dict[string_name] == string_value): + continue + key = string_name.split(".")[0] + translation_value = (l10n_dict[string_name].replace( + "\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n")) + if key in translations: + translations[key].append((crowdin_lang, translation_value)) + else: + translations[key] = [(crowdin_lang, translation_value)] + + upload_translations_to_crowdin(channel, resource_name, translations, + missing_only) + + +def upload_grd_translations_to_crowdin(channel, + source_string_path, + filename, + missing_only, + is_override=False): + resource_name = crowdin_name_from_filename(source_string_path, filename) + missing = 'missing' if missing_only else '' + print(f'Uploading {missing} translations for {source_string_path} ' \ + f'(resource: {resource_name})', flush=True) + source_base_path = os.path.dirname(source_string_path) + grd_strings = get_grd_strings(source_string_path, False) + grd_xtbs = get_xtb_files(source_string_path) + translations = {} + for (lang, path) in grd_xtbs: + crowdin_lang = xtb_lang_to_crowdin_lang(lang) + if not get_crowdin_client_wrapper().is_supported_language( + crowdin_lang): + print(f'Skipping language {crowdin_lang} ({lang}).') + continue + xtb_full_path = os.path.join(source_base_path, path).replace('\\', '/') + if is_override: + xtb_full_path = xtb_full_path.replace('_override', '') + print( + f'Processing language {crowdin_lang} ({lang}) from {xtb_full_path}' + ) + xtb_tree = lxml.etree.parse(xtb_full_path) + xtb_strings = xtb_tree.xpath('//translation') + for xtb_string in xtb_strings: + string_fp = xtb_string.attrib['id'] + matches = [tup for tup in grd_strings if tup[2] == string_fp] + # XTB files may have translations for string that are no longer in + # the GRD, so only upload those that are needed for the GRD. + if len(matches): + key = matches[0][0] + value = textify(xtb_string) + if len(value) == 0: + print(f'Translation for {key} is empty') + continue + if key in translations: + translations[key].append((crowdin_lang, value)) + else: + translations[key] = [(crowdin_lang, value)] + + upload_translations_to_crowdin(channel, resource_name, translations, + missing_only) + + +def upload_translation_strings_xml_for_grd(channel, + source_string_path, + filename, + is_override=False): + """Generates string xml files for a GRD file from its XTB files in the + same format as the source we upload to Crowdin. 
These xml files can be + manually uploaded to Crowdin via their Translations page.""" + resource_name = crowdin_name_from_filename(source_string_path, filename) + print(f'Generating translations for {source_string_path} ' \ + f'(resource: {resource_name})', flush=True) + source_base_path = os.path.dirname(source_string_path) + # Get all grd strings (with fingerprints) + grd_strings = get_grd_strings(source_string_path, False) + # Get all xtb files from grd header + grd_xtbs = get_xtb_files(source_string_path) + tempdir = tempfile.gettempdir() + + for (lang, path) in grd_xtbs: + crowdin_lang = xtb_lang_to_crowdin_lang(lang) + if not get_crowdin_client_wrapper().is_supported_language( + crowdin_lang): + print(f'Skipping language {crowdin_lang} ({lang}).') + continue + # Prepare output xml and file + resources_tag = lxml.etree.Element('resources') + output_xml_file_path = os.path.join( + tempdir, resource_name + f'_{crowdin_lang}.xml') + if os.path.exists(output_xml_file_path): + os.remove(output_xml_file_path) + + # Load XTB strings + xtb_full_path = os.path.join(source_base_path, path).replace('\\', '/') + if is_override: + xtb_full_path = xtb_full_path.replace('_override', '') + print( + f'Processing language {crowdin_lang} ({lang}) from {xtb_full_path}' + ) + xtb_tree = lxml.etree.parse(xtb_full_path) + xtb_strings = xtb_tree.xpath('//translation') + # print(f'Loaded {len(xtb_strings)} translations') + + for xtb_string in xtb_strings: + string_fp = xtb_string.attrib['id'] + matches = [tup for tup in grd_strings if tup[2] == string_fp] + # XTB files may have translations for string that are no longer in + # the GRD, so only upload those that are needed for the GRD. + if len(matches): + # Revert escaping of & because lxml.etree.tostring will do + # it again and we'll end up with &amp; + value = textify(xtb_string).replace('&', '&') + for match in matches: + key = match[0] + # Leave description empty - it's not needed for translation + # files. + resources_tag.append( + create_android_format_string_tag(key, + value, + string_desc="")) + + xml_string = lxml.etree.tostring(resources_tag, + xml_declaration=True, + encoding='utf-8') + with open(output_xml_file_path, mode='wb') as f: + f.write(xml_string) + print(f'Uploading l10n for {resource_name}: {crowdin_lang}') + uploaded = get_crowdin_client_wrapper().upload_grd_l10n_file( + channel, output_xml_file_path, resource_name, crowdin_lang) + assert uploaded, 'Failed to upload.' 
+ os.remove(output_xml_file_path) + + +# Helper functions +# ---------------- + + +def generate_source_strings_xml_from_grd(output_xml_file_path, grd_file_path): + """Generates a source string xml file from a GRD file""" + resources_tag = lxml.etree.Element('resources') + all_strings = get_grd_strings(grd_file_path) + assert len(all_strings) > 0, f'GRD {grd_file_path} appears to be empty' + for (string_name, string_value, _, string_desc) in all_strings: + (string_value, + string_desc) = process_source_string_value(string_value, string_desc) + # Revert escaping of & because lxml.etree.tostring will do it again + # and we'll end up with &amp; + resources_tag.append( + create_android_format_string_tag( + string_name, string_value.replace('&', '&'), string_desc)) + print(f'Generating {len(all_strings)} strings for GRD: {grd_file_path}') + xml_string = lxml.etree.tostring(resources_tag, + xml_declaration=True, + encoding='utf-8') + with open(output_xml_file_path, mode='wb') as f: + f.write(xml_string) + + +def process_source_string_value(string_value, string_desc): + """Empty everything out from placeholders. The content of placeholders + doesn't need to be localized and only confuses localizers. Plus, it + gets stripped out anyway when we download the translations. The only + useful parts of the placeholders are the example values which we can + extract here and add to the comment.""" + value_xml = lxml.etree.fromstring('' + string_value + '') + phs = value_xml.findall('.//ph') + examples = [] + for ph in phs: + name = ph.get('name') + example = ph.findtext('ex') + if example is not None: + examples.append((name, example)) + lxml.etree.strip_elements(ph, '*') + if ph.text is not None: + ph.text = '' + string_desc = add_placeholders_examples_to_description( + string_desc, examples) + string_value = lxml.etree.tostring( + value_xml, encoding='utf-8').decode('utf-8').replace('>', '/>') + return (string_value[8:-9], string_desc) + + +def add_placeholders_examples_to_description(string_desc, examples): + if len(examples): + string_desc = string_desc.strip() + if not string_desc.endswith('.'): + string_desc = string_desc + '.' 
+ string_desc = string_desc + '\nPlaceholders examples:' + for example in examples: + string_desc = string_desc + f'\n{example[0]}={example[1]}' + return string_desc + + +def create_android_format_string_tag(string_name, string_value, string_desc): + """Creates intermediate Android format child tag for each translation + string""" + string_tag = lxml.etree.Element('string') + string_tag.set('name', string_name) + string_tag.set('comment', string_desc) + string_tag.text = string_value + string_tag.tail = '\n' + return string_tag + + +def get_crowdin_source_resource_strings(channel, grd_file_path): + """Obtains the list of strings from Crowdin""" + filename = os.path.basename(grd_file_path).split('.')[0] + resource_name = crowdin_name_from_filename(grd_file_path, filename) + content = get_crowdin_client_wrapper().get_resource_source( + channel, resource_name) + return get_strings_dict_from_xml_content(content) + + +def upload_translations_to_crowdin(channel, resource_name, translations, + missing_only): + """Uploads the list of (lang_code, key, translation)s.""" + print(f'Uploading translations for {len(translations)} strings.') + get_crowdin_client_wrapper().upload_strings_l10n(channel, resource_name, + translations, + missing_only) diff --git a/script/lib/l10n/grd_utils.py b/script/lib/l10n/grd_utils.py index 7892c77f0bcc..432b95725048 100755 --- a/script/lib/l10n/grd_utils.py +++ b/script/lib/l10n/grd_utils.py @@ -171,7 +171,10 @@ def update_xtbs_locally(grd_file_path, brave_source_root): len(GOOGLE_CHROME_STRINGS_MIGRATION_MAP) brave_strings_string_ids = remove_google_chrome_strings( grd_strings, GOOGLE_CHROME_STRINGS_MIGRATION_MAP) - assert len(grd_strings) == len(chromium_grd_strings) + assert len(grd_strings) == len(chromium_grd_strings), ( + f'String count in {grd_file_path} and in {chromium_grd_file_path} do' + + f'not match: {len(grd_strings)} vs {len(chromium_grd_strings)}.') + # Verify that string names match for idx, grd_string in enumerate(grd_strings): assert chromium_grd_strings[idx][0] == grd_string[0] diff --git a/script/pull-l10n.py b/script/pull-l10n.py old mode 100644 new mode 100755 index d4c4cf391058..30f0d35b2a85 --- a/script/pull-l10n.py +++ b/script/pull-l10n.py @@ -10,6 +10,8 @@ import sys import tempfile +from lib.l10n.crowdin.common import should_use_crowdin_for_file +from lib.l10n.crowdin.pull import pull_source_file_from_crowdin from lib.l10n.grd_utils import (get_override_file_path, update_xtbs_locally) from lib.l10n.transifex.common import should_use_transifex_for_file from lib.l10n.transifex.pull import (combine_override_xtb_into_original, @@ -20,47 +22,78 @@ def parse_args(): - parser = argparse.ArgumentParser(description='Pull strings from Transifex') - parser.add_argument('--source_string_path', nargs=1, + parser = argparse.ArgumentParser( + description='Pull strings from Transifex or Crowdin') + parser.add_argument('--source_string_path', + nargs=1, help='path to the source file (GRD(P) or JSON)') + parser.add_argument('--service', + nargs=1, + choices=['Transifex', 'Crowdin'], + default='Transifex') + parser.add_argument('--channel', + nargs=1, + choices=['Release', 'Beta', 'Nightly'], + default='Release') parser.add_argument('--debug', dest='debug', action='store_true', help='dump downloaded content for the current ' \ - 'language to the TransifexCurrent.txt file in ' \ - 'the temp directory') + 'language to the TransifexCurrent.txt or ' \ + 'CrowdinCurrent.txt file in the temp directory') return parser.parse_args() def main(): args = parse_args() + if 
args.service == 'Transifex' and args.channel != 'Release': + raise Exception('Only Release channel is supported with Transifex') + service = args.service[0] + channel = args.channel[0] + print(f'[pull-l10n] Service: {service}, Channel: {channel}') dump_path = None if args.debug: - dump_path = os.path.join(tempfile.gettempdir(), 'TransifexCurrent.txt') + dump_path = os.path.join(tempfile.gettempdir(), + f'{service}Current.txt') print(f'DEBUG: Content dump file = {dump_path}') source_string_path = os.path.join( BRAVE_SOURCE_ROOT, args.source_string_path[0]) filename = os.path.basename(source_string_path).split('.')[0] - if should_use_transifex_for_file(source_string_path, filename): - print('Transifex: ', source_string_path) - pull_source_files_from_transifex(source_string_path, filename, - dump_path) + use_crowdin = service == 'Crowdin' + should_use_service_for_file = (should_use_crowdin_for_file( + source_string_path, filename) if use_crowdin else + should_use_transifex_for_file( + source_string_path, filename)) + + if should_use_service_for_file: + print(f'{service}: ', source_string_path) + pull_source_file_from_service(use_crowdin, channel, source_string_path, + filename, dump_path) else: print('Local: ', source_string_path) override_path = get_override_file_path(source_string_path) override_exists = os.path.exists(override_path) if override_exists: - print('Transifex override: ', override_path) + print(f'{service} override: ', override_path) override_filename = os.path.basename(override_path).split('.')[0] - pull_source_files_from_transifex(override_path, override_filename, - dump_path) + pull_source_file_from_service(use_crowdin, channel, override_path, + override_filename, dump_path) else: - print('No Transifex override.') + print(f'No {service} override.') update_xtbs_locally(source_string_path, BRAVE_SOURCE_ROOT) if override_exists: combine_override_xtb_into_original(source_string_path) +def pull_source_file_from_service(use_crowdin, channel, source_file_path, + filename, dump_path): + if use_crowdin: + pull_source_file_from_crowdin(channel, source_file_path, filename, + dump_path) + else: + pull_source_files_from_transifex(source_file_path, filename, dump_path) + + if __name__ == '__main__': sys.exit(main()) diff --git a/script/push-l10n.py b/script/push-l10n.py index 5bfafbc1a85c..b84bf7810887 100755 --- a/script/push-l10n.py +++ b/script/push-l10n.py @@ -3,12 +3,17 @@ # Copyright (c) 2022 The Brave Authors. All rights reserved. # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. */ +# You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ import argparse import os import sys +from lib.l10n.crowdin.common import should_use_crowdin_for_file +from lib.l10n.crowdin.push import ( + upload_grd_translations_to_crowdin, upload_json_translations_to_crowdin, + upload_source_file_to_crowdin, upload_translation_strings_xml_for_grd, + check_source_grd_strings_parity_with_crowdin) from lib.l10n.transifex.common import should_use_transifex_for_file from lib.l10n.transifex.push import ( check_for_chromium_upgrade, @@ -25,68 +30,157 @@ def parse_args(): - parser = argparse.ArgumentParser(description='Push strings to Transifex') - parser.add_argument('--source_string_path', nargs=1) + parser = argparse.ArgumentParser( + description='Push strings to Transifex or Crowdin') + parser.add_argument('--source_string_path', nargs=1, required=True) + parser.add_argument('--service', + nargs=1, + choices=['Transifex', 'Crowdin'], + default='Transifex') + parser.add_argument('--channel', + nargs=1, + choices=['Release', 'Beta', 'Nightly'], + default='Release') group = parser.add_mutually_exclusive_group() group.add_argument('--with_translations', dest='with_translations', action='store_true', help='Uploads translations from the local .xtb and ' \ - '.json files to Transifex. WARNING: This will ' \ - 'overwrite the Transifex translations with the ' \ - 'local values') + '.json files to Transifex or Crowdin.' \ + 'WARNING: This will overwrite the Transifex ' \ + 'translations with the local values') group.add_argument('--with_missing_translations', dest='with_missing_translations', action='store_true', help='Uploads translations from the local .xtb and ' \ - '.json files to Transifex, but only for strings '\ - 'that are not translated in Transifex.') + '.json files to Transifex or Crowdin, but only ' \ + 'for strings that are not translated in ' \ + 'Transifex/Crowdin.') return parser.parse_args() def main(): args = parse_args() + service = args.service[0] + channel = args.channel[0] + print(f'[push-l10n] Service: {service}, Channel: {channel}') + if service == 'Transifex' and channel != 'Release': + raise Exception('Only Release channel is supported with Transifex') source_string_path = os.path.join(BRAVE_SOURCE_ROOT, args.source_string_path[0]) filename = os.path.basename(source_string_path).split('.')[0] - if not should_use_transifex_for_file(source_string_path, filename): + use_crowdin = service == 'Crowdin' + should_use_service_for_file = (should_use_crowdin_for_file( + source_string_path, filename) if use_crowdin else + should_use_transifex_for_file( + source_string_path, filename)) + + if not should_use_service_for_file: override_string_path = get_override_file_path(source_string_path) filename = os.path.basename(override_string_path).split('.')[0] # This check is needed because some files that we process have no - # replacements needed so in that case we don't even put an override - # file in Transifex. + # replacements needed so in that case we don't even upload the override + # file to the l10n service. 
if not os.path.exists(override_string_path): print('Skipping fully locally handled, override not present: ' f'{override_string_path} filename: {filename}') return - print('Handled locally, sending only overrides to Transifex: ' + print(f'Handled locally, sending only overrides to {service}: ' f'{override_string_path} filename: {filename}') - upload_source_files_to_transifex(override_string_path, filename) - upload_source_strings_desc(override_string_path, filename) + upload_source_file(use_crowdin, channel, override_string_path, + filename) + upload_source_strings_descriptions(use_crowdin, override_string_path, + filename) # Upload local translations if requested if args.with_translations or args.with_missing_translations: - upload_grd_translations_to_transifex(override_string_path, - filename, missing_only = args.with_missing_translations, - is_override = True) + upload_grd_translations( + use_crowdin, + channel, + override_string_path, + filename, + missing_only=args.with_missing_translations, + is_override=True) return - print(f'[Transifex]: {source_string_path}') - upload_source_files_to_transifex(source_string_path, filename) + print(f'[{service}]: {source_string_path}') + upload_source_file(use_crowdin, channel, source_string_path, filename) ext = os.path.splitext(source_string_path)[1] if ext == '.grd': check_for_chromium_upgrade(SOURCE_ROOT, source_string_path) - check_missing_source_grd_strings_to_transifex(source_string_path) - upload_source_strings_desc(source_string_path, filename) + check_source_grd_strings_parity_with_service(use_crowdin, channel, + source_string_path) + upload_source_strings_descriptions(use_crowdin, source_string_path, + filename) # Upload local translations if requested if (args.with_translations or args.with_missing_translations): if ext == '.grd': - upload_grd_translations_to_transifex(source_string_path, filename, - missing_only = args.with_missing_translations) + upload_grd_translations( + use_crowdin, + channel, + source_string_path, + filename, + missing_only=args.with_missing_translations) + else: + upload_json_translations( + use_crowdin, + channel, + source_string_path, + missing_only=args.with_missing_translations) + + +def upload_source_file(use_crowdin, channel, source_string_path, filename): + if use_crowdin: + upload_source_file_to_crowdin(channel, source_string_path, filename) + else: + upload_source_files_to_transifex(source_string_path, filename) + + +def upload_source_strings_descriptions(use_crowdin, source_string_path, + filename): + # Crowdin descriptions are uploaded with the source file + if not use_crowdin: + upload_source_strings_desc(source_string_path, filename) + + +def upload_grd_translations(use_crowdin, + channel, + source_string_path, + filename, + missing_only, + is_override=False): + if use_crowdin: + # String by string upload is too slow, so only use it for missing + # translations. 
+ if missing_only: + upload_grd_translations_to_crowdin(channel, source_string_path, + filename, missing_only, + is_override) else: - upload_json_translations_to_transifex(source_string_path, - missing_only = args.with_missing_translations) + upload_translation_strings_xml_for_grd(channel, source_string_path, + filename, is_override) + else: + upload_grd_translations_to_transifex(source_string_path, filename, + missing_only, is_override) + + +def upload_json_translations(use_crowdin, channel, source_string_path, + missing_only): + if use_crowdin: + upload_json_translations_to_crowdin(channel, source_string_path, + missing_only) + else: + upload_json_translations_to_transifex(source_string_path, missing_only) + + +def check_source_grd_strings_parity_with_service(use_crowdin, channel, + source_string_path): + if use_crowdin: + check_source_grd_strings_parity_with_crowdin(channel, + source_string_path) + else: + check_missing_source_grd_strings_to_transifex(source_string_path) if __name__ == '__main__':
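
A quick way to sanity-check the language-code mapping helpers introduced in script/lib/l10n/crowdin/common.py and script/lib/l10n/crowdin/pull.py above. This is a minimal sketch, not part of the patch, assuming it is run from the brave/script directory with the crowdin-api-client-python SDK installed so the imports resolve:

import sys

from lib.l10n.crowdin.common import (json_lang_to_crowdin_lang,
                                     xtb_lang_to_crowdin_lang)
from lib.l10n.crowdin.pull import crowdin_lang_to_xtb_lang

# Chromium XTBs still use the legacy 'iw' code for Hebrew; Crowdin wants 'he'.
assert xtb_lang_to_crowdin_lang('iw') == 'he'
assert xtb_lang_to_crowdin_lang('pt-PT') == 'pt'
# The pull side maps back, so regenerated XTBs keep the codes Chromium expects.
assert crowdin_lang_to_xtb_lang('he') == 'iw'
assert crowdin_lang_to_xtb_lang('pt') == 'pt-PT'
# Extension locale directories use underscores (en_GB, pt_BR, ...), which are
# normalized to hyphens for Crowdin.
assert json_lang_to_crowdin_lang('pt_BR') == 'pt-BR'
print('language-code helpers behave as expected')
sys.exit(0)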
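
How the pieces fit together when push-l10n.py runs with --service Crowdin: the --channel value is used as the Crowdin branch name, a GRD source is converted to the intermediate Android XML format, and the lazily created wrapper uploads or updates the resource. A minimal sketch under the same assumptions as above, with BRAVE_CROWDIN_API_KEY exported; the GRD path here is made up for illustration:

from lib.l10n.crowdin.common import (crowdin_name_from_filename,
                                     get_crowdin_client_wrapper)
from lib.l10n.crowdin.push import upload_source_file_to_crowdin

grd_path = '/path/to/brave/app/brave_generated_resources.grd'  # made-up path

# For GRDs the Crowdin resource name is the filename plus '.xml'.
assert crowdin_name_from_filename(
    grd_path, 'brave_generated_resources') == 'brave_generated_resources.xml'

# get_crowdin_client_wrapper() builds a single CrowdinClientWrapper for the
# Brave Core project (id 6) on first use and reuses it afterwards.
print(get_crowdin_client_wrapper().is_supported_language('he'))

# 'Beta' is the --channel value and doubles as the Crowdin branch name; this
# generates the Android XML for the GRD in a temp dir and uploads/updates it.
upload_source_file_to_crowdin('Beta', grd_path, 'brave_generated_resources')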
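
Extension strings travel through Crowdin's "Chrome JSON" format: get_json_strings() flattens a messages.json into (name, message, description) tuples, using an empty description when a key has none, and generate_json_content() later merges the downloaded translations back over these source strings. A minimal sketch with a made-up messages.json:

import json
import os
import tempfile

from lib.l10n.crowdin.common import get_json_strings

sample = {
    'appName': {'message': 'Brave', 'description': 'Product name'},
    'greeting': {'message': 'Hello'},  # description is optional
}
path = os.path.join(tempfile.gettempdir(), 'messages.json')
with open(path, mode='w', encoding='utf-8') as f:
    json.dump(sample, f, ensure_ascii=False)

# -> [('appName', 'Brave', 'Product name'), ('greeting', 'Hello', '')]
print(get_json_strings(path))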