-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtranslation_utils.py
204 lines (153 loc) · 8.15 KB
/
translation_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
from utils.llm_utils import get_completion
def initial_translation(source_lang: str, target_lang: str, source_text: str) -> str:
    """
    Produce a first-pass translation of the whole text with a single LLM call.

    Args:
        source_lang (str): Language the text is written in.
        target_lang (str): Language to translate into.
        source_text (str): Text to translate; sent to the model as one chunk.

    Returns:
        str: The value returned by `get_completion` for the translation prompt.
    """
    persona = (
        "You are an expert linguist, specializing in translation "
        f"from {source_lang} to {target_lang}."
    )

    # Instruction sentences share the first line; the labelled source text and
    # the open target-language label follow on their own lines.
    request = (
        f"This is an {source_lang} to {target_lang} translation, "
        f"please provide the {target_lang} translation for this text. "
        "Do not provide any explanations or text apart from the translation. "
        "But please keep subtitle timestamps.\n"
        f"{source_lang}: {source_text}\n"
        f"{target_lang}:"
    )

    return get_completion(request, system_message=persona)
def reflect_on_translation(
    source_lang: str,
    target_lang: str,
    source_text: str,
    translation_1: str,
    country: str = "",
) -> str:
    """
    Ask an LLM to critique a translation, handling the whole text as one chunk.

    Args:
        source_lang (str): Language of the original text.
        target_lang (str): Language of the translation.
        source_text (str): The original text.
        translation_1 (str): The first-pass translation to be reviewed.
        country (str): Optional country for the target language. When it is
            not the empty string, the prompt also asks for the style of
            `target_lang` as colloquially spoken there.

    Returns:
        str: The value returned by `get_completion` for the reflection prompt
            (the prompt requests a list of improvement suggestions only).
    """
    system_message = (
        f"You are an expert linguist specializing in translation from {source_lang} to {target_lang}. "
        "You will be provided with a source text and its translation and your goal is to improve the translation."
    )

    # Only the opening differs between the two variants: the country-specific
    # one adds a regional style requirement (and says "criticism" rather than
    # "criticisms"). Everything after the opening is shared.
    if country == "":
        opening = (
            f"Your task is to carefully read a source text and a translation from {source_lang} to {target_lang}, "
            "and then give constructive criticisms and helpful suggestions to improve the translation. "
        )
    else:
        opening = (
            f"Your task is to carefully read a source text and a translation from {source_lang} to {target_lang}, "
            "and then give constructive criticism and helpful suggestions to improve the translation. "
            f"The final style and tone of the translation should match the style of {target_lang} colloquially spoken in {country}.\n"
        )

    # Source and translation are wrapped in XML-style tags, followed by the
    # review checklist and the output-format instructions.
    shared_tail = (
        "The source text and initial translation, delimited by XML tags "
        "<SOURCE_TEXT></SOURCE_TEXT> and <TRANSLATION></TRANSLATION>, are as follows:\n"
        "<SOURCE_TEXT>\n"
        f"{source_text}\n"
        "</SOURCE_TEXT>\n"
        "<TRANSLATION>\n"
        f"{translation_1}\n"
        "</TRANSLATION>\n"
        "When writing suggestions, pay attention to whether there are ways to improve the translation's \n"
        "(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),\n"
        f"(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),\n"
        "(iii) style (by ensuring the translations reflect the style of the source text and take into account any cultural context),\n"
        f"(iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by only ensuring you use equivalent idioms {target_lang}).\n"
        "Write a list of specific, helpful and constructive suggestions for improving the translation.\n"
        "Each suggestion should address one specific part of the translation.\n"
        "Output only the suggestions and nothing else."
    )

    return get_completion(opening + shared_tail, system_message=system_message)
def improve_translation(
    source_lang: str,
    target_lang: str,
    source_text: str,
    translation_1: str,
    reflection: str,
) -> str:
    """
    Edit an initial translation using expert suggestions, treating the entire text as one chunk.

    The source text, the initial translation and the suggestions are embedded
    in a single prompt, each wrapped in its own XML-style tag, and sent to the
    LLM through `get_completion`.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for the translation.
        source_text (str): The original text in the source language.
        translation_1 (str): The initial translation of the source text.
        reflection (str): Expert suggestions and constructive criticism for improving the translation.

    Returns:
        str: The value returned by `get_completion` for the editing prompt
            (the prompt requests the new translation and nothing else).
    """
    system_message = f"You are an expert linguist, specializing in translation editing from {source_lang} to {target_lang}."

    # The prompt text is kept at column 0 so no indentation leaks into the
    # string. Checklist items (i)-(v) each sit on their own line; item (ii)
    # previously ended in a line-continuation backslash that fused it with
    # item (iii).
    prompt = f"""Your task is to carefully read, then edit, a translation from {source_lang} to {target_lang}, taking into
account a list of expert suggestions and constructive criticisms.
The source text, the initial translation, and the expert linguist suggestions are delimited by XML tags <SOURCE_TEXT></SOURCE_TEXT>, <TRANSLATION></TRANSLATION> and <EXPERT_SUGGESTIONS></EXPERT_SUGGESTIONS> \
as follows:
<SOURCE_TEXT>
{source_text}
</SOURCE_TEXT>
<TRANSLATION>
{translation_1}
</TRANSLATION>
<EXPERT_SUGGESTIONS>
{reflection}
</EXPERT_SUGGESTIONS>
Please take into account the expert suggestions when editing the translation. Edit the translation by ensuring:
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translations reflect the style of the source text)
(iv) terminology (inappropriate for context, inconsistent use), or
(v) other errors.
Output only the new translation and nothing else."""

    # Pass the system message by keyword, as the other callers in this module
    # do, so it cannot bind to the wrong parameter of `get_completion`.
    translation_2 = get_completion(prompt, system_message=system_message)
    return translation_2
def translate_text(
    source_lang: str, target_lang: str, source_text: str, country: str = ""
) -> str:
    """
    Translate one chunk of text and refine the result through a reflection pass.

    Stages, each followed by a divider line and the stage output on stdout:
        1. `initial_translation` produces a first draft.
        2. `reflect_on_translation` critiques that draft.
        3. `improve_translation` rewrites the draft using the critique.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for the translation.
        source_text (str): The text to be translated.
        country (str): Country specified for the target language; forwarded
            to the reflection stage only.

    Returns:
        str: The result of the improvement stage.
    """
    divider = "----------------------------"

    draft = initial_translation(source_lang, target_lang, source_text)
    print(divider, draft, sep="\n")

    critique = reflect_on_translation(
        source_lang, target_lang, source_text, draft, country
    )
    print(divider, critique, sep="\n")

    revised = improve_translation(
        source_lang, target_lang, source_text, draft, critique
    )
    print(divider, revised, sep="\n")

    return revised
if __name__ == "__main__":
    # Manual demo: run the full translate/reflect/improve pipeline on a short
    # Chinese couplet when this module is executed as a script.
    # NOTE(review): "American" is not a country name, so the reflection prompt
    # will read "colloquially spoken in American" — confirm whether
    # "United States" was intended.
    demo_request = {
        "source_lang": "Chinese",
        "target_lang": "English",
        "source_text": "宠辱不惊,看庭前花开花落;去留无意,望天上云卷云舒",
        "country": "American",
    }
    translate_text(**demo_request)