1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "compile/PseudolocaleGenerator.h"
18
19 #include <algorithm>
20
21 #include "ResourceTable.h"
22 #include "ResourceValues.h"
23 #include "ValueVisitor.h"
24 #include "compile/Pseudolocalizer.h"
25 #include "util/Util.h"
26
27 using ::android::ConfigDescription;
28 using ::android::StringPiece;
29 using ::android::StringPiece16;
30
31 namespace aapt {
32
33 // The struct that represents both Span objects and UntranslatableSections.
34 struct UnifiedSpan {
35 // Only present for Span objects. If not present, this was an UntranslatableSection.
36 Maybe<std::string> tag;
37
38 // The UTF-16 index into the string where this span starts.
39 uint32_t first_char;
40
41 // The UTF-16 index into the string where this span ends, inclusive.
42 uint32_t last_char;
43 };
44
operator <(const UnifiedSpan & left,const UnifiedSpan & right)45 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
46 if (left.first_char < right.first_char) {
47 return true;
48 } else if (left.first_char > right.first_char) {
49 return false;
50 } else if (left.last_char < right.last_char) {
51 return true;
52 }
53 return false;
54 }
55
SpanToUnifiedSpan(const StringPool::Span & span)56 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
57 return UnifiedSpan{*span.name, span.first_char, span.last_char};
58 }
59
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)60 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
61 return UnifiedSpan{
62 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
63 }
64
65 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
66 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)67 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
68 // Ensure the Spans are sorted and converted.
69 std::vector<UnifiedSpan> sorted_spans;
70 sorted_spans.reserve(string.value->spans.size());
71 std::transform(string.value->spans.begin(), string.value->spans.end(),
72 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
73
74 // Stable sort to ensure tag sequences like "<b><i>" are preserved.
75 std::stable_sort(sorted_spans.begin(), sorted_spans.end());
76
77 // Ensure the UntranslatableSections are sorted and converted.
78 std::vector<UnifiedSpan> sorted_untranslatable_sections;
79 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
80 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
81 std::back_inserter(sorted_untranslatable_sections),
82 UntranslatableSectionToUnifiedSpan);
83 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
84
85 std::vector<UnifiedSpan> merged_spans;
86 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
87 auto span_iter = sorted_spans.begin();
88 auto untranslatable_iter = sorted_untranslatable_sections.begin();
89 while (span_iter != sorted_spans.end() &&
90 untranslatable_iter != sorted_untranslatable_sections.end()) {
91 if (*span_iter < *untranslatable_iter) {
92 merged_spans.push_back(std::move(*span_iter));
93 ++span_iter;
94 } else {
95 merged_spans.push_back(std::move(*untranslatable_iter));
96 ++untranslatable_iter;
97 }
98 }
99
100 while (span_iter != sorted_spans.end()) {
101 merged_spans.push_back(std::move(*span_iter));
102 ++span_iter;
103 }
104
105 while (untranslatable_iter != sorted_untranslatable_sections.end()) {
106 merged_spans.push_back(std::move(*untranslatable_iter));
107 ++untranslatable_iter;
108 }
109 return merged_spans;
110 }
111
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,StringPool * pool)112 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
113 Pseudolocalizer::Method method,
114 StringPool* pool) {
115 Pseudolocalizer localizer(method);
116
117 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
118 // This will effectively subdivide the string into multiple sections that can be individually
119 // pseudolocalized, while keeping the span indices synchronized.
120 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
121
122 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
123 // runtime. So we will do all our processing in UTF-16, then convert back.
124 const std::u16string text16 = util::Utf8ToUtf16(string->value->value);
125
126 // Convenient wrapper around the text that allows us to work with StringPieces.
127 const StringPiece16 text(text16);
128
129 // The new string.
130 std::string new_string = localizer.Start();
131
132 // The stack that keeps track of what nested Span we're in.
133 std::vector<size_t> span_stack;
134
135 // The current position in the original text.
136 uint32_t cursor = 0u;
137
138 // The current position in the new text.
139 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
140 new_string.size(), false);
141
142 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
143 bool translatable = true;
144 size_t span_idx = 0u;
145 while (span_idx < merged_spans.size() || !span_stack.empty()) {
146 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
147 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
148
149 if (span != nullptr) {
150 if (parent_span == nullptr || parent_span->last_char > span->first_char) {
151 // There is no parent, or this span is the child of the parent.
152 // Pseudolocalize all the text until this span.
153 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
154 cursor += substr.size();
155
156 // Pseudolocalize the substring.
157 std::string new_substr = util::Utf16ToUtf8(substr);
158 if (translatable) {
159 new_substr = localizer.Text(new_substr);
160 }
161 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
162 new_substr.size(), false);
163 new_string += new_substr;
164
165 // Rewrite the first_char.
166 span->first_char = new_cursor;
167 if (!span->tag) {
168 // An untranslatable section has begun!
169 translatable = false;
170 }
171 span_stack.push_back(span_idx);
172 ++span_idx;
173 continue;
174 }
175 }
176
177 if (parent_span != nullptr) {
178 // There is a parent, and either this span is not a child of it, or there are no more spans.
179 // Pop this off the stack.
180 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
181 cursor += substr.size();
182
183 // Pseudolocalize the substring.
184 std::string new_substr = util::Utf16ToUtf8(substr);
185 if (translatable) {
186 new_substr = localizer.Text(new_substr);
187 }
188 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
189 new_substr.size(), false);
190 new_string += new_substr;
191
192 parent_span->last_char = new_cursor - 1;
193 if (parent_span->tag) {
194 // An end to an untranslatable section.
195 translatable = true;
196 }
197 span_stack.pop_back();
198 }
199 }
200
201 // Finish the pseudolocalization at the end of the string.
202 new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
203 new_string += localizer.End();
204
205 StyleString localized;
206 localized.str = std::move(new_string);
207
208 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
209 for (UnifiedSpan& span : merged_spans) {
210 if (span.tag) {
211 localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
212 }
213 }
214 return util::make_unique<StyledString>(pool->MakeRef(localized));
215 }
216
217 namespace {
218
219 class Visitor : public ValueVisitor {
220 public:
221 // Either value or item will be populated upon visiting the value.
222 std::unique_ptr<Value> value;
223 std::unique_ptr<Item> item;
224
Visitor(StringPool * pool,Pseudolocalizer::Method method)225 Visitor(StringPool* pool, Pseudolocalizer::Method method)
226 : pool_(pool), method_(method), localizer_(method) {}
227
Visit(Plural * plural)228 void Visit(Plural* plural) override {
229 std::unique_ptr<Plural> localized = util::make_unique<Plural>();
230 for (size_t i = 0; i < plural->values.size(); i++) {
231 Visitor sub_visitor(pool_, method_);
232 if (plural->values[i]) {
233 plural->values[i]->Accept(&sub_visitor);
234 if (sub_visitor.value) {
235 localized->values[i] = std::move(sub_visitor.item);
236 } else {
237 localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
238 }
239 }
240 }
241 localized->SetSource(plural->GetSource());
242 localized->SetWeak(true);
243 value = std::move(localized);
244 }
245
Visit(String * string)246 void Visit(String* string) override {
247 const StringPiece original_string = *string->value;
248 std::string result = localizer_.Start();
249
250 // Pseudolocalize only the translatable sections.
251 size_t start = 0u;
252 for (const UntranslatableSection& section : string->untranslatable_sections) {
253 // Pseudolocalize the content before the untranslatable section.
254 const size_t len = section.start - start;
255 if (len > 0u) {
256 result += localizer_.Text(original_string.substr(start, len));
257 }
258
259 // Copy the untranslatable content.
260 result += original_string.substr(section.start, section.end - section.start);
261 start = section.end;
262 }
263
264 // Pseudolocalize the content after the last untranslatable section.
265 if (start != original_string.size()) {
266 const size_t len = original_string.size() - start;
267 result += localizer_.Text(original_string.substr(start, len));
268 }
269 result += localizer_.End();
270
271 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
272 localized->SetSource(string->GetSource());
273 localized->SetWeak(true);
274 item = std::move(localized);
275 }
276
Visit(StyledString * string)277 void Visit(StyledString* string) override {
278 item = PseudolocalizeStyledString(string, method_, pool_);
279 item->SetSource(string->GetSource());
280 item->SetWeak(true);
281 }
282
283 private:
284 DISALLOW_COPY_AND_ASSIGN(Visitor);
285
286 StringPool* pool_;
287 Pseudolocalizer::Method method_;
288 Pseudolocalizer localizer_;
289 };
290
// Returns a copy of `base` whose locale is replaced by the pseudolocale that corresponds to
// `m`: language "en" / country "XA" for kAccent, language "ar" / country "XB" for kBidi.
// Only the first two characters of the language and country fields are overwritten. For any
// other method the configuration is returned unchanged.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  ConfigDescription modified = base;
  if (m == Pseudolocalizer::Method::kAccent) {
    modified.language[0] = 'e';
    modified.language[1] = 'n';
    modified.country[0] = 'X';
    modified.country[1] = 'A';
  } else if (m == Pseudolocalizer::Method::kBidi) {
    modified.language[0] = 'a';
    modified.language[1] = 'r';
    modified.country[0] = 'X';
    modified.country[1] = 'B';
  }
  return modified;
}
313
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,StringPool * pool,ResourceEntry * entry)314 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
315 ResourceConfigValue* original_value,
316 StringPool* pool, ResourceEntry* entry) {
317 Visitor visitor(pool, method);
318 original_value->value->Accept(&visitor);
319
320 std::unique_ptr<Value> localized_value;
321 if (visitor.value) {
322 localized_value = std::move(visitor.value);
323 } else if (visitor.item) {
324 localized_value = std::move(visitor.item);
325 }
326
327 if (!localized_value) {
328 return;
329 }
330
331 ConfigDescription config_with_accent =
332 ModifyConfigForPseudoLocale(original_value->config, method);
333
334 ResourceConfigValue* new_config_value =
335 entry->FindOrCreateValue(config_with_accent, original_value->product);
336 if (!new_config_value->value) {
337 // Only use auto-generated pseudo-localization if none is defined.
338 new_config_value->value = std::move(localized_value);
339 }
340 }
341
342 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
343 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)344 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
345 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
346 if (diff & ConfigDescription::CONFIG_LOCALE) {
347 return false;
348 }
349 return config_value->value->IsTranslatable();
350 }
351
352 } // namespace
353
Consume(IAaptContext * context,ResourceTable * table)354 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
355 for (auto& package : table->packages) {
356 for (auto& type : package->types) {
357 for (auto& entry : type->entries) {
358 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
359 for (ResourceConfigValue* value : values) {
360 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
361 entry.get());
362 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
363 entry.get());
364 }
365 }
366 }
367 }
368 return true;
369 }
370
371 } // namespace aapt
372