1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8
9 #include "src/objects/js-list-format.h"
10
11 #include <memory>
12 #include <vector>
13
14 #include "src/elements.h"
15 #include "src/heap/factory.h"
16 #include "src/isolate.h"
17 #include "src/objects-inl.h"
18 #include "src/objects/intl-objects.h"
19 #include "src/objects/js-array-inl.h"
20 #include "src/objects/js-list-format-inl.h"
21 #include "src/objects/managed.h"
22 #include "unicode/listformatter.h"
23
24 namespace v8 {
25 namespace internal {
26
27 namespace {
// ICU list-pattern style keywords that GetIcuStyleString() hands to
// icu::ListFormatter::createInstance(). They are constexpr so that neither
// the characters nor the pointers themselves can be modified at runtime.
constexpr const char* kStandard = "standard";
constexpr const char* kOr = "or";
constexpr const char* kUnit = "unit";
constexpr const char* kStandardShort = "standard-short";
constexpr const char* kUnitShort = "unit-short";
constexpr const char* kUnitNarrow = "unit-narrow";
34
GetIcuStyleString(JSListFormat::Style style,JSListFormat::Type type)35 const char* GetIcuStyleString(JSListFormat::Style style,
36 JSListFormat::Type type) {
37 switch (type) {
38 case JSListFormat::Type::CONJUNCTION:
39 switch (style) {
40 case JSListFormat::Style::LONG:
41 return kStandard;
42 case JSListFormat::Style::SHORT:
43 return kStandardShort;
44 case JSListFormat::Style::NARROW:
45 // Currently, ListFormat::createInstance on "standard-narrow" will
46 // fail so we use "standard-short" here.
47 // See https://unicode.org/cldr/trac/ticket/11254
48 // TODO(ftang): change to return kStandardNarrow; after the above
49 // issue fixed in CLDR/ICU.
50 // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
51 // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
52 return kStandardShort;
53 case JSListFormat::Style::COUNT:
54 UNREACHABLE();
55 }
56 case JSListFormat::Type::DISJUNCTION:
57 switch (style) {
58 // Currently, ListFormat::createInstance on "or-short" and "or-narrow"
59 // will fail so we use "or" here.
60 // See https://unicode.org/cldr/trac/ticket/11254
61 // TODO(ftang): change to return kOr, kOrShort or kOrNarrow depend on
62 // style after the above issue fixed in CLDR/ICU.
63 // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
64 // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
65 case JSListFormat::Style::LONG:
66 case JSListFormat::Style::SHORT:
67 case JSListFormat::Style::NARROW:
68 return kOr;
69 case JSListFormat::Style::COUNT:
70 UNREACHABLE();
71 }
72 case JSListFormat::Type::UNIT:
73 switch (style) {
74 case JSListFormat::Style::LONG:
75 return kUnit;
76 case JSListFormat::Style::SHORT:
77 return kUnitShort;
78 case JSListFormat::Style::NARROW:
79 return kUnitNarrow;
80 case JSListFormat::Style::COUNT:
81 UNREACHABLE();
82 }
83 case JSListFormat::Type::COUNT:
84 UNREACHABLE();
85 }
86 }
87
88 } // namespace
89
get_style(const char * str)90 JSListFormat::Style get_style(const char* str) {
91 switch (str[0]) {
92 case 'n':
93 if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
94 break;
95 case 'l':
96 if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
97 break;
98 case 's':
99 if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
100 break;
101 }
102 UNREACHABLE();
103 }
104
get_type(const char * str)105 JSListFormat::Type get_type(const char* str) {
106 switch (str[0]) {
107 case 'c':
108 if (strcmp(&str[1], "onjunction") == 0)
109 return JSListFormat::Type::CONJUNCTION;
110 break;
111 case 'd':
112 if (strcmp(&str[1], "isjunction") == 0)
113 return JSListFormat::Type::DISJUNCTION;
114 break;
115 case 'u':
116 if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
117 break;
118 }
119 UNREACHABLE();
120 }
121
InitializeListFormat(Isolate * isolate,Handle<JSListFormat> list_format_holder,Handle<Object> input_locales,Handle<Object> input_options)122 MaybeHandle<JSListFormat> JSListFormat::InitializeListFormat(
123 Isolate* isolate, Handle<JSListFormat> list_format_holder,
124 Handle<Object> input_locales, Handle<Object> input_options) {
125 Factory* factory = isolate->factory();
126 list_format_holder->set_flags(0);
127
128 Handle<JSReceiver> options;
129 // 2. If options is undefined, then
130 if (input_options->IsUndefined(isolate)) {
131 // a. Let options be ObjectCreate(null).
132 options = isolate->factory()->NewJSObjectWithNullProto();
133 // 3. Else
134 } else {
135 // a. Let options be ? ToObject(options).
136 ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
137 Object::ToObject(isolate, input_options),
138 JSListFormat);
139 }
140
141 // 5. Let t be GetOption(options, "type", "string", «"conjunction",
142 // "disjunction", "unit"», "conjunction").
143 std::unique_ptr<char[]> type_str = nullptr;
144 std::vector<const char*> type_values = {"conjunction", "disjunction", "unit"};
145 Maybe<bool> maybe_found_type = Intl::GetStringOption(
146 isolate, options, "type", type_values, "Intl.ListFormat", &type_str);
147 Type type_enum = Type::CONJUNCTION;
148 MAYBE_RETURN(maybe_found_type, MaybeHandle<JSListFormat>());
149 if (maybe_found_type.FromJust()) {
150 DCHECK_NOT_NULL(type_str.get());
151 type_enum = get_type(type_str.get());
152 }
153 // 6. Set listFormat.[[Type]] to t.
154 list_format_holder->set_type(type_enum);
155
156 // 7. Let s be ? GetOption(options, "style", "string",
157 // «"long", "short", "narrow"», "long").
158 std::unique_ptr<char[]> style_str = nullptr;
159 std::vector<const char*> style_values = {"long", "short", "narrow"};
160 Maybe<bool> maybe_found_style = Intl::GetStringOption(
161 isolate, options, "style", style_values, "Intl.ListFormat", &style_str);
162 Style style_enum = Style::LONG;
163 MAYBE_RETURN(maybe_found_style, MaybeHandle<JSListFormat>());
164 if (maybe_found_style.FromJust()) {
165 DCHECK_NOT_NULL(style_str.get());
166 style_enum = get_style(style_str.get());
167 }
168 // 15. Set listFormat.[[Style]] to s.
169 list_format_holder->set_style(style_enum);
170
171 // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
172 // requestedLocales, opt, undefined, localeData).
173 Handle<JSObject> r;
174 ASSIGN_RETURN_ON_EXCEPTION(
175 isolate, r,
176 Intl::ResolveLocale(isolate, "listformat", input_locales, options),
177 JSListFormat);
178
179 Handle<Object> locale_obj =
180 JSObject::GetDataProperty(r, factory->locale_string());
181 Handle<String> locale;
182 ASSIGN_RETURN_ON_EXCEPTION(
183 isolate, locale, Object::ToString(isolate, locale_obj), JSListFormat);
184
185 // 18. Set listFormat.[[Locale]] to the value of r.[[Locale]].
186 list_format_holder->set_locale(*locale);
187
188 std::unique_ptr<char[]> locale_name = locale->ToCString();
189 icu::Locale icu_locale(locale_name.get());
190 UErrorCode status = U_ZERO_ERROR;
191 icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
192 icu_locale, GetIcuStyleString(style_enum, type_enum), status);
193 if (U_FAILURE(status)) {
194 delete formatter;
195 FATAL("Failed to create ICU list formatter, are ICU data files missing?");
196 }
197 CHECK_NOT_NULL(formatter);
198
199 Handle<Managed<icu::ListFormatter>> managed_formatter =
200 Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
201
202 list_format_holder->set_formatter(*managed_formatter);
203 return list_format_holder;
204 }
205
ResolvedOptions(Isolate * isolate,Handle<JSListFormat> format_holder)206 Handle<JSObject> JSListFormat::ResolvedOptions(
207 Isolate* isolate, Handle<JSListFormat> format_holder) {
208 Factory* factory = isolate->factory();
209 Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
210 Handle<String> locale(format_holder->locale(), isolate);
211 JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
212 NONE);
213 JSObject::AddProperty(isolate, result, factory->style_string(),
214 format_holder->StyleAsString(), NONE);
215 JSObject::AddProperty(isolate, result, factory->type_string(),
216 format_holder->TypeAsString(), NONE);
217 return result;
218 }
219
UnpackFormatter(Isolate * isolate,Handle<JSListFormat> holder)220 icu::ListFormatter* JSListFormat::UnpackFormatter(Isolate* isolate,
221 Handle<JSListFormat> holder) {
222 return Managed<icu::ListFormatter>::cast(holder->formatter())->raw();
223 }
224
StyleAsString() const225 Handle<String> JSListFormat::StyleAsString() const {
226 switch (style()) {
227 case Style::LONG:
228 return GetReadOnlyRoots().long_string_handle();
229 case Style::SHORT:
230 return GetReadOnlyRoots().short_string_handle();
231 case Style::NARROW:
232 return GetReadOnlyRoots().narrow_string_handle();
233 case Style::COUNT:
234 UNREACHABLE();
235 }
236 }
237
TypeAsString() const238 Handle<String> JSListFormat::TypeAsString() const {
239 switch (type()) {
240 case Type::CONJUNCTION:
241 return GetReadOnlyRoots().conjunction_string_handle();
242 case Type::DISJUNCTION:
243 return GetReadOnlyRoots().disjunction_string_handle();
244 case Type::UNIT:
245 return GetReadOnlyRoots().unit_string_handle();
246 case Type::COUNT:
247 UNREACHABLE();
248 }
249 }
250
251 namespace {
252
253 // TODO(ftang) remove the following hack after icu::ListFormat support
254 // FieldPosition.
255 // This is a temporary workaround until icu::ListFormat support FieldPosition
256 // It is inefficient and won't work correctly on the edge case that the input
257 // contains fraction of the list pattern.
258 // For example the following under English will mark the "an" incorrectly
259 // since the formatted is "a, b, and an".
260 // listFormat.formatToParts(["a", "b", "an"])
261 // https://ssl.icu-project.org/trac/ticket/13754
GenerateListFormatParts(Isolate * isolate,const icu::UnicodeString & formatted,const icu::UnicodeString items[],int length)262 MaybeHandle<JSArray> GenerateListFormatParts(
263 Isolate* isolate, const icu::UnicodeString& formatted,
264 const icu::UnicodeString items[], int length) {
265 Factory* factory = isolate->factory();
266 int estimate_size = length * 2 + 1;
267 Handle<JSArray> array = factory->NewJSArray(estimate_size);
268 int index = 0;
269 int last_pos = 0;
270 for (int i = 0; i < length; i++) {
271 int found = formatted.indexOf(items[i], last_pos);
272 DCHECK_GE(found, 0);
273 if (found > last_pos) {
274 Handle<String> substring;
275 ASSIGN_RETURN_ON_EXCEPTION(
276 isolate, substring,
277 Intl::ToString(isolate, formatted, last_pos, found), JSArray);
278 Intl::AddElement(isolate, array, index++, factory->literal_string(),
279 substring);
280 }
281 last_pos = found + items[i].length();
282 Handle<String> substring;
283 ASSIGN_RETURN_ON_EXCEPTION(
284 isolate, substring, Intl::ToString(isolate, formatted, found, last_pos),
285 JSArray);
286 Intl::AddElement(isolate, array, index++, factory->element_string(),
287 substring);
288 }
289 if (last_pos < formatted.length()) {
290 Handle<String> substring;
291 ASSIGN_RETURN_ON_EXCEPTION(
292 isolate, substring,
293 Intl::ToString(isolate, formatted, last_pos, formatted.length()),
294 JSArray);
295 Intl::AddElement(isolate, array, index++, factory->literal_string(),
296 substring);
297 }
298 return array;
299 }
300
301 // Extract String from JSArray into array of UnicodeString
ToUnicodeStringArray(Isolate * isolate,Handle<JSArray> array,icu::UnicodeString items[],uint32_t length)302 Maybe<bool> ToUnicodeStringArray(Isolate* isolate, Handle<JSArray> array,
303 icu::UnicodeString items[], uint32_t length) {
304 Factory* factory = isolate->factory();
305 // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
306 // elements in order. But given that it was created by a builtin we control,
307 // it shouldn't be possible for it to be problematic. Add DCHECK to ensure
308 // that.
309 DCHECK(array->HasFastPackedElements());
310 auto* accessor = array->GetElementsAccessor();
311 DCHECK(length == accessor->NumberOfElements(*array));
312 // ecma402 #sec-createpartsfromlist
313 // 2. If list contains any element value such that Type(value) is not String,
314 // throw a TypeError exception.
315 //
316 // Per spec it looks like we're supposed to throw a TypeError exception if the
317 // item isn't already a string, rather than coercing to a string. Moreover,
318 // the way the spec's written it looks like we're supposed to run through the
319 // whole list to check that they're all strings before going further.
320 for (uint32_t i = 0; i < length; i++) {
321 Handle<Object> item = accessor->Get(array, i);
322 DCHECK(!item.is_null());
323 if (!item->IsString()) {
324 THROW_NEW_ERROR_RETURN_VALUE(
325 isolate,
326 NewTypeError(MessageTemplate::kArrayItemNotType,
327 factory->NewStringFromStaticChars("list"),
328 factory->NewNumber(i),
329 factory->NewStringFromStaticChars("String")),
330 Nothing<bool>());
331 }
332 }
333 for (uint32_t i = 0; i < length; i++) {
334 Handle<String> string = Handle<String>::cast(accessor->Get(array, i));
335 DisallowHeapAllocation no_gc;
336 string = String::Flatten(isolate, string);
337 std::unique_ptr<uc16[]> sap;
338 items[i] =
339 icu::UnicodeString(GetUCharBufferFromFlat(string->GetFlatContent(),
340 &sap, string->length()),
341 string->length());
342 }
343 return Just(true);
344 }
345
346 } // namespace
347
FormatListCommon(Isolate * isolate,Handle<JSListFormat> format_holder,Handle<JSArray> list,icu::UnicodeString & formatted,uint32_t * length,std::unique_ptr<icu::UnicodeString[]> & array)348 Maybe<bool> FormatListCommon(Isolate* isolate,
349 Handle<JSListFormat> format_holder,
350 Handle<JSArray> list,
351 icu::UnicodeString& formatted, uint32_t* length,
352 std::unique_ptr<icu::UnicodeString[]>& array) {
353 DCHECK(!list->IsUndefined());
354
355 icu::ListFormatter* formatter =
356 JSListFormat::UnpackFormatter(isolate, format_holder);
357 CHECK_NOT_NULL(formatter);
358
359 *length = list->GetElementsAccessor()->NumberOfElements(*list);
360 array.reset(new icu::UnicodeString[*length]);
361
362 // ecma402 #sec-createpartsfromlist
363 // 2. If list contains any element value such that Type(value) is not String,
364 // throw a TypeError exception.
365 MAYBE_RETURN(ToUnicodeStringArray(isolate, list, array.get(), *length),
366 Nothing<bool>());
367
368 UErrorCode status = U_ZERO_ERROR;
369 formatter->format(array.get(), *length, formatted, status);
370 DCHECK(U_SUCCESS(status));
371 return Just(true);
372 }
373
374 // ecma402 #sec-formatlist
FormatList(Isolate * isolate,Handle<JSListFormat> format_holder,Handle<JSArray> list)375 MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
376 Handle<JSListFormat> format_holder,
377 Handle<JSArray> list) {
378 icu::UnicodeString formatted;
379 uint32_t length;
380 std::unique_ptr<icu::UnicodeString[]> array;
381 MAYBE_RETURN(
382 FormatListCommon(isolate, format_holder, list, formatted, &length, array),
383 Handle<String>());
384 return Intl::ToString(isolate, formatted);
385 }
386
387 // ecma42 #sec-formatlisttoparts
FormatListToParts(Isolate * isolate,Handle<JSListFormat> format_holder,Handle<JSArray> list)388 MaybeHandle<JSArray> JSListFormat::FormatListToParts(
389 Isolate* isolate, Handle<JSListFormat> format_holder,
390 Handle<JSArray> list) {
391 icu::UnicodeString formatted;
392 uint32_t length;
393 std::unique_ptr<icu::UnicodeString[]> array;
394 MAYBE_RETURN(
395 FormatListCommon(isolate, format_holder, list, formatted, &length, array),
396 Handle<JSArray>());
397 return GenerateListFormatParts(isolate, formatted, array.get(), length);
398 }
399
400 } // namespace internal
401 } // namespace v8
402