1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-list-format.h"
10 
11 #include <memory>
12 #include <vector>
13 
14 #include "src/elements.h"
15 #include "src/heap/factory.h"
16 #include "src/isolate.h"
17 #include "src/objects-inl.h"
18 #include "src/objects/intl-objects.h"
19 #include "src/objects/js-array-inl.h"
20 #include "src/objects/js-list-format-inl.h"
21 #include "src/objects/managed.h"
22 #include "unicode/listformatter.h"
23 
24 namespace v8 {
25 namespace internal {
26 
27 namespace {
// ICU list pattern keywords that GetIcuStyleString() hands to
// icu::ListFormatter::createInstance(). Both the characters and the pointers
// are const so these file-local names cannot be re-seated at runtime.
const char* const kStandard = "standard";
const char* const kOr = "or";
const char* const kUnit = "unit";
const char* const kStandardShort = "standard-short";
const char* const kUnitShort = "unit-short";
const char* const kUnitNarrow = "unit-narrow";
34 
GetIcuStyleString(JSListFormat::Style style,JSListFormat::Type type)35 const char* GetIcuStyleString(JSListFormat::Style style,
36                               JSListFormat::Type type) {
37   switch (type) {
38     case JSListFormat::Type::CONJUNCTION:
39       switch (style) {
40         case JSListFormat::Style::LONG:
41           return kStandard;
42         case JSListFormat::Style::SHORT:
43           return kStandardShort;
44         case JSListFormat::Style::NARROW:
45           // Currently, ListFormat::createInstance on "standard-narrow" will
46           // fail so we use "standard-short" here.
47           // See https://unicode.org/cldr/trac/ticket/11254
48           // TODO(ftang): change to return kStandardNarrow; after the above
49           // issue fixed in CLDR/ICU.
50           // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
51           // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
52           return kStandardShort;
53         case JSListFormat::Style::COUNT:
54           UNREACHABLE();
55       }
56     case JSListFormat::Type::DISJUNCTION:
57       switch (style) {
58         // Currently, ListFormat::createInstance on "or-short" and "or-narrow"
59         // will fail so we use "or" here.
60         // See https://unicode.org/cldr/trac/ticket/11254
61         // TODO(ftang): change to return kOr, kOrShort or kOrNarrow depend on
62         // style after the above issue fixed in CLDR/ICU.
63         // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
64         // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
65         case JSListFormat::Style::LONG:
66         case JSListFormat::Style::SHORT:
67         case JSListFormat::Style::NARROW:
68           return kOr;
69         case JSListFormat::Style::COUNT:
70           UNREACHABLE();
71       }
72     case JSListFormat::Type::UNIT:
73       switch (style) {
74         case JSListFormat::Style::LONG:
75           return kUnit;
76         case JSListFormat::Style::SHORT:
77           return kUnitShort;
78         case JSListFormat::Style::NARROW:
79           return kUnitNarrow;
80         case JSListFormat::Style::COUNT:
81           UNREACHABLE();
82       }
83     case JSListFormat::Type::COUNT:
84       UNREACHABLE();
85   }
86 }
87 
88 }  // namespace
89 
get_style(const char * str)90 JSListFormat::Style get_style(const char* str) {
91   switch (str[0]) {
92     case 'n':
93       if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
94       break;
95     case 'l':
96       if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
97       break;
98     case 's':
99       if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
100       break;
101   }
102   UNREACHABLE();
103 }
104 
get_type(const char * str)105 JSListFormat::Type get_type(const char* str) {
106   switch (str[0]) {
107     case 'c':
108       if (strcmp(&str[1], "onjunction") == 0)
109         return JSListFormat::Type::CONJUNCTION;
110       break;
111     case 'd':
112       if (strcmp(&str[1], "isjunction") == 0)
113         return JSListFormat::Type::DISJUNCTION;
114       break;
115     case 'u':
116       if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
117       break;
118   }
119   UNREACHABLE();
120 }
121 
InitializeListFormat(Isolate * isolate,Handle<JSListFormat> list_format_holder,Handle<Object> input_locales,Handle<Object> input_options)122 MaybeHandle<JSListFormat> JSListFormat::InitializeListFormat(
123     Isolate* isolate, Handle<JSListFormat> list_format_holder,
124     Handle<Object> input_locales, Handle<Object> input_options) {
125   Factory* factory = isolate->factory();
126   list_format_holder->set_flags(0);
127 
128   Handle<JSReceiver> options;
129   // 2. If options is undefined, then
130   if (input_options->IsUndefined(isolate)) {
131     // a. Let options be ObjectCreate(null).
132     options = isolate->factory()->NewJSObjectWithNullProto();
133     // 3. Else
134   } else {
135     // a. Let options be ? ToObject(options).
136     ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
137                                Object::ToObject(isolate, input_options),
138                                JSListFormat);
139   }
140 
141   // 5. Let t be GetOption(options, "type", "string", «"conjunction",
142   //    "disjunction", "unit"», "conjunction").
143   std::unique_ptr<char[]> type_str = nullptr;
144   std::vector<const char*> type_values = {"conjunction", "disjunction", "unit"};
145   Maybe<bool> maybe_found_type = Intl::GetStringOption(
146       isolate, options, "type", type_values, "Intl.ListFormat", &type_str);
147   Type type_enum = Type::CONJUNCTION;
148   MAYBE_RETURN(maybe_found_type, MaybeHandle<JSListFormat>());
149   if (maybe_found_type.FromJust()) {
150     DCHECK_NOT_NULL(type_str.get());
151     type_enum = get_type(type_str.get());
152   }
153   // 6. Set listFormat.[[Type]] to t.
154   list_format_holder->set_type(type_enum);
155 
156   // 7. Let s be ? GetOption(options, "style", "string",
157   //                          «"long", "short", "narrow"», "long").
158   std::unique_ptr<char[]> style_str = nullptr;
159   std::vector<const char*> style_values = {"long", "short", "narrow"};
160   Maybe<bool> maybe_found_style = Intl::GetStringOption(
161       isolate, options, "style", style_values, "Intl.ListFormat", &style_str);
162   Style style_enum = Style::LONG;
163   MAYBE_RETURN(maybe_found_style, MaybeHandle<JSListFormat>());
164   if (maybe_found_style.FromJust()) {
165     DCHECK_NOT_NULL(style_str.get());
166     style_enum = get_style(style_str.get());
167   }
168   // 15. Set listFormat.[[Style]] to s.
169   list_format_holder->set_style(style_enum);
170 
171   // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
172   // requestedLocales, opt, undefined, localeData).
173   Handle<JSObject> r;
174   ASSIGN_RETURN_ON_EXCEPTION(
175       isolate, r,
176       Intl::ResolveLocale(isolate, "listformat", input_locales, options),
177       JSListFormat);
178 
179   Handle<Object> locale_obj =
180       JSObject::GetDataProperty(r, factory->locale_string());
181   Handle<String> locale;
182   ASSIGN_RETURN_ON_EXCEPTION(
183       isolate, locale, Object::ToString(isolate, locale_obj), JSListFormat);
184 
185   // 18. Set listFormat.[[Locale]] to the value of r.[[Locale]].
186   list_format_holder->set_locale(*locale);
187 
188   std::unique_ptr<char[]> locale_name = locale->ToCString();
189   icu::Locale icu_locale(locale_name.get());
190   UErrorCode status = U_ZERO_ERROR;
191   icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
192       icu_locale, GetIcuStyleString(style_enum, type_enum), status);
193   if (U_FAILURE(status)) {
194     delete formatter;
195     FATAL("Failed to create ICU list formatter, are ICU data files missing?");
196   }
197   CHECK_NOT_NULL(formatter);
198 
199   Handle<Managed<icu::ListFormatter>> managed_formatter =
200       Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
201 
202   list_format_holder->set_formatter(*managed_formatter);
203   return list_format_holder;
204 }
205 
ResolvedOptions(Isolate * isolate,Handle<JSListFormat> format_holder)206 Handle<JSObject> JSListFormat::ResolvedOptions(
207     Isolate* isolate, Handle<JSListFormat> format_holder) {
208   Factory* factory = isolate->factory();
209   Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
210   Handle<String> locale(format_holder->locale(), isolate);
211   JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
212                         NONE);
213   JSObject::AddProperty(isolate, result, factory->style_string(),
214                         format_holder->StyleAsString(), NONE);
215   JSObject::AddProperty(isolate, result, factory->type_string(),
216                         format_holder->TypeAsString(), NONE);
217   return result;
218 }
219 
UnpackFormatter(Isolate * isolate,Handle<JSListFormat> holder)220 icu::ListFormatter* JSListFormat::UnpackFormatter(Isolate* isolate,
221                                                   Handle<JSListFormat> holder) {
222   return Managed<icu::ListFormatter>::cast(holder->formatter())->raw();
223 }
224 
StyleAsString() const225 Handle<String> JSListFormat::StyleAsString() const {
226   switch (style()) {
227     case Style::LONG:
228       return GetReadOnlyRoots().long_string_handle();
229     case Style::SHORT:
230       return GetReadOnlyRoots().short_string_handle();
231     case Style::NARROW:
232       return GetReadOnlyRoots().narrow_string_handle();
233     case Style::COUNT:
234       UNREACHABLE();
235   }
236 }
237 
TypeAsString() const238 Handle<String> JSListFormat::TypeAsString() const {
239   switch (type()) {
240     case Type::CONJUNCTION:
241       return GetReadOnlyRoots().conjunction_string_handle();
242     case Type::DISJUNCTION:
243       return GetReadOnlyRoots().disjunction_string_handle();
244     case Type::UNIT:
245       return GetReadOnlyRoots().unit_string_handle();
246     case Type::COUNT:
247       UNREACHABLE();
248   }
249 }
250 
251 namespace {
252 
253 // TODO(ftang) remove the following hack after icu::ListFormat support
254 // FieldPosition.
255 // This is a temporary workaround until icu::ListFormat support FieldPosition
256 // It is inefficient and won't work correctly on the edge case that the input
257 // contains fraction of the list pattern.
258 // For example the following under English will mark the "an" incorrectly
259 // since the formatted is "a, b, and an".
260 // listFormat.formatToParts(["a", "b", "an"])
261 // https://ssl.icu-project.org/trac/ticket/13754
GenerateListFormatParts(Isolate * isolate,const icu::UnicodeString & formatted,const icu::UnicodeString items[],int length)262 MaybeHandle<JSArray> GenerateListFormatParts(
263     Isolate* isolate, const icu::UnicodeString& formatted,
264     const icu::UnicodeString items[], int length) {
265   Factory* factory = isolate->factory();
266   int estimate_size = length * 2 + 1;
267   Handle<JSArray> array = factory->NewJSArray(estimate_size);
268   int index = 0;
269   int last_pos = 0;
270   for (int i = 0; i < length; i++) {
271     int found = formatted.indexOf(items[i], last_pos);
272     DCHECK_GE(found, 0);
273     if (found > last_pos) {
274       Handle<String> substring;
275       ASSIGN_RETURN_ON_EXCEPTION(
276           isolate, substring,
277           Intl::ToString(isolate, formatted, last_pos, found), JSArray);
278       Intl::AddElement(isolate, array, index++, factory->literal_string(),
279                        substring);
280     }
281     last_pos = found + items[i].length();
282     Handle<String> substring;
283     ASSIGN_RETURN_ON_EXCEPTION(
284         isolate, substring, Intl::ToString(isolate, formatted, found, last_pos),
285         JSArray);
286     Intl::AddElement(isolate, array, index++, factory->element_string(),
287                      substring);
288   }
289   if (last_pos < formatted.length()) {
290     Handle<String> substring;
291     ASSIGN_RETURN_ON_EXCEPTION(
292         isolate, substring,
293         Intl::ToString(isolate, formatted, last_pos, formatted.length()),
294         JSArray);
295     Intl::AddElement(isolate, array, index++, factory->literal_string(),
296                      substring);
297   }
298   return array;
299 }
300 
301 // Extract String from JSArray into array of UnicodeString
ToUnicodeStringArray(Isolate * isolate,Handle<JSArray> array,icu::UnicodeString items[],uint32_t length)302 Maybe<bool> ToUnicodeStringArray(Isolate* isolate, Handle<JSArray> array,
303                                  icu::UnicodeString items[], uint32_t length) {
304   Factory* factory = isolate->factory();
305   // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
306   // elements in order. But given that it was created by a builtin we control,
307   // it shouldn't be possible for it to be problematic. Add DCHECK to ensure
308   // that.
309   DCHECK(array->HasFastPackedElements());
310   auto* accessor = array->GetElementsAccessor();
311   DCHECK(length == accessor->NumberOfElements(*array));
312   // ecma402 #sec-createpartsfromlist
313   // 2. If list contains any element value such that Type(value) is not String,
314   // throw a TypeError exception.
315   //
316   // Per spec it looks like we're supposed to throw a TypeError exception if the
317   // item isn't already a string, rather than coercing to a string. Moreover,
318   // the way the spec's written it looks like we're supposed to run through the
319   // whole list to check that they're all strings before going further.
320   for (uint32_t i = 0; i < length; i++) {
321     Handle<Object> item = accessor->Get(array, i);
322     DCHECK(!item.is_null());
323     if (!item->IsString()) {
324       THROW_NEW_ERROR_RETURN_VALUE(
325           isolate,
326           NewTypeError(MessageTemplate::kArrayItemNotType,
327                        factory->NewStringFromStaticChars("list"),
328                        factory->NewNumber(i),
329                        factory->NewStringFromStaticChars("String")),
330           Nothing<bool>());
331     }
332   }
333   for (uint32_t i = 0; i < length; i++) {
334     Handle<String> string = Handle<String>::cast(accessor->Get(array, i));
335     DisallowHeapAllocation no_gc;
336     string = String::Flatten(isolate, string);
337     std::unique_ptr<uc16[]> sap;
338     items[i] =
339         icu::UnicodeString(GetUCharBufferFromFlat(string->GetFlatContent(),
340                                                   &sap, string->length()),
341                            string->length());
342   }
343   return Just(true);
344 }
345 
346 }  // namespace
347 
FormatListCommon(Isolate * isolate,Handle<JSListFormat> format_holder,Handle<JSArray> list,icu::UnicodeString & formatted,uint32_t * length,std::unique_ptr<icu::UnicodeString[]> & array)348 Maybe<bool> FormatListCommon(Isolate* isolate,
349                              Handle<JSListFormat> format_holder,
350                              Handle<JSArray> list,
351                              icu::UnicodeString& formatted, uint32_t* length,
352                              std::unique_ptr<icu::UnicodeString[]>& array) {
353   DCHECK(!list->IsUndefined());
354 
355   icu::ListFormatter* formatter =
356       JSListFormat::UnpackFormatter(isolate, format_holder);
357   CHECK_NOT_NULL(formatter);
358 
359   *length = list->GetElementsAccessor()->NumberOfElements(*list);
360   array.reset(new icu::UnicodeString[*length]);
361 
362   // ecma402 #sec-createpartsfromlist
363   // 2. If list contains any element value such that Type(value) is not String,
364   // throw a TypeError exception.
365   MAYBE_RETURN(ToUnicodeStringArray(isolate, list, array.get(), *length),
366                Nothing<bool>());
367 
368   UErrorCode status = U_ZERO_ERROR;
369   formatter->format(array.get(), *length, formatted, status);
370   DCHECK(U_SUCCESS(status));
371   return Just(true);
372 }
373 
374 // ecma402 #sec-formatlist
FormatList(Isolate * isolate,Handle<JSListFormat> format_holder,Handle<JSArray> list)375 MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
376                                              Handle<JSListFormat> format_holder,
377                                              Handle<JSArray> list) {
378   icu::UnicodeString formatted;
379   uint32_t length;
380   std::unique_ptr<icu::UnicodeString[]> array;
381   MAYBE_RETURN(
382       FormatListCommon(isolate, format_holder, list, formatted, &length, array),
383       Handle<String>());
384   return Intl::ToString(isolate, formatted);
385 }
386 
387 // ecma42 #sec-formatlisttoparts
FormatListToParts(Isolate * isolate,Handle<JSListFormat> format_holder,Handle<JSArray> list)388 MaybeHandle<JSArray> JSListFormat::FormatListToParts(
389     Isolate* isolate, Handle<JSListFormat> format_holder,
390     Handle<JSArray> list) {
391   icu::UnicodeString formatted;
392   uint32_t length;
393   std::unique_ptr<icu::UnicodeString[]> array;
394   MAYBE_RETURN(
395       FormatListCommon(isolate, format_holder, list, formatted, &length, array),
396       Handle<JSArray>());
397   return GenerateListFormatParts(isolate, formatted, array.get(), length);
398 }
399 
400 }  // namespace internal
401 }  // namespace v8
402