1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/intl-objects.h"
10 #include "src/objects/intl-objects-inl.h"
11 
12 #include <algorithm>
13 #include <memory>
14 #include <string>
15 #include <vector>
16 
17 #include "src/api-inl.h"
18 #include "src/global-handles.h"
19 #include "src/heap/factory.h"
20 #include "src/intl.h"
21 #include "src/isolate.h"
22 #include "src/objects-inl.h"
23 #include "src/objects/js-collator-inl.h"
24 #include "src/objects/managed.h"
25 #include "src/objects/string.h"
26 #include "src/property-descriptor.h"
27 #include "unicode/brkiter.h"
28 #include "unicode/bytestream.h"
29 #include "unicode/calendar.h"
30 #include "unicode/coll.h"
31 #include "unicode/curramt.h"
32 #include "unicode/dcfmtsym.h"
33 #include "unicode/decimfmt.h"
34 #include "unicode/dtfmtsym.h"
35 #include "unicode/dtptngen.h"
36 #include "unicode/gregocal.h"
37 #include "unicode/locid.h"
38 #include "unicode/numfmt.h"
39 #include "unicode/numsys.h"
40 #include "unicode/plurrule.h"
41 #include "unicode/rbbi.h"
42 #include "unicode/regex.h"
43 #include "unicode/smpdtfmt.h"
44 #include "unicode/timezone.h"
45 #include "unicode/uchar.h"
46 #include "unicode/ucol.h"
47 #include "unicode/ucurr.h"
48 #include "unicode/unum.h"
49 #include "unicode/upluralrules.h"
50 #include "unicode/ures.h"
51 #include "unicode/uvernum.h"
52 #include "unicode/uversion.h"
53 
54 #if U_ICU_VERSION_MAJOR_NUM >= 59
55 #include "unicode/char16ptr.h"
56 #endif
57 
58 namespace v8 {
59 namespace internal {
60 
61 namespace {
62 
ExtractStringSetting(Isolate * isolate,Handle<JSObject> options,const char * key,icu::UnicodeString * setting)63 bool ExtractStringSetting(Isolate* isolate, Handle<JSObject> options,
64                           const char* key, icu::UnicodeString* setting) {
65   v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
66   Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(key);
67   Handle<Object> object =
68       JSReceiver::GetProperty(isolate, options, str).ToHandleChecked();
69   if (object->IsString()) {
70     v8::String::Utf8Value utf8_string(
71         v8_isolate, v8::Utils::ToLocal(Handle<String>::cast(object)));
72     *setting = icu::UnicodeString::fromUTF8(*utf8_string);
73     return true;
74   }
75   return false;
76 }
77 
ExtractIntegerSetting(Isolate * isolate,Handle<JSObject> options,const char * key,int32_t * value)78 bool ExtractIntegerSetting(Isolate* isolate, Handle<JSObject> options,
79                            const char* key, int32_t* value) {
80   Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(key);
81   Handle<Object> object =
82       JSReceiver::GetProperty(isolate, options, str).ToHandleChecked();
83   if (object->IsNumber()) {
84     return object->ToInt32(value);
85   }
86   return false;
87 }
88 
ExtractBooleanSetting(Isolate * isolate,Handle<JSObject> options,const char * key,bool * value)89 bool ExtractBooleanSetting(Isolate* isolate, Handle<JSObject> options,
90                            const char* key, bool* value) {
91   Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(key);
92   Handle<Object> object =
93       JSReceiver::GetProperty(isolate, options, str).ToHandleChecked();
94   if (object->IsBoolean()) {
95     *value = object->BooleanValue(isolate);
96     return true;
97   }
98   return false;
99 }
100 
CreateICUDateFormat(Isolate * isolate,const icu::Locale & icu_locale,Handle<JSObject> options)101 icu::SimpleDateFormat* CreateICUDateFormat(Isolate* isolate,
102                                            const icu::Locale& icu_locale,
103                                            Handle<JSObject> options) {
104   // Create time zone as specified by the user. We have to re-create time zone
105   // since calendar takes ownership.
106   icu::TimeZone* tz = nullptr;
107   icu::UnicodeString timezone;
108   if (ExtractStringSetting(isolate, options, "timeZone", &timezone)) {
109     tz = icu::TimeZone::createTimeZone(timezone);
110   } else {
111     tz = icu::TimeZone::createDefault();
112   }
113 
114   // Create a calendar using locale, and apply time zone to it.
115   UErrorCode status = U_ZERO_ERROR;
116   icu::Calendar* calendar =
117       icu::Calendar::createInstance(tz, icu_locale, status);
118 
119   if (calendar->getDynamicClassID() ==
120       icu::GregorianCalendar::getStaticClassID()) {
121     icu::GregorianCalendar* gc = (icu::GregorianCalendar*)calendar;
122     UErrorCode status = U_ZERO_ERROR;
123     // The beginning of ECMAScript time, namely -(2**53)
124     const double start_of_time = -9007199254740992;
125     gc->setGregorianChange(start_of_time, status);
126     DCHECK(U_SUCCESS(status));
127   }
128 
129   // Make formatter from skeleton. Calendar and numbering system are added
130   // to the locale as Unicode extension (if they were specified at all).
131   icu::SimpleDateFormat* date_format = nullptr;
132   icu::UnicodeString skeleton;
133   if (ExtractStringSetting(isolate, options, "skeleton", &skeleton)) {
134     // See https://github.com/tc39/ecma402/issues/225 . The best pattern
135     // generation needs to be done in the base locale according to the
136     // current spec however odd it may be. See also crbug.com/826549 .
137     // This is a temporary work-around to get v8's external behavior to match
138     // the current spec, but does not follow the spec provisions mentioned
139     // in the above Ecma 402 issue.
140     // TODO(jshin): The spec may need to be revised because using the base
141     // locale for the pattern match is not quite right. Moreover, what to
142     // do with 'related year' part when 'chinese/dangi' calendar is specified
143     // has to be discussed. Revisit once the spec is clarified/revised.
144     icu::Locale no_extension_locale(icu_locale.getBaseName());
145     std::unique_ptr<icu::DateTimePatternGenerator> generator(
146         icu::DateTimePatternGenerator::createInstance(no_extension_locale,
147                                                       status));
148     icu::UnicodeString pattern;
149     if (U_SUCCESS(status))
150       pattern = generator->getBestPattern(skeleton, status);
151 
152     date_format = new icu::SimpleDateFormat(pattern, icu_locale, status);
153     if (U_SUCCESS(status)) {
154       date_format->adoptCalendar(calendar);
155     }
156   }
157 
158   if (U_FAILURE(status)) {
159     delete calendar;
160     delete date_format;
161     date_format = nullptr;
162   }
163 
164   return date_format;
165 }
166 
SetResolvedDateSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::SimpleDateFormat * date_format,Handle<JSObject> resolved)167 void SetResolvedDateSettings(Isolate* isolate, const icu::Locale& icu_locale,
168                              icu::SimpleDateFormat* date_format,
169                              Handle<JSObject> resolved) {
170   Factory* factory = isolate->factory();
171   UErrorCode status = U_ZERO_ERROR;
172   icu::UnicodeString pattern;
173   date_format->toPattern(pattern);
174   JSObject::SetProperty(
175       isolate, resolved, factory->intl_pattern_symbol(),
176       factory
177           ->NewStringFromTwoByte(Vector<const uint16_t>(
178               reinterpret_cast<const uint16_t*>(pattern.getBuffer()),
179               pattern.length()))
180           .ToHandleChecked(),
181       LanguageMode::kSloppy)
182       .Assert();
183 
184   // Set time zone and calendar.
185   const icu::Calendar* calendar = date_format->getCalendar();
186   // getType() returns legacy calendar type name instead of LDML/BCP47 calendar
187   // key values. intl.js maps them to BCP47 values for key "ca".
188   // TODO(jshin): Consider doing it here, instead.
189   const char* calendar_name = calendar->getType();
190   JSObject::SetProperty(
191       isolate, resolved, factory->NewStringFromStaticChars("calendar"),
192       factory->NewStringFromAsciiChecked(calendar_name), LanguageMode::kSloppy)
193       .Assert();
194 
195   const icu::TimeZone& tz = calendar->getTimeZone();
196   icu::UnicodeString time_zone;
197   tz.getID(time_zone);
198 
199   icu::UnicodeString canonical_time_zone;
200   icu::TimeZone::getCanonicalID(time_zone, canonical_time_zone, status);
201   if (U_SUCCESS(status)) {
202     // In CLDR (http://unicode.org/cldr/trac/ticket/9943), Etc/UTC is made
203     // a separate timezone ID from Etc/GMT even though they're still the same
204     // timezone. We have Etc/UTC because 'UTC', 'Etc/Universal',
205     // 'Etc/Zulu' and others are turned to 'Etc/UTC' by ICU. Etc/GMT comes
206     // from Etc/GMT0, Etc/GMT+0, Etc/GMT-0, Etc/Greenwich.
207     // ecma402##sec-canonicalizetimezonename step 3
208     if (canonical_time_zone == UNICODE_STRING_SIMPLE("Etc/UTC") ||
209         canonical_time_zone == UNICODE_STRING_SIMPLE("Etc/GMT")) {
210       JSObject::SetProperty(
211           isolate, resolved, factory->NewStringFromStaticChars("timeZone"),
212           factory->NewStringFromStaticChars("UTC"), LanguageMode::kSloppy)
213           .Assert();
214     } else {
215       JSObject::SetProperty(isolate, resolved,
216                             factory->NewStringFromStaticChars("timeZone"),
217                             factory
218                                 ->NewStringFromTwoByte(Vector<const uint16_t>(
219                                     reinterpret_cast<const uint16_t*>(
220                                         canonical_time_zone.getBuffer()),
221                                     canonical_time_zone.length()))
222                                 .ToHandleChecked(),
223                             LanguageMode::kSloppy)
224           .Assert();
225     }
226   }
227 
228   // Ugly hack. ICU doesn't expose numbering system in any way, so we have
229   // to assume that for given locale NumberingSystem constructor produces the
230   // same digits as NumberFormat/Calendar would.
231   status = U_ZERO_ERROR;
232   icu::NumberingSystem* numbering_system =
233       icu::NumberingSystem::createInstance(icu_locale, status);
234   if (U_SUCCESS(status)) {
235     const char* ns = numbering_system->getName();
236     JSObject::SetProperty(
237         isolate, resolved, factory->NewStringFromStaticChars("numberingSystem"),
238         factory->NewStringFromAsciiChecked(ns), LanguageMode::kSloppy)
239         .Assert();
240   } else {
241     JSObject::SetProperty(isolate, resolved,
242                           factory->NewStringFromStaticChars("numberingSystem"),
243                           factory->undefined_value(), LanguageMode::kSloppy)
244         .Assert();
245   }
246   delete numbering_system;
247 
248   // Set the locale
249   char result[ULOC_FULLNAME_CAPACITY];
250   status = U_ZERO_ERROR;
251   uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
252                      FALSE, &status);
253   if (U_SUCCESS(status)) {
254     JSObject::SetProperty(
255         isolate, resolved, factory->NewStringFromStaticChars("locale"),
256         factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy)
257         .Assert();
258   } else {
259     // This would never happen, since we got the locale from ICU.
260     JSObject::SetProperty(
261         isolate, resolved, factory->NewStringFromStaticChars("locale"),
262         factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy)
263         .Assert();
264   }
265 }
266 
SetNumericSettings(Isolate * isolate,icu::DecimalFormat * number_format,Handle<JSObject> options)267 void SetNumericSettings(Isolate* isolate, icu::DecimalFormat* number_format,
268                         Handle<JSObject> options) {
269   int32_t digits;
270   if (ExtractIntegerSetting(isolate, options, "minimumIntegerDigits",
271                             &digits)) {
272     number_format->setMinimumIntegerDigits(digits);
273   }
274 
275   if (ExtractIntegerSetting(isolate, options, "minimumFractionDigits",
276                             &digits)) {
277     number_format->setMinimumFractionDigits(digits);
278   }
279 
280   if (ExtractIntegerSetting(isolate, options, "maximumFractionDigits",
281                             &digits)) {
282     number_format->setMaximumFractionDigits(digits);
283   }
284 
285   bool significant_digits_used = false;
286   if (ExtractIntegerSetting(isolate, options, "minimumSignificantDigits",
287                             &digits)) {
288     number_format->setMinimumSignificantDigits(digits);
289     significant_digits_used = true;
290   }
291 
292   if (ExtractIntegerSetting(isolate, options, "maximumSignificantDigits",
293                             &digits)) {
294     number_format->setMaximumSignificantDigits(digits);
295     significant_digits_used = true;
296   }
297 
298   number_format->setSignificantDigitsUsed(significant_digits_used);
299 
300   number_format->setRoundingMode(icu::DecimalFormat::kRoundHalfUp);
301 }
302 
CreateICUNumberFormat(Isolate * isolate,const icu::Locale & icu_locale,Handle<JSObject> options)303 icu::DecimalFormat* CreateICUNumberFormat(Isolate* isolate,
304                                           const icu::Locale& icu_locale,
305                                           Handle<JSObject> options) {
306   // Make formatter from options. Numbering system is added
307   // to the locale as Unicode extension (if it was specified at all).
308   UErrorCode status = U_ZERO_ERROR;
309   icu::DecimalFormat* number_format = nullptr;
310   icu::UnicodeString style;
311   icu::UnicodeString currency;
312   if (ExtractStringSetting(isolate, options, "style", &style)) {
313     if (style == UNICODE_STRING_SIMPLE("currency")) {
314       icu::UnicodeString display;
315       ExtractStringSetting(isolate, options, "currency", &currency);
316       ExtractStringSetting(isolate, options, "currencyDisplay", &display);
317 
318 #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 6)
319       icu::NumberFormat::EStyles format_style;
320       if (display == UNICODE_STRING_SIMPLE("code")) {
321         format_style = icu::NumberFormat::kIsoCurrencyStyle;
322       } else if (display == UNICODE_STRING_SIMPLE("name")) {
323         format_style = icu::NumberFormat::kPluralCurrencyStyle;
324       } else {
325         format_style = icu::NumberFormat::kCurrencyStyle;
326       }
327 #else  // ICU version is 4.8 or above (we ignore versions below 4.0).
328       UNumberFormatStyle format_style;
329       if (display == UNICODE_STRING_SIMPLE("code")) {
330         format_style = UNUM_CURRENCY_ISO;
331       } else if (display == UNICODE_STRING_SIMPLE("name")) {
332         format_style = UNUM_CURRENCY_PLURAL;
333       } else {
334         format_style = UNUM_CURRENCY;
335       }
336 #endif
337 
338       number_format = static_cast<icu::DecimalFormat*>(
339           icu::NumberFormat::createInstance(icu_locale, format_style, status));
340 
341       if (U_FAILURE(status)) {
342         delete number_format;
343         return nullptr;
344       }
345     } else if (style == UNICODE_STRING_SIMPLE("percent")) {
346       number_format = static_cast<icu::DecimalFormat*>(
347           icu::NumberFormat::createPercentInstance(icu_locale, status));
348       if (U_FAILURE(status)) {
349         delete number_format;
350         return nullptr;
351       }
352       // Make sure 1.1% doesn't go into 2%.
353       number_format->setMinimumFractionDigits(1);
354     } else {
355       // Make a decimal instance by default.
356       number_format = static_cast<icu::DecimalFormat*>(
357           icu::NumberFormat::createInstance(icu_locale, status));
358     }
359   }
360 
361   if (U_FAILURE(status)) {
362     delete number_format;
363     return nullptr;
364   }
365 
366   // Set all options.
367   if (!currency.isEmpty()) {
368     number_format->setCurrency(currency.getBuffer(), status);
369   }
370 
371   SetNumericSettings(isolate, number_format, options);
372 
373   bool grouping;
374   if (ExtractBooleanSetting(isolate, options, "useGrouping", &grouping)) {
375     number_format->setGroupingUsed(grouping);
376   }
377 
378   return number_format;
379 }
380 
SetResolvedNumericSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::DecimalFormat * number_format,Handle<JSObject> resolved)381 void SetResolvedNumericSettings(Isolate* isolate, const icu::Locale& icu_locale,
382                                 icu::DecimalFormat* number_format,
383                                 Handle<JSObject> resolved) {
384   Factory* factory = isolate->factory();
385 
386   JSObject::SetProperty(
387       isolate, resolved,
388       factory->NewStringFromStaticChars("minimumIntegerDigits"),
389       factory->NewNumberFromInt(number_format->getMinimumIntegerDigits()),
390       LanguageMode::kSloppy)
391       .Assert();
392 
393   JSObject::SetProperty(
394       isolate, resolved,
395       factory->NewStringFromStaticChars("minimumFractionDigits"),
396       factory->NewNumberFromInt(number_format->getMinimumFractionDigits()),
397       LanguageMode::kSloppy)
398       .Assert();
399 
400   JSObject::SetProperty(
401       isolate, resolved,
402       factory->NewStringFromStaticChars("maximumFractionDigits"),
403       factory->NewNumberFromInt(number_format->getMaximumFractionDigits()),
404       LanguageMode::kSloppy)
405       .Assert();
406 
407   Handle<String> key =
408       factory->NewStringFromStaticChars("minimumSignificantDigits");
409   Maybe<bool> maybe = JSReceiver::HasOwnProperty(resolved, key);
410   CHECK(maybe.IsJust());
411   if (maybe.FromJust()) {
412     JSObject::SetProperty(
413         isolate, resolved,
414         factory->NewStringFromStaticChars("minimumSignificantDigits"),
415         factory->NewNumberFromInt(number_format->getMinimumSignificantDigits()),
416         LanguageMode::kSloppy)
417         .Assert();
418   }
419 
420   key = factory->NewStringFromStaticChars("maximumSignificantDigits");
421   maybe = JSReceiver::HasOwnProperty(resolved, key);
422   CHECK(maybe.IsJust());
423   if (maybe.FromJust()) {
424     JSObject::SetProperty(
425         isolate, resolved,
426         factory->NewStringFromStaticChars("maximumSignificantDigits"),
427         factory->NewNumberFromInt(number_format->getMaximumSignificantDigits()),
428         LanguageMode::kSloppy)
429         .Assert();
430   }
431 
432   // Set the locale
433   char result[ULOC_FULLNAME_CAPACITY];
434   UErrorCode status = U_ZERO_ERROR;
435   uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
436                      FALSE, &status);
437   if (U_SUCCESS(status)) {
438     JSObject::SetProperty(
439         isolate, resolved, factory->NewStringFromStaticChars("locale"),
440         factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy)
441         .Assert();
442   } else {
443     // This would never happen, since we got the locale from ICU.
444     JSObject::SetProperty(
445         isolate, resolved, factory->NewStringFromStaticChars("locale"),
446         factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy)
447         .Assert();
448   }
449 }
450 
SetResolvedNumberSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::DecimalFormat * number_format,Handle<JSObject> resolved)451 void SetResolvedNumberSettings(Isolate* isolate, const icu::Locale& icu_locale,
452                                icu::DecimalFormat* number_format,
453                                Handle<JSObject> resolved) {
454   Factory* factory = isolate->factory();
455 
456   // Set resolved currency code in options.currency if not empty.
457   icu::UnicodeString currency(number_format->getCurrency());
458   if (!currency.isEmpty()) {
459     JSObject::SetProperty(
460         isolate, resolved, factory->NewStringFromStaticChars("currency"),
461         factory
462             ->NewStringFromTwoByte(Vector<const uint16_t>(
463                 reinterpret_cast<const uint16_t*>(currency.getBuffer()),
464                 currency.length()))
465             .ToHandleChecked(),
466         LanguageMode::kSloppy)
467         .Assert();
468   }
469 
470   // Ugly hack. ICU doesn't expose numbering system in any way, so we have
471   // to assume that for given locale NumberingSystem constructor produces the
472   // same digits as NumberFormat/Calendar would.
473   UErrorCode status = U_ZERO_ERROR;
474   icu::NumberingSystem* numbering_system =
475       icu::NumberingSystem::createInstance(icu_locale, status);
476   if (U_SUCCESS(status)) {
477     const char* ns = numbering_system->getName();
478     JSObject::SetProperty(
479         isolate, resolved, factory->NewStringFromStaticChars("numberingSystem"),
480         factory->NewStringFromAsciiChecked(ns), LanguageMode::kSloppy)
481         .Assert();
482   } else {
483     JSObject::SetProperty(isolate, resolved,
484                           factory->NewStringFromStaticChars("numberingSystem"),
485                           factory->undefined_value(), LanguageMode::kSloppy)
486         .Assert();
487   }
488   delete numbering_system;
489 
490   JSObject::SetProperty(isolate, resolved,
491                         factory->NewStringFromStaticChars("useGrouping"),
492                         factory->ToBoolean(number_format->isGroupingUsed()),
493                         LanguageMode::kSloppy)
494       .Assert();
495 
496   SetResolvedNumericSettings(isolate, icu_locale, number_format, resolved);
497 }
498 
CreateICUBreakIterator(Isolate * isolate,const icu::Locale & icu_locale,Handle<JSObject> options)499 icu::BreakIterator* CreateICUBreakIterator(Isolate* isolate,
500                                            const icu::Locale& icu_locale,
501                                            Handle<JSObject> options) {
502   UErrorCode status = U_ZERO_ERROR;
503   icu::BreakIterator* break_iterator = nullptr;
504   icu::UnicodeString type;
505   if (!ExtractStringSetting(isolate, options, "type", &type)) return nullptr;
506 
507   if (type == UNICODE_STRING_SIMPLE("character")) {
508     break_iterator =
509         icu::BreakIterator::createCharacterInstance(icu_locale, status);
510   } else if (type == UNICODE_STRING_SIMPLE("sentence")) {
511     break_iterator =
512         icu::BreakIterator::createSentenceInstance(icu_locale, status);
513   } else if (type == UNICODE_STRING_SIMPLE("line")) {
514     break_iterator = icu::BreakIterator::createLineInstance(icu_locale, status);
515   } else {
516     // Defualt is word iterator.
517     break_iterator = icu::BreakIterator::createWordInstance(icu_locale, status);
518   }
519 
520   if (U_FAILURE(status)) {
521     delete break_iterator;
522     return nullptr;
523   }
524 
525   isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator);
526 
527   return break_iterator;
528 }
529 
SetResolvedBreakIteratorSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::BreakIterator * break_iterator,Handle<JSObject> resolved)530 void SetResolvedBreakIteratorSettings(Isolate* isolate,
531                                       const icu::Locale& icu_locale,
532                                       icu::BreakIterator* break_iterator,
533                                       Handle<JSObject> resolved) {
534   Factory* factory = isolate->factory();
535   UErrorCode status = U_ZERO_ERROR;
536 
537   // Set the locale
538   char result[ULOC_FULLNAME_CAPACITY];
539   status = U_ZERO_ERROR;
540   uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
541                      FALSE, &status);
542   if (U_SUCCESS(status)) {
543     JSObject::SetProperty(
544         isolate, resolved, factory->NewStringFromStaticChars("locale"),
545         factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy)
546         .Assert();
547   } else {
548     // This would never happen, since we got the locale from ICU.
549     JSObject::SetProperty(
550         isolate, resolved, factory->NewStringFromStaticChars("locale"),
551         factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy)
552         .Assert();
553   }
554 }
555 
CachedOrNewService(Isolate * isolate,Handle<String> service,Handle<Object> locales,Handle<Object> options,Handle<Object> internal_options)556 MaybeHandle<JSObject> CachedOrNewService(Isolate* isolate,
557                                          Handle<String> service,
558                                          Handle<Object> locales,
559                                          Handle<Object> options,
560                                          Handle<Object> internal_options) {
561   Handle<Object> result;
562   Handle<Object> undefined_value(ReadOnlyRoots(isolate).undefined_value(),
563                                  isolate);
564   Handle<Object> args[] = {service, locales, options, internal_options};
565   ASSIGN_RETURN_ON_EXCEPTION(
566       isolate, result,
567       Execution::Call(isolate, isolate->cached_or_new_service(),
568                       undefined_value, arraysize(args), args),
569       JSArray);
570   return Handle<JSObject>::cast(result);
571 }
572 }  // namespace
573 
CreateICULocale(Isolate * isolate,Handle<String> bcp47_locale_str)574 icu::Locale Intl::CreateICULocale(Isolate* isolate,
575                                   Handle<String> bcp47_locale_str) {
576   v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
577   v8::String::Utf8Value bcp47_locale(v8_isolate,
578                                      v8::Utils::ToLocal(bcp47_locale_str));
579   CHECK_NOT_NULL(*bcp47_locale);
580 
581   DisallowHeapAllocation no_gc;
582 
583   // Convert BCP47 into ICU locale format.
584   UErrorCode status = U_ZERO_ERROR;
585   char icu_result[ULOC_FULLNAME_CAPACITY];
586   int icu_length = 0;
587 
588   // bcp47_locale_str should be a canonicalized language tag, which
589   // means this shouldn't fail.
590   uloc_forLanguageTag(*bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY,
591                       &icu_length, &status);
592   CHECK(U_SUCCESS(status));
593   CHECK_LT(0, icu_length);
594 
595   icu::Locale icu_locale(icu_result);
596   if (icu_locale.isBogus()) {
597     FATAL("Failed to create ICU locale, are ICU data files missing?");
598   }
599 
600   return icu_locale;
601 }
602 
603 // static
InitializeDateTimeFormat(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)604 icu::SimpleDateFormat* DateFormat::InitializeDateTimeFormat(
605     Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
606     Handle<JSObject> resolved) {
607   icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
608   DCHECK(!icu_locale.isBogus());
609 
610   icu::SimpleDateFormat* date_format =
611       CreateICUDateFormat(isolate, icu_locale, options);
612   if (!date_format) {
613     // Remove extensions and try again.
614     icu::Locale no_extension_locale(icu_locale.getBaseName());
615     date_format = CreateICUDateFormat(isolate, no_extension_locale, options);
616 
617     if (!date_format) {
618       FATAL("Failed to create ICU date format, are ICU data files missing?");
619     }
620 
621     // Set resolved settings (pattern, numbering system, calendar).
622     SetResolvedDateSettings(isolate, no_extension_locale, date_format,
623                             resolved);
624   } else {
625     SetResolvedDateSettings(isolate, icu_locale, date_format, resolved);
626   }
627 
628   CHECK_NOT_NULL(date_format);
629   return date_format;
630 }
631 
UnpackDateFormat(Handle<JSObject> obj)632 icu::SimpleDateFormat* DateFormat::UnpackDateFormat(Handle<JSObject> obj) {
633   return reinterpret_cast<icu::SimpleDateFormat*>(
634       obj->GetEmbedderField(DateFormat::kSimpleDateFormatIndex));
635 }
636 
DeleteDateFormat(const v8::WeakCallbackInfo<void> & data)637 void DateFormat::DeleteDateFormat(const v8::WeakCallbackInfo<void>& data) {
638   delete reinterpret_cast<icu::SimpleDateFormat*>(data.GetInternalField(0));
639   GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
640 }
641 
Unwrap(Isolate * isolate,Handle<JSReceiver> receiver,const char * method_name)642 MaybeHandle<JSObject> DateFormat::Unwrap(Isolate* isolate,
643                                          Handle<JSReceiver> receiver,
644                                          const char* method_name) {
645   Handle<Context> native_context =
646       Handle<Context>(isolate->context()->native_context(), isolate);
647   Handle<JSFunction> constructor = Handle<JSFunction>(
648       JSFunction::cast(native_context->intl_date_time_format_function()),
649       isolate);
650   Handle<String> method_name_str =
651       isolate->factory()->NewStringFromAsciiChecked(method_name);
652 
653   return Intl::UnwrapReceiver(isolate, receiver, constructor,
654                               Intl::Type::kDateTimeFormat, method_name_str,
655                               true);
656 }
657 
658 // ecma402/#sec-formatdatetime
659 // FormatDateTime( dateTimeFormat, x )
FormatDateTime(Isolate * isolate,Handle<JSObject> date_time_format_holder,double x)660 MaybeHandle<String> DateFormat::FormatDateTime(
661     Isolate* isolate, Handle<JSObject> date_time_format_holder, double x) {
662   double date_value = DateCache::TimeClip(x);
663   if (std::isnan(date_value)) {
664     THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kInvalidTimeValue),
665                     String);
666   }
667 
668   CHECK(Intl::IsObjectOfType(isolate, date_time_format_holder,
669                              Intl::Type::kDateTimeFormat));
670   icu::SimpleDateFormat* date_format =
671       DateFormat::UnpackDateFormat(date_time_format_holder);
672   CHECK_NOT_NULL(date_format);
673 
674   icu::UnicodeString result;
675   date_format->format(date_value, result);
676 
677   return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
678       reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length()));
679 }
680 
681 // ecma402/#sec-datetime-format-functions
682 // DateTime Format Functions
DateTimeFormat(Isolate * isolate,Handle<JSObject> date_time_format_holder,Handle<Object> date)683 MaybeHandle<String> DateFormat::DateTimeFormat(
684     Isolate* isolate, Handle<JSObject> date_time_format_holder,
685     Handle<Object> date) {
686   // 2. Assert: Type(dtf) is Object and dtf has an [[InitializedDateTimeFormat]]
687   // internal slot.
688   DCHECK(Intl::IsObjectOfType(isolate, date_time_format_holder,
689                               Intl::Type::kDateTimeFormat));
690 
691   // 3. If date is not provided or is undefined, then
692   double x;
693   if (date->IsUndefined()) {
694     // 3.a Let x be Call(%Date_now%, undefined).
695     x = JSDate::CurrentTimeValue(isolate);
696   } else {
697     // 4. Else,
698     //    a. Let x be ? ToNumber(date).
699     ASSIGN_RETURN_ON_EXCEPTION(isolate, date, Object::ToNumber(isolate, date),
700                                String);
701     CHECK(date->IsNumber());
702     x = date->Number();
703   }
704   // 5. Return FormatDateTime(dtf, x).
705   return DateFormat::FormatDateTime(isolate, date_time_format_holder, x);
706 }
707 
ToLocaleDateTime(Isolate * isolate,Handle<Object> date,Handle<Object> locales,Handle<Object> options,const char * required,const char * defaults,const char * service)708 MaybeHandle<String> DateFormat::ToLocaleDateTime(
709     Isolate* isolate, Handle<Object> date, Handle<Object> locales,
710     Handle<Object> options, const char* required, const char* defaults,
711     const char* service) {
712   Factory* factory = isolate->factory();
713   // 1. Let x be ? thisTimeValue(this value);
714   if (!date->IsJSDate()) {
715     THROW_NEW_ERROR(isolate,
716                     NewTypeError(MessageTemplate::kMethodInvokedOnWrongType,
717                                  factory->NewStringFromStaticChars("Date")),
718                     String);
719   }
720 
721   double const x = Handle<JSDate>::cast(date)->value()->Number();
722   // 2. If x is NaN, return "Invalid Date"
723   if (std::isnan(x)) {
724     return factory->NewStringFromStaticChars("Invalid Date");
725   }
726 
727   // 3. Let options be ? ToDateTimeOptions(options, required, defaults).
728   Handle<JSObject> internal_options;
729   ASSIGN_RETURN_ON_EXCEPTION(
730       isolate, internal_options,
731       DateFormat::ToDateTimeOptions(isolate, options, required, defaults),
732       String);
733 
734   // 4. Let dateFormat be ? Construct(%DateTimeFormat%, « locales, options »).
735   Handle<JSObject> date_format;
736   ASSIGN_RETURN_ON_EXCEPTION(
737       isolate, date_format,
738       CachedOrNewService(isolate, factory->NewStringFromAsciiChecked(service),
739                          locales, options, internal_options),
740       String);
741 
742   // 5. Return FormatDateTime(dateFormat, x).
743   return DateFormat::FormatDateTime(isolate, date_format, x);
744 }
745 
InitializeNumberFormat(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)746 icu::DecimalFormat* NumberFormat::InitializeNumberFormat(
747     Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
748     Handle<JSObject> resolved) {
749   icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
750   DCHECK(!icu_locale.isBogus());
751 
752   icu::DecimalFormat* number_format =
753       CreateICUNumberFormat(isolate, icu_locale, options);
754   if (!number_format) {
755     // Remove extensions and try again.
756     icu::Locale no_extension_locale(icu_locale.getBaseName());
757     number_format =
758         CreateICUNumberFormat(isolate, no_extension_locale, options);
759 
760     if (!number_format) {
761       FATAL("Failed to create ICU number format, are ICU data files missing?");
762     }
763 
764     // Set resolved settings (pattern, numbering system).
765     SetResolvedNumberSettings(isolate, no_extension_locale, number_format,
766                               resolved);
767   } else {
768     SetResolvedNumberSettings(isolate, icu_locale, number_format, resolved);
769   }
770 
771   CHECK_NOT_NULL(number_format);
772   return number_format;
773 }
774 
UnpackNumberFormat(Handle<JSObject> obj)775 icu::DecimalFormat* NumberFormat::UnpackNumberFormat(Handle<JSObject> obj) {
776   return reinterpret_cast<icu::DecimalFormat*>(
777       obj->GetEmbedderField(NumberFormat::kDecimalFormatIndex));
778 }
779 
DeleteNumberFormat(const v8::WeakCallbackInfo<void> & data)780 void NumberFormat::DeleteNumberFormat(const v8::WeakCallbackInfo<void>& data) {
781   delete reinterpret_cast<icu::DecimalFormat*>(data.GetInternalField(0));
782   GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
783 }
784 
InitializeBreakIterator(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)785 icu::BreakIterator* V8BreakIterator::InitializeBreakIterator(
786     Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
787     Handle<JSObject> resolved) {
788   icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
789   DCHECK(!icu_locale.isBogus());
790 
791   icu::BreakIterator* break_iterator =
792       CreateICUBreakIterator(isolate, icu_locale, options);
793   if (!break_iterator) {
794     // Remove extensions and try again.
795     icu::Locale no_extension_locale(icu_locale.getBaseName());
796     break_iterator =
797         CreateICUBreakIterator(isolate, no_extension_locale, options);
798 
799     if (!break_iterator) {
800       FATAL("Failed to create ICU break iterator, are ICU data files missing?");
801     }
802 
803     // Set resolved settings (locale).
804     SetResolvedBreakIteratorSettings(isolate, no_extension_locale,
805                                      break_iterator, resolved);
806   } else {
807     SetResolvedBreakIteratorSettings(isolate, icu_locale, break_iterator,
808                                      resolved);
809   }
810 
811   CHECK_NOT_NULL(break_iterator);
812   return break_iterator;
813 }
814 
UnpackBreakIterator(Handle<JSObject> obj)815 icu::BreakIterator* V8BreakIterator::UnpackBreakIterator(Handle<JSObject> obj) {
816   return reinterpret_cast<icu::BreakIterator*>(
817       obj->GetEmbedderField(V8BreakIterator::kBreakIteratorIndex));
818 }
819 
DeleteBreakIterator(const v8::WeakCallbackInfo<void> & data)820 void V8BreakIterator::DeleteBreakIterator(
821     const v8::WeakCallbackInfo<void>& data) {
822   delete reinterpret_cast<icu::BreakIterator*>(data.GetInternalField(0));
823   delete reinterpret_cast<icu::UnicodeString*>(data.GetInternalField(1));
824   GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
825 }
826 
// Makes the ICU break iterator held by |break_iterator_holder| operate on
// |text|.
//
// The holder stores two raw pointers in embedder fields: the
// icu::BreakIterator (read through UnpackBreakIterator) and, at
// kUnicodeStringIndex, the icu::UnicodeString passed to setText(). This
// function deletes the string currently stored there, builds a new one from
// |text|, stores it back in the holder and hands it to the break iterator.
void V8BreakIterator::AdoptText(Isolate* isolate,
                                Handle<JSObject> break_iterator_holder,
                                Handle<String> text) {
  icu::BreakIterator* break_iterator =
      V8BreakIterator::UnpackBreakIterator(break_iterator_holder);
  CHECK_NOT_NULL(break_iterator);

  // Release the string object currently stored in the holder; the field is
  // overwritten with the replacement further down.
  icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>(
      break_iterator_holder->GetEmbedderField(
          V8BreakIterator::kUnicodeStringIndex));
  delete u_text;

  int length = text->length();
  // Flatten before opening the no-allocation scope below.
  text = String::Flatten(isolate, text);
  // NOTE(review): presumably |no_gc| keeps the flat content pointer stable
  // while the characters are read — confirm.
  DisallowHeapAllocation no_gc;
  String::FlatContent flat = text->GetFlatContent();
  // |sap| is scratch storage GetUCharBufferFromFlat may fill; it lives until
  // the end of this function, i.e. past the UnicodeString construction.
  std::unique_ptr<uc16[]> sap;
  const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length);
  u_text = new icu::UnicodeString(text_value, length);
  // The pointer is stored in the embedder field disguised as a Smi*.
  break_iterator_holder->SetEmbedderField(V8BreakIterator::kUnicodeStringIndex,
                                          reinterpret_cast<Smi*>(u_text));

  break_iterator->setText(*u_text);
}
851 
ToString(Isolate * isolate,const icu::UnicodeString & string)852 MaybeHandle<String> Intl::ToString(Isolate* isolate,
853                                    const icu::UnicodeString& string) {
854   return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
855       reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
856 }
857 
ToString(Isolate * isolate,const icu::UnicodeString & string,int32_t begin,int32_t end)858 MaybeHandle<String> Intl::ToString(Isolate* isolate,
859                                    const icu::UnicodeString& string,
860                                    int32_t begin, int32_t end) {
861   return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
862 }
863 
864 namespace {
865 
InnerAddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)866 Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
867                                  int index, Handle<String> field_type_string,
868                                  Handle<String> value) {
869   // let element = $array[$index] = {
870   //   type: $field_type_string,
871   //   value: $value
872   // }
873   // return element;
874   Factory* factory = isolate->factory();
875   Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
876   JSObject::AddProperty(isolate, element, factory->type_string(),
877                         field_type_string, NONE);
878 
879   JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
880   JSObject::AddDataElement(array, index, element, NONE);
881   return element;
882 }
883 
884 }  // namespace
885 
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)886 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
887                       Handle<String> field_type_string, Handle<String> value) {
888   // Same as $array[$index] = {type: $field_type_string, value: $value};
889   InnerAddElement(isolate, array, index, field_type_string, value);
890 }
891 
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value,Handle<String> additional_property_name,Handle<String> additional_property_value)892 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
893                       Handle<String> field_type_string, Handle<String> value,
894                       Handle<String> additional_property_name,
895                       Handle<String> additional_property_value) {
896   // Same as $array[$index] = {
897   //   type: $field_type_string, value: $value,
898   //   $additional_property_name: $additional_property_value
899   // }
900   Handle<JSObject> element =
901       InnerAddElement(isolate, array, index, field_type_string, value);
902   JSObject::AddProperty(isolate, element, additional_property_name,
903                         additional_property_value, NONE);
904 }
905 // Build the shortened locale; eg, convert xx_Yyyy_ZZ  to xx_ZZ.
RemoveLocaleScriptTag(const std::string & icu_locale,std::string * locale_less_script)906 bool Intl::RemoveLocaleScriptTag(const std::string& icu_locale,
907                                  std::string* locale_less_script) {
908   icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
909   const char* icu_script = new_locale.getScript();
910   if (icu_script == NULL || strlen(icu_script) == 0) {
911     *locale_less_script = std::string();
912     return false;
913   }
914 
915   const char* icu_language = new_locale.getLanguage();
916   const char* icu_country = new_locale.getCountry();
917   icu::Locale short_locale = icu::Locale(icu_language, icu_country);
918   const char* icu_name = short_locale.getName();
919   *locale_less_script = std::string(icu_name);
920   return true;
921 }
922 
923 namespace {
924 
IsPropertyUndefined(Isolate * isolate,Handle<JSObject> options,const char * property)925 Maybe<bool> IsPropertyUndefined(Isolate* isolate, Handle<JSObject> options,
926                                 const char* property) {
927   Factory* factory = isolate->factory();
928   // i. Let prop be the property name.
929   // ii. Let value be ? Get(options, prop).
930   Handle<Object> value;
931   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
932       isolate, value,
933       Object::GetPropertyOrElement(
934           isolate, options, factory->NewStringFromAsciiChecked(property)),
935       Nothing<bool>());
936   return Just(value->IsUndefined(isolate));
937 }
938 
939 }  // namespace
940 
941 // ecma-402/#sec-todatetimeoptions
ToDateTimeOptions(Isolate * isolate,Handle<Object> input_options,const char * required,const char * defaults)942 MaybeHandle<JSObject> DateFormat::ToDateTimeOptions(
943     Isolate* isolate, Handle<Object> input_options, const char* required,
944     const char* defaults) {
945   Factory* factory = isolate->factory();
946   // 1. If options is undefined, let options be null; otherwise let options be ?
947   //    ToObject(options).
948   Handle<JSObject> options;
949   if (input_options->IsUndefined(isolate)) {
950     options = factory->NewJSObjectWithNullProto();
951   } else {
952     Handle<JSReceiver> options_obj;
953     ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
954                                Object::ToObject(isolate, input_options),
955                                JSObject);
956     // 2. Let options be ObjectCreate(options).
957     ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
958                                JSObject::ObjectCreate(isolate, options_obj),
959                                JSObject);
960   }
961 
962   // 3. Let needDefaults be true.
963   bool needs_default = true;
964 
965   bool required_is_any = strcmp(required, "any") == 0;
966   // 4. If required is "date" or "any", then
967   if (required_is_any || (strcmp(required, "date") == 0)) {
968     // a. For each of the property names "weekday", "year", "month", "day", do
969     for (auto& prop : {"weekday", "year", "month", "day"}) {
970       //  i. Let prop be the property name.
971       // ii. Let value be ? Get(options, prop)
972       Maybe<bool> maybe_undefined = IsPropertyUndefined(isolate, options, prop);
973       MAYBE_RETURN(maybe_undefined, Handle<JSObject>());
974       // iii. If value is not undefined, let needDefaults be false.
975       if (!maybe_undefined.FromJust()) {
976         needs_default = false;
977       }
978     }
979   }
980 
981   // 5. If required is "time" or "any", then
982   if (required_is_any || (strcmp(required, "time") == 0)) {
983     // a. For each of the property names "hour", "minute", "second", do
984     for (auto& prop : {"hour", "minute", "second"}) {
985       //  i. Let prop be the property name.
986       // ii. Let value be ? Get(options, prop)
987       Maybe<bool> maybe_undefined = IsPropertyUndefined(isolate, options, prop);
988       MAYBE_RETURN(maybe_undefined, Handle<JSObject>());
989       // iii. If value is not undefined, let needDefaults be false.
990       if (!maybe_undefined.FromJust()) {
991         needs_default = false;
992       }
993     }
994   }
995 
996   // 6. If needDefaults is true and defaults is either "date" or "all", then
997   if (needs_default) {
998     bool default_is_all = strcmp(defaults, "all") == 0;
999     if (default_is_all || (strcmp(defaults, "date") == 0)) {
1000       // a. For each of the property names "year", "month", "day", do
1001       // i. Perform ? CreateDataPropertyOrThrow(options, prop, "numeric").
1002       for (auto& prop : {"year", "month", "day"}) {
1003         MAYBE_RETURN(
1004             JSReceiver::CreateDataProperty(
1005                 isolate, options, factory->NewStringFromAsciiChecked(prop),
1006                 factory->numeric_string(), kThrowOnError),
1007             Handle<JSObject>());
1008       }
1009     }
1010     // 7. If needDefaults is true and defaults is either "time" or "all", then
1011     if (default_is_all || (strcmp(defaults, "time") == 0)) {
1012       // a. For each of the property names "hour", "minute", "second", do
1013       // i. Perform ? CreateDataPropertyOrThrow(options, prop, "numeric").
1014       for (auto& prop : {"hour", "minute", "second"}) {
1015         MAYBE_RETURN(
1016             JSReceiver::CreateDataProperty(
1017                 isolate, options, factory->NewStringFromAsciiChecked(prop),
1018                 factory->numeric_string(), kThrowOnError),
1019             Handle<JSObject>());
1020       }
1021     }
1022   }
1023   // 8. Return options.
1024   return options;
1025 }
1026 
GetAvailableLocales(const IcuService & service)1027 std::set<std::string> Intl::GetAvailableLocales(const IcuService& service) {
1028   const icu::Locale* icu_available_locales = nullptr;
1029   int32_t count = 0;
1030   std::set<std::string> locales;
1031 
1032   switch (service) {
1033     case IcuService::kBreakIterator:
1034       icu_available_locales = icu::BreakIterator::getAvailableLocales(count);
1035       break;
1036     case IcuService::kCollator:
1037       icu_available_locales = icu::Collator::getAvailableLocales(count);
1038       break;
1039     case IcuService::kDateFormat:
1040       icu_available_locales = icu::DateFormat::getAvailableLocales(count);
1041       break;
1042     case IcuService::kNumberFormat:
1043       icu_available_locales = icu::NumberFormat::getAvailableLocales(count);
1044       break;
1045     case IcuService::kPluralRules:
1046       // TODO(littledan): For PluralRules, filter out locales that
1047       // don't support PluralRules.
1048       // PluralRules is missing an appropriate getAvailableLocales method,
1049       // so we should filter from all locales, but it's not clear how; see
1050       // https://ssl.icu-project.org/trac/ticket/12756
1051       icu_available_locales = icu::Locale::getAvailableLocales(count);
1052       break;
1053     case IcuService::kResourceBundle: {
1054       UErrorCode status = U_ZERO_ERROR;
1055       UEnumeration* en = ures_openAvailableLocales(nullptr, &status);
1056       int32_t length = 0;
1057       const char* locale_str = uenum_next(en, &length, &status);
1058       while (U_SUCCESS(status) && (locale_str != nullptr)) {
1059         std::string locale(locale_str, length);
1060         std::replace(locale.begin(), locale.end(), '_', '-');
1061         locales.insert(locale);
1062         std::string shortened_locale;
1063         if (Intl::RemoveLocaleScriptTag(locale_str, &shortened_locale)) {
1064           std::replace(shortened_locale.begin(), shortened_locale.end(), '_',
1065                        '-');
1066           locales.insert(shortened_locale);
1067         }
1068         locale_str = uenum_next(en, &length, &status);
1069       }
1070       uenum_close(en);
1071       return locales;
1072     }
1073     case IcuService::kRelativeDateTimeFormatter: {
1074       // ICU RelativeDateTimeFormatter does not provide a getAvailableLocales()
1075       // interface, because RelativeDateTimeFormatter depends on
1076       // 1. NumberFormat and 2. ResourceBundle, return the
1077       // intersection of these two set.
1078       // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20009
1079       // TODO(ftang): change to call ICU's getAvailableLocales() after it is
1080       // added.
1081       std::set<std::string> number_format_set(
1082           Intl::GetAvailableLocales(IcuService::kNumberFormat));
1083       std::set<std::string> resource_bundle_set(
1084           Intl::GetAvailableLocales(IcuService::kResourceBundle));
1085       set_intersection(resource_bundle_set.begin(), resource_bundle_set.end(),
1086                        number_format_set.begin(), number_format_set.end(),
1087                        std::inserter(locales, locales.begin()));
1088       return locales;
1089     }
1090     case IcuService::kListFormatter: {
1091       // TODO(ftang): for now just use
1092       // icu::Locale::getAvailableLocales(count) until we migrate to
1093       // Intl::GetAvailableLocales().
1094       // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
1095       icu_available_locales = icu::Locale::getAvailableLocales(count);
1096       break;
1097     }
1098   }
1099 
1100   UErrorCode error = U_ZERO_ERROR;
1101   char result[ULOC_FULLNAME_CAPACITY];
1102 
1103   for (int32_t i = 0; i < count; ++i) {
1104     const char* icu_name = icu_available_locales[i].getName();
1105 
1106     error = U_ZERO_ERROR;
1107     // No need to force strict BCP47 rules.
1108     uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
1109     if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
1110       // This shouldn't happen, but lets not break the user.
1111       continue;
1112     }
1113     std::string locale(result);
1114     locales.insert(locale);
1115 
1116     std::string shortened_locale;
1117     if (Intl::RemoveLocaleScriptTag(icu_name, &shortened_locale)) {
1118       std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
1119       locales.insert(shortened_locale);
1120     }
1121   }
1122 
1123   return locales;
1124 }
1125 
StringToIcuService(Handle<String> service)1126 IcuService Intl::StringToIcuService(Handle<String> service) {
1127   if (service->IsUtf8EqualTo(CStrVector("collator"))) {
1128     return IcuService::kCollator;
1129   } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
1130     return IcuService::kNumberFormat;
1131   } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
1132     return IcuService::kDateFormat;
1133   } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
1134     return IcuService::kBreakIterator;
1135   } else if (service->IsUtf8EqualTo(CStrVector("pluralrules"))) {
1136     return IcuService::kPluralRules;
1137   } else if (service->IsUtf8EqualTo(CStrVector("relativetimeformat"))) {
1138     return IcuService::kRelativeDateTimeFormatter;
1139   } else if (service->IsUtf8EqualTo(CStrVector("listformat"))) {
1140     return IcuService::kListFormatter;
1141   }
1142   UNREACHABLE();
1143 }
1144 
AvailableLocalesOf(Isolate * isolate,Handle<String> service)1145 V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> Intl::AvailableLocalesOf(
1146     Isolate* isolate, Handle<String> service) {
1147   Factory* factory = isolate->factory();
1148   std::set<std::string> results =
1149       Intl::GetAvailableLocales(StringToIcuService(service));
1150   Handle<JSObject> locales = factory->NewJSObjectWithNullProto();
1151 
1152   int32_t i = 0;
1153   for (auto iter = results.begin(); iter != results.end(); ++iter) {
1154     RETURN_ON_EXCEPTION(
1155         isolate,
1156         JSObject::SetOwnPropertyIgnoreAttributes(
1157             locales, factory->NewStringFromAsciiChecked(iter->c_str()),
1158             factory->NewNumber(i++), NONE),
1159         JSObject);
1160   }
1161   return locales;
1162 }
1163 
DefaultLocale(Isolate * isolate)1164 std::string Intl::DefaultLocale(Isolate* isolate) {
1165   if (isolate->default_locale().empty()) {
1166     icu::Locale default_locale;
1167     // Translate ICU's fallback locale to a well-known locale.
1168     if (strcmp(default_locale.getName(), "en_US_POSIX") == 0) {
1169       isolate->set_default_locale("en-US");
1170     } else {
1171       // Set the locale
1172       char result[ULOC_FULLNAME_CAPACITY];
1173       UErrorCode status = U_ZERO_ERROR;
1174       int32_t length =
1175           uloc_toLanguageTag(default_locale.getName(), result,
1176                              ULOC_FULLNAME_CAPACITY, FALSE, &status);
1177       isolate->set_default_locale(
1178           U_SUCCESS(status) ? std::string(result, length) : "und");
1179     }
1180     DCHECK(!isolate->default_locale().empty());
1181   }
1182   return isolate->default_locale();
1183 }
1184 
IsObjectOfType(Isolate * isolate,Handle<Object> input,Intl::Type expected_type)1185 bool Intl::IsObjectOfType(Isolate* isolate, Handle<Object> input,
1186                           Intl::Type expected_type) {
1187   if (!input->IsJSObject()) return false;
1188   Handle<JSObject> obj = Handle<JSObject>::cast(input);
1189 
1190   Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
1191   Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
1192 
1193   if (!tag->IsSmi()) return false;
1194 
1195   Intl::Type type = Intl::TypeFromSmi(Smi::cast(*tag));
1196   return type == expected_type;
1197 }
1198 
1199 namespace {
1200 
1201 // In ECMA 402 v1, Intl constructors supported a mode of operation
1202 // where calling them with an existing object as a receiver would
1203 // transform the receiver into the relevant Intl instance with all
1204 // internal slots. In ECMA 402 v2, this capability was removed, to
1205 // avoid adding internal slots on existing objects. In ECMA 402 v3,
1206 // the capability was re-added as "normative optional" in a mode
1207 // which chains the underlying Intl instance on any object, when the
1208 // constructor is called
1209 //
1210 // See ecma402/#legacy-constructor.
LegacyUnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,Intl::Type type)1211 MaybeHandle<Object> LegacyUnwrapReceiver(Isolate* isolate,
1212                                          Handle<JSReceiver> receiver,
1213                                          Handle<JSFunction> constructor,
1214                                          Intl::Type type) {
1215   bool has_initialized_slot = Intl::IsObjectOfType(isolate, receiver, type);
1216 
1217   Handle<Object> obj_is_instance_of;
1218   ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
1219                              Object::InstanceOf(isolate, receiver, constructor),
1220                              Object);
1221   bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);
1222 
1223   // 2. If receiver does not have an [[Initialized...]] internal slot
1224   //    and ? InstanceofOperator(receiver, constructor) is true, then
1225   if (!has_initialized_slot && is_instance_of) {
1226     // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
1227     Handle<Object> new_receiver;
1228     ASSIGN_RETURN_ON_EXCEPTION(
1229         isolate, new_receiver,
1230         JSReceiver::GetProperty(isolate, receiver,
1231                                 isolate->factory()->intl_fallback_symbol()),
1232         Object);
1233     return new_receiver;
1234   }
1235 
1236   return receiver;
1237 }
1238 
1239 }  // namespace
1240 
UnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,Intl::Type type,Handle<String> method_name,bool check_legacy_constructor)1241 MaybeHandle<JSObject> Intl::UnwrapReceiver(Isolate* isolate,
1242                                            Handle<JSReceiver> receiver,
1243                                            Handle<JSFunction> constructor,
1244                                            Intl::Type type,
1245                                            Handle<String> method_name,
1246                                            bool check_legacy_constructor) {
1247   DCHECK(type == Intl::Type::kCollator || type == Intl::Type::kNumberFormat ||
1248          type == Intl::Type::kDateTimeFormat ||
1249          type == Intl::Type::kBreakIterator);
1250   Handle<Object> new_receiver = receiver;
1251   if (check_legacy_constructor) {
1252     ASSIGN_RETURN_ON_EXCEPTION(
1253         isolate, new_receiver,
1254         LegacyUnwrapReceiver(isolate, receiver, constructor, type), JSObject);
1255   }
1256 
1257   // Collator has been ported to use regular instance types. We
1258   // shouldn't be using Intl::IsObjectOfType anymore.
1259   if (type == Intl::Type::kCollator) {
1260     if (!receiver->IsJSCollator()) {
1261       // 3. a. Throw a TypeError exception.
1262       THROW_NEW_ERROR(isolate,
1263                       NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
1264                                    method_name, receiver),
1265                       JSObject);
1266     }
1267     return Handle<JSCollator>::cast(receiver);
1268   }
1269 
1270   DCHECK_NE(type, Intl::Type::kCollator);
1271   // 3. If Type(new_receiver) is not Object or nf does not have an
1272   //    [[Initialized...]]  internal slot, then
1273   if (!Intl::IsObjectOfType(isolate, new_receiver, type)) {
1274     // 3. a. Throw a TypeError exception.
1275     THROW_NEW_ERROR(isolate,
1276                     NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
1277                                  method_name, receiver),
1278                     JSObject);
1279   }
1280 
1281   // The above IsObjectOfType returns true only for JSObjects, which
1282   // makes this cast safe.
1283   return Handle<JSObject>::cast(new_receiver);
1284 }
1285 
Unwrap(Isolate * isolate,Handle<JSReceiver> receiver,const char * method_name)1286 MaybeHandle<JSObject> NumberFormat::Unwrap(Isolate* isolate,
1287                                            Handle<JSReceiver> receiver,
1288                                            const char* method_name) {
1289   Handle<Context> native_context =
1290       Handle<Context>(isolate->context()->native_context(), isolate);
1291   Handle<JSFunction> constructor = Handle<JSFunction>(
1292       JSFunction::cast(native_context->intl_number_format_function()), isolate);
1293   Handle<String> method_name_str =
1294       isolate->factory()->NewStringFromAsciiChecked(method_name);
1295 
1296   return Intl::UnwrapReceiver(isolate, receiver, constructor,
1297                               Intl::Type::kNumberFormat, method_name_str, true);
1298 }
1299 
FormatNumber(Isolate * isolate,Handle<JSObject> number_format_holder,double value)1300 MaybeHandle<String> NumberFormat::FormatNumber(
1301     Isolate* isolate, Handle<JSObject> number_format_holder, double value) {
1302   icu::DecimalFormat* number_format =
1303       NumberFormat::UnpackNumberFormat(number_format_holder);
1304   CHECK_NOT_NULL(number_format);
1305 
1306   icu::UnicodeString result;
1307   number_format->format(value, result);
1308 
1309   return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
1310       reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length()));
1311 }
1312 
DefineWEProperty(Isolate * isolate,Handle<JSObject> target,Handle<Name> key,Handle<Object> value)1313 void Intl::DefineWEProperty(Isolate* isolate, Handle<JSObject> target,
1314                             Handle<Name> key, Handle<Object> value) {
1315   PropertyDescriptor desc;
1316   desc.set_writable(true);
1317   desc.set_enumerable(true);
1318   desc.set_value(value);
1319   Maybe<bool> success =
1320       JSReceiver::DefineOwnProperty(isolate, target, key, &desc, kDontThrow);
1321   DCHECK(success.IsJust() && success.FromJust());
1322   USE(success);
1323 }
1324 
1325 namespace {
1326 
// Define general regexp macros.
// Note "(?:" marks the regexp group as a non-capturing group.
// Only lowercase letters are matched; see the lowercasing note below.
#define REGEX_ALPHA "[a-z]"
#define REGEX_DIGIT "[0-9]"
#define REGEX_ALPHANUM "(?:" REGEX_ALPHA "|" REGEX_DIGIT ")"

// Builds the three ICU regexp matchers used for language tags (singleton,
// whole language tag, variant) and stores them on |isolate| through
// set_language_tag_regexp_matchers(). The patterns are assembled at compile
// time by concatenating the string-literal macros below.
void BuildLanguageTagRegexps(Isolate* isolate) {
// Define the language tag regexp macros.
// For info on BCP 47 see https://tools.ietf.org/html/bcp47 .
// Because language tags are case insensitive per BCP 47 2.1.1 and regexp's
// defined below will always be used after lowercasing the input, uppercase
// ranges in BCP 47 2.1 are dropped and grandfathered tags are all lowercased.
// clang-format off
#define BCP47_REGULAR                                          \
  "(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|" \
  "zh-min|zh-min-nan|zh-xiang)"
#define BCP47_IRREGULAR                                  \
  "(?:en-gb-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|" \
  "i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|"  \
  "i-tsu|sgn-be-fr|sgn-be-nl|sgn-ch-de)"
#define BCP47_GRANDFATHERED "(?:" BCP47_IRREGULAR "|" BCP47_REGULAR ")"
#define BCP47_PRIVATE_USE "(?:x(?:-" REGEX_ALPHANUM "{1,8})+)"

// A singleton is any digit or letter except 'x' (taken by private use above).
#define BCP47_SINGLETON "(?:" REGEX_DIGIT "|" "[a-wy-z])"

#define BCP47_EXTENSION "(?:" BCP47_SINGLETON "(?:-" REGEX_ALPHANUM "{2,8})+)"
#define BCP47_VARIANT  \
  "(?:" REGEX_ALPHANUM "{5,8}" "|" "(?:" REGEX_DIGIT REGEX_ALPHANUM "{3}))"

#define BCP47_REGION "(?:" REGEX_ALPHA "{2}" "|" REGEX_DIGIT "{3})"
#define BCP47_SCRIPT "(?:" REGEX_ALPHA "{4})"
#define BCP47_EXT_LANG "(?:" REGEX_ALPHA "{3}(?:-" REGEX_ALPHA "{3}){0,2})"
#define BCP47_LANGUAGE "(?:" REGEX_ALPHA "{2,3}(?:-" BCP47_EXT_LANG ")?" \
  "|" REGEX_ALPHA "{4}" "|" REGEX_ALPHA "{5,8})"
#define BCP47_LANG_TAG         \
  BCP47_LANGUAGE               \
  "(?:-" BCP47_SCRIPT ")?"     \
  "(?:-" BCP47_REGION ")?"     \
  "(?:-" BCP47_VARIANT ")*"    \
  "(?:-" BCP47_EXTENSION ")*"  \
  "(?:-" BCP47_PRIVATE_USE ")?"
  // clang-format on

  // Each pattern is anchored with ^...$ so it has to match the whole input.
  constexpr char kLanguageTagSingletonRegexp[] = "^" BCP47_SINGLETON "$";
  constexpr char kLanguageTagVariantRegexp[] = "^" BCP47_VARIANT "$";
  constexpr char kLanguageTagRegexp[] =
      "^(?:" BCP47_LANG_TAG "|" BCP47_PRIVATE_USE "|" BCP47_GRANDFATHERED ")$";

  // One status is shared by all three constructions and checked once below.
  UErrorCode status = U_ZERO_ERROR;
  icu::RegexMatcher* language_singleton_regexp_matcher = new icu::RegexMatcher(
      icu::UnicodeString(kLanguageTagSingletonRegexp, -1, US_INV), 0, status);
  icu::RegexMatcher* language_tag_regexp_matcher = new icu::RegexMatcher(
      icu::UnicodeString(kLanguageTagRegexp, -1, US_INV), 0, status);
  icu::RegexMatcher* language_variant_regexp_matcher = new icu::RegexMatcher(
      icu::UnicodeString(kLanguageTagVariantRegexp, -1, US_INV), 0, status);
  CHECK(U_SUCCESS(status));

  // Hand the three raw matcher pointers over to the isolate.
  isolate->set_language_tag_regexp_matchers(language_singleton_regexp_matcher,
                                            language_tag_regexp_matcher,
                                            language_variant_regexp_matcher);
// Undefine the language tag regexp macros.
#undef BCP47_EXTENSION
#undef BCP47_EXT_LANG
#undef BCP47_GRANDFATHERED
#undef BCP47_IRREGULAR
#undef BCP47_LANG_TAG
#undef BCP47_LANGUAGE
#undef BCP47_PRIVATE_USE
#undef BCP47_REGION
#undef BCP47_REGULAR
#undef BCP47_SCRIPT
#undef BCP47_SINGLETON
#undef BCP47_VARIANT
}

// Undefine the general regexp macros.
#undef REGEX_ALPHA
#undef REGEX_DIGIT
#undef REGEX_ALPHANUM
1406 
GetLanguageSingletonRegexMatcher(Isolate * isolate)1407 icu::RegexMatcher* GetLanguageSingletonRegexMatcher(Isolate* isolate) {
1408   icu::RegexMatcher* language_singleton_regexp_matcher =
1409       isolate->language_singleton_regexp_matcher();
1410   if (language_singleton_regexp_matcher == nullptr) {
1411     BuildLanguageTagRegexps(isolate);
1412     language_singleton_regexp_matcher =
1413         isolate->language_singleton_regexp_matcher();
1414   }
1415   return language_singleton_regexp_matcher;
1416 }
1417 
GetLanguageTagRegexMatcher(Isolate * isolate)1418 icu::RegexMatcher* GetLanguageTagRegexMatcher(Isolate* isolate) {
1419   icu::RegexMatcher* language_tag_regexp_matcher =
1420       isolate->language_tag_regexp_matcher();
1421   if (language_tag_regexp_matcher == nullptr) {
1422     BuildLanguageTagRegexps(isolate);
1423     language_tag_regexp_matcher = isolate->language_tag_regexp_matcher();
1424   }
1425   return language_tag_regexp_matcher;
1426 }
1427 
GetLanguageVariantRegexMatcher(Isolate * isolate)1428 icu::RegexMatcher* GetLanguageVariantRegexMatcher(Isolate* isolate) {
1429   icu::RegexMatcher* language_variant_regexp_matcher =
1430       isolate->language_variant_regexp_matcher();
1431   if (language_variant_regexp_matcher == nullptr) {
1432     BuildLanguageTagRegexps(isolate);
1433     language_variant_regexp_matcher =
1434         isolate->language_variant_regexp_matcher();
1435   }
1436   return language_variant_regexp_matcher;
1437 }
1438 
1439 }  // anonymous namespace
1440 
ResolveLocale(Isolate * isolate,const char * service,Handle<Object> requestedLocales,Handle<Object> options)1441 MaybeHandle<JSObject> Intl::ResolveLocale(Isolate* isolate, const char* service,
1442                                           Handle<Object> requestedLocales,
1443                                           Handle<Object> options) {
1444   Handle<String> service_str =
1445       isolate->factory()->NewStringFromAsciiChecked(service);
1446 
1447   Handle<JSFunction> resolve_locale_function = isolate->resolve_locale();
1448 
1449   Handle<Object> result;
1450   Handle<Object> undefined_value = isolate->factory()->undefined_value();
1451   Handle<Object> args[] = {service_str, requestedLocales, options};
1452   ASSIGN_RETURN_ON_EXCEPTION(
1453       isolate, result,
1454       Execution::Call(isolate, resolve_locale_function, undefined_value,
1455                       arraysize(args), args),
1456       JSObject);
1457 
1458   return Handle<JSObject>::cast(result);
1459 }
1460 
CanonicalizeLocaleListJS(Isolate * isolate,Handle<Object> locales)1461 MaybeHandle<JSObject> Intl::CanonicalizeLocaleListJS(Isolate* isolate,
1462                                                      Handle<Object> locales) {
1463   Handle<JSFunction> canonicalize_locale_list_function =
1464       isolate->canonicalize_locale_list();
1465 
1466   Handle<Object> result;
1467   Handle<Object> undefined_value = isolate->factory()->undefined_value();
1468   Handle<Object> args[] = {locales};
1469   ASSIGN_RETURN_ON_EXCEPTION(
1470       isolate, result,
1471       Execution::Call(isolate, canonicalize_locale_list_function,
1472                       undefined_value, arraysize(args), args),
1473       JSObject);
1474 
1475   return Handle<JSObject>::cast(result);
1476 }
1477 
GetStringOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,std::vector<const char * > values,const char * service,std::unique_ptr<char[]> * result)1478 Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
1479                                   const char* property,
1480                                   std::vector<const char*> values,
1481                                   const char* service,
1482                                   std::unique_ptr<char[]>* result) {
1483   Handle<String> property_str =
1484       isolate->factory()->NewStringFromAsciiChecked(property);
1485 
1486   // 1. Let value be ? Get(options, property).
1487   Handle<Object> value;
1488   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1489       isolate, value,
1490       Object::GetPropertyOrElement(isolate, options, property_str),
1491       Nothing<bool>());
1492 
1493   if (value->IsUndefined(isolate)) {
1494     return Just(false);
1495   }
1496 
1497   // 2. c. Let value be ? ToString(value).
1498   Handle<String> value_str;
1499   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1500       isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
1501   std::unique_ptr<char[]> value_cstr = value_str->ToCString();
1502 
1503   // 2. d. if values is not undefined, then
1504   if (values.size() > 0) {
1505     // 2. d. i. If values does not contain an element equal to value,
1506     // throw a RangeError exception.
1507     for (size_t i = 0; i < values.size(); i++) {
1508       if (strcmp(values.at(i), value_cstr.get()) == 0) {
1509         // 2. e. return value
1510         *result = std::move(value_cstr);
1511         return Just(true);
1512       }
1513     }
1514 
1515     Handle<String> service_str =
1516         isolate->factory()->NewStringFromAsciiChecked(service);
1517     THROW_NEW_ERROR_RETURN_VALUE(
1518         isolate,
1519         NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
1520                       property_str),
1521         Nothing<bool>());
1522   }
1523 
1524   // 2. e. return value
1525   *result = std::move(value_cstr);
1526   return Just(true);
1527 }
1528 
GetBoolOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,const char * service,bool * result)1529 V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
1530     Isolate* isolate, Handle<JSReceiver> options, const char* property,
1531     const char* service, bool* result) {
1532   Handle<String> property_str =
1533       isolate->factory()->NewStringFromAsciiChecked(property);
1534 
1535   // 1. Let value be ? Get(options, property).
1536   Handle<Object> value;
1537   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1538       isolate, value,
1539       Object::GetPropertyOrElement(isolate, options, property_str),
1540       Nothing<bool>());
1541 
1542   // 2. If value is not undefined, then
1543   if (!value->IsUndefined(isolate)) {
1544     // 2. b. i. Let value be ToBoolean(value).
1545     *result = value->BooleanValue(isolate);
1546 
1547     // 2. e. return value
1548     return Just(true);
1549   }
1550 
1551   return Just(false);
1552 }
1553 
1554 namespace {
1555 
// Maps 'A'..'Z' to 'a'..'z' by setting bit 5; every other character is
// returned unchanged.
char AsciiToLower(char c) {
  const bool is_upper = (c >= 'A' && c <= 'Z');
  return is_upper ? static_cast<char>(c | (1 << 5)) : c;
}
1562 
1563 /**
1564  * Check the structural Validity of the language tag per ECMA 402 6.2.2:
1565  *   - Well-formed per RFC 5646 2.1
1566  *   - There are no duplicate variant subtags
1567  *   - There are no duplicate singleton (extension) subtags
1568  *
1569  * One extra-check is done (from RFC 5646 2.2.9): the tag is compared
1570  * against the list of grandfathered tags. However, subtags for
1571  * primary/extended language, script, region, variant are not checked
1572  * against the IANA language subtag registry.
1573  *
1574  * ICU is too permissible and lets invalid tags, like
1575  * hant-cmn-cn, through.
1576  *
1577  * Returns false if the language tag is invalid.
1578  */
IsStructurallyValidLanguageTag(Isolate * isolate,const std::string & locale_in)1579 bool IsStructurallyValidLanguageTag(Isolate* isolate,
1580                                     const std::string& locale_in) {
1581   if (!String::IsAscii(locale_in.c_str(),
1582                        static_cast<int>(locale_in.length()))) {
1583     return false;
1584   }
1585   std::string locale(locale_in);
1586   icu::RegexMatcher* language_tag_regexp_matcher =
1587       GetLanguageTagRegexMatcher(isolate);
1588 
1589   // Check if it's well-formed, including grandfathered tags.
1590   icu::UnicodeString locale_uni(locale.c_str(), -1, US_INV);
1591   // Note: icu::RegexMatcher::reset does not make a copy of the input string
1592   // so cannot use a temp value; ie: cannot create it as a call parameter.
1593   language_tag_regexp_matcher->reset(locale_uni);
1594   UErrorCode status = U_ZERO_ERROR;
1595   bool is_valid_lang_tag = language_tag_regexp_matcher->matches(status);
1596   if (!is_valid_lang_tag || V8_UNLIKELY(U_FAILURE(status))) {
1597     return false;
1598   }
1599 
1600   // Just return if it's a x- form. It's all private.
1601   if (locale.find("x-") == 0) {
1602     return true;
1603   }
1604 
1605   // Check if there are any duplicate variants or singletons (extensions).
1606 
1607   // Remove private use section.
1608   locale = locale.substr(0, locale.find("-x-"));
1609 
1610   // Skip language since it can match variant regex, so we start from 1.
1611   // We are matching i-klingon here, but that's ok, since i-klingon-klingon
1612   // is not valid and would fail LANGUAGE_TAG_RE test.
1613   size_t pos = 0;
1614   std::vector<std::string> parts;
1615   while ((pos = locale.find("-")) != std::string::npos) {
1616     std::string token = locale.substr(0, pos);
1617     parts.push_back(token);
1618     locale = locale.substr(pos + 1);
1619   }
1620   if (locale.length() != 0) {
1621     parts.push_back(locale);
1622   }
1623 
1624   icu::RegexMatcher* language_variant_regexp_matcher =
1625       GetLanguageVariantRegexMatcher(isolate);
1626 
1627   icu::RegexMatcher* language_singleton_regexp_matcher =
1628       GetLanguageSingletonRegexMatcher(isolate);
1629 
1630   std::vector<std::string> variants;
1631   std::vector<std::string> extensions;
1632   for (auto it = parts.begin() + 1; it != parts.end(); it++) {
1633     icu::UnicodeString part(it->data(), -1, US_INV);
1634     language_variant_regexp_matcher->reset(part);
1635     bool is_language_variant = language_variant_regexp_matcher->matches(status);
1636     if (V8_UNLIKELY(U_FAILURE(status))) {
1637       return false;
1638     }
1639     if (is_language_variant && extensions.size() == 0) {
1640       if (std::find(variants.begin(), variants.end(), *it) == variants.end()) {
1641         variants.push_back(*it);
1642       } else {
1643         return false;
1644       }
1645     }
1646 
1647     language_singleton_regexp_matcher->reset(part);
1648     bool is_language_singleton =
1649         language_singleton_regexp_matcher->matches(status);
1650     if (V8_UNLIKELY(U_FAILURE(status))) {
1651       return false;
1652     }
1653     if (is_language_singleton) {
1654       if (std::find(extensions.begin(), extensions.end(), *it) ==
1655           extensions.end()) {
1656         extensions.push_back(*it);
1657       } else {
1658         return false;
1659       }
1660     }
1661   }
1662 
1663   return true;
1664 }
1665 
// Returns true iff |c| is an ASCII lowercase letter, 'a' through 'z'
// inclusive.
bool IsLowerAscii(char c) { return c >= 'a' && c <= 'z'; }
1667 
IsTwoLetterLanguage(const std::string & locale)1668 bool IsTwoLetterLanguage(const std::string& locale) {
1669   // Two letters, both in range 'a'-'z'...
1670   return locale.length() == 2 && IsLowerAscii(locale[0]) &&
1671          IsLowerAscii(locale[1]);
1672 }
1673 
// Returns true iff |locale| is exactly one of the deprecated language tags
// "in", "iw", "ji" or "jw".
bool IsDeprecatedLanguage(const std::string& locale) {
  static const char* const kDeprecatedCodes[] = {"in", "iw", "ji", "jw"};
  for (const char* code : kDeprecatedCodes) {
    if (locale == code) return true;
  }
  return false;
}
1678 
1679 // Reference:
1680 // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
IsGrandfatheredTagWithoutPreferredVaule(const std::string & locale)1681 bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
1682   if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
1683   if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
1684       V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
1685     return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
1686            locale.substr(2) == "mingo";
1687   }
1688   return false;
1689 }
1690 
1691 }  // anonymous namespace
1692 
CanonicalizeLanguageTag(Isolate * isolate,Handle<Object> locale_in)1693 Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
1694                                                  Handle<Object> locale_in) {
1695   Handle<String> locale_str;
1696   // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
1697   // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
1698   // exception.
1699   // 7c iii. Let tag be ? ToString(kValue).
1700   // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
1701   // RangeError exception.
1702 
1703   if (locale_in->IsString()) {
1704     locale_str = Handle<String>::cast(locale_in);
1705   } else if (locale_in->IsJSReceiver()) {
1706     ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
1707                                      Object::ToString(isolate, locale_in),
1708                                      Nothing<std::string>());
1709   } else {
1710     THROW_NEW_ERROR_RETURN_VALUE(isolate,
1711                                  NewTypeError(MessageTemplate::kLanguageID),
1712                                  Nothing<std::string>());
1713   }
1714   std::string locale(locale_str->ToCString().get());
1715 
1716   // Optimize for the most common case: a 2-letter language code in the
1717   // canonical form/lowercase that is not one of the deprecated codes
1718   // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
1719   // codes. Instead, let them be handled by ICU in the slow path. However,
1720   // fast-track 'fil' (3-letter canonical code).
1721   if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
1722       locale == "fil") {
1723     return Just(locale);
1724   }
1725 
1726   // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
1727   // the input before any more check.
1728   std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower);
1729   if (!IsStructurallyValidLanguageTag(isolate, locale)) {
1730     THROW_NEW_ERROR_RETURN_VALUE(
1731         isolate,
1732         NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
1733         Nothing<std::string>());
1734   }
1735 
1736   // ICU maps a few grandfathered tags to what looks like a regular language
1737   // tag even though IANA language tag registry does not have a preferred
1738   // entry map for them. Return them as they're with lowercasing.
1739   if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
1740     return Just(locale);
1741   }
1742 
1743   // // ECMA 402 6.2.3
1744   // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
1745   // language tag if it's too long (much longer than 100 chars). Even if we
1746   // allocate a longer buffer, ICU will still fail if it's too long. Either
1747   // propose to Ecma 402 to put a limit on the locale length or change ICU to
1748   // handle long locale names better. See
1749   // https://unicode-org.atlassian.net/browse/ICU-13417
1750   UErrorCode error = U_ZERO_ERROR;
1751   char icu_result[ULOC_FULLNAME_CAPACITY];
1752   uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY,
1753                       nullptr, &error);
1754   if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
1755     // TODO(jshin): This should not happen because the structural validity
1756     // is already checked. If that's the case, remove this.
1757     THROW_NEW_ERROR_RETURN_VALUE(
1758         isolate,
1759         NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
1760         Nothing<std::string>());
1761   }
1762 
1763   // Force strict BCP47 rules.
1764   char result[ULOC_FULLNAME_CAPACITY];
1765   int32_t result_len = uloc_toLanguageTag(icu_result, result,
1766                                           ULOC_FULLNAME_CAPACITY, TRUE, &error);
1767 
1768   if (U_FAILURE(error)) {
1769     THROW_NEW_ERROR_RETURN_VALUE(
1770         isolate,
1771         NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
1772         Nothing<std::string>());
1773   }
1774 
1775   return Just(std::string(result, result_len));
1776 }
1777 
CanonicalizeLocaleList(Isolate * isolate,Handle<Object> locales,bool only_return_one_result)1778 Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
1779     Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
1780   // 1. If locales is undefined, then
1781   if (locales->IsUndefined(isolate)) {
1782     // 1a. Return a new empty List.
1783     return Just(std::vector<std::string>());
1784   }
1785   // 2. Let seen be a new empty List.
1786   std::vector<std::string> seen;
1787   // 3. If Type(locales) is String, then
1788   if (locales->IsString()) {
1789     // 3a. Let O be CreateArrayFromList(« locales »).
1790     // Instead of creating a one-element array and then iterating over it,
1791     // we inline the body of the iteration:
1792     std::string canonicalized_tag;
1793     if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
1794       return Nothing<std::vector<std::string>>();
1795     }
1796     seen.push_back(canonicalized_tag);
1797     return Just(seen);
1798   }
1799   // 4. Else,
1800   // 4a. Let O be ? ToObject(locales).
1801   Handle<JSReceiver> o;
1802   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
1803                                    Object::ToObject(isolate, locales),
1804                                    Nothing<std::vector<std::string>>());
1805   // 5. Let len be ? ToLength(? Get(O, "length")).
1806   Handle<Object> length_obj;
1807   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
1808                                    Object::GetLengthFromArrayLike(isolate, o),
1809                                    Nothing<std::vector<std::string>>());
1810   // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
1811   // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
1812   // don't happen in practice (and would be very slow if they do), we'll keep
1813   // the code simple for now by using a saturating to-uint32 conversion.
1814   double raw_length = length_obj->Number();
1815   uint32_t len =
1816       raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
1817   // 6. Let k be 0.
1818   // 7. Repeat, while k < len
1819   for (uint32_t k = 0; k < len; k++) {
1820     // 7a. Let Pk be ToString(k).
1821     // 7b. Let kPresent be ? HasProperty(O, Pk).
1822     LookupIterator it(isolate, o, k);
1823     // 7c. If kPresent is true, then
1824     if (!it.IsFound()) continue;
1825     // 7c i. Let kValue be ? Get(O, Pk).
1826     Handle<Object> k_value;
1827     ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
1828                                      Nothing<std::vector<std::string>>());
1829     // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
1830     // exception.
1831     // 7c iii. Let tag be ? ToString(kValue).
1832     // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
1833     // RangeError exception.
1834     // 7c v. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
1835     std::string canonicalized_tag;
1836     if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
1837       return Nothing<std::vector<std::string>>();
1838     }
1839     // 7c vi. If canonicalizedTag is not an element of seen, append
1840     // canonicalizedTag as the last element of seen.
1841     if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
1842       seen.push_back(canonicalized_tag);
1843     }
1844     // 7d. Increase k by 1. (See loop header.)
1845     // Optimization: some callers only need one result.
1846     if (only_return_one_result) return Just(seen);
1847   }
1848   // 8. Return seen.
1849   return Just(seen);
1850 }
1851 
1852 // ecma-402/#sec-currencydigits
CurrencyDigits(Isolate * isolate,Handle<String> currency)1853 Handle<Smi> Intl::CurrencyDigits(Isolate* isolate, Handle<String> currency) {
1854   v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
1855   v8::String::Value currency_string(v8_isolate, v8::Utils::ToLocal(currency));
1856   CHECK_NOT_NULL(*currency_string);
1857 
1858   DisallowHeapAllocation no_gc;
1859   UErrorCode status = U_ZERO_ERROR;
1860   uint32_t fraction_digits = ucurr_getDefaultFractionDigits(
1861       reinterpret_cast<const UChar*>(*currency_string), &status);
1862   // For missing currency codes, default to the most common, 2
1863   if (U_FAILURE(status)) fraction_digits = 2;
1864   return Handle<Smi>(Smi::FromInt(fraction_digits), isolate);
1865 }
1866 
CreateNumberFormat(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)1867 MaybeHandle<JSObject> Intl::CreateNumberFormat(Isolate* isolate,
1868                                                Handle<String> locale,
1869                                                Handle<JSObject> options,
1870                                                Handle<JSObject> resolved) {
1871   Handle<JSFunction> constructor(
1872       isolate->native_context()->intl_number_format_function(), isolate);
1873 
1874   Handle<JSObject> local_object;
1875   ASSIGN_RETURN_ON_EXCEPTION(isolate, local_object,
1876                              JSObject::New(constructor, constructor), JSObject);
1877 
1878   // Set number formatter as embedder field of the resulting JS object.
1879   icu::DecimalFormat* number_format =
1880       NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
1881 
1882   CHECK_NOT_NULL(number_format);
1883 
1884   local_object->SetEmbedderField(NumberFormat::kDecimalFormatIndex,
1885                                  reinterpret_cast<Smi*>(number_format));
1886 
1887   Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
1888   GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
1889                           NumberFormat::DeleteNumberFormat,
1890                           WeakCallbackType::kInternalFields);
1891   return local_object;
1892 }
1893 
1894 /**
1895  * Parses Unicode extension into key - value map.
1896  * Returns empty object if the extension string is invalid.
1897  * We are not concerned with the validity of the values at this point.
1898  * 'attribute' in RFC 6047 is not supported. Keys without explicit
1899  * values are assigned UNDEFINED.
1900  * TODO(jshin): Fix the handling of 'attribute' (in RFC 6047, but none
1901  * has been defined so that it's not used) and boolean keys without
1902  * an explicit value.
1903  */
ParseExtension(Isolate * isolate,const std::string & extension,std::map<std::string,std::string> & out)1904 void Intl::ParseExtension(Isolate* isolate, const std::string& extension,
1905                           std::map<std::string, std::string>& out) {
1906   if (extension.compare(0, 3, "-u-") != 0) return;
1907 
1908   // Key is {2}alphanum, value is {3,8}alphanum.
1909   // Some keys may not have explicit values (booleans).
1910   std::string key;
1911   std::string value;
1912   // Skip the "-u-".
1913   size_t start = 3;
1914   size_t end;
1915   do {
1916     end = extension.find("-", start);
1917     size_t length =
1918         (end == std::string::npos) ? extension.length() - start : end - start;
1919     std::string element = extension.substr(start, length);
1920     // Key is {2}alphanum
1921     if (length == 2) {
1922       if (!key.empty()) {
1923         out.insert(std::pair<std::string, std::string>(key, value));
1924         value.clear();
1925       }
1926       key = element;
1927       // value is {3,8}alphanum.
1928     } else if (length >= 3 && length <= 8 && !key.empty()) {
1929       value = value.empty() ? element : (value + "-" + element);
1930     } else {
1931       return;
1932     }
1933     start = end + 1;
1934   } while (end != std::string::npos);
1935   if (!key.empty()) out.insert(std::pair<std::string, std::string>(key, value));
1936 }
1937 
1938 namespace {
1939 
IsAToZ(char ch)1940 bool IsAToZ(char ch) {
1941   return IsInRange(AsciiAlphaToLower(ch), 'a', 'z');
1942 }
1943 
1944 }  // namespace
1945 
1946 // Verifies that the input is a well-formed ISO 4217 currency code.
1947 // ecma402/#sec-currency-codes
IsWellFormedCurrencyCode(Isolate * isolate,Handle<String> currency)1948 bool Intl::IsWellFormedCurrencyCode(Isolate* isolate, Handle<String> currency) {
1949   // 2. If the number of elements in normalized is not 3, return false.
1950   if (currency->length() != 3) return false;
1951 
1952   currency = String::Flatten(isolate, currency);
1953   {
1954     DisallowHeapAllocation no_gc;
1955     String::FlatContent flat = currency->GetFlatContent();
1956 
1957     // 1. Let normalized be the result of mapping currency to upper case as
1958     // described in 6.1. 3. If normalized contains any character that is not in
1959     // the range "A" to "Z" (U+0041 to U+005A), return false. 4. Return true.
1960     // Don't uppercase to test. It could convert invalid code into a valid one.
1961     // For example \u00DFP (Eszett+P) becomes SSP.
1962     return (IsAToZ(flat.Get(0)) && IsAToZ(flat.Get(1)) && IsAToZ(flat.Get(2)));
1963   }
1964 }
1965 
1966 // ecma402 #sup-string.prototype.tolocalelowercase
1967 // ecma402 #sup-string.prototype.tolocaleuppercase
StringLocaleConvertCase(Isolate * isolate,Handle<String> s,bool to_upper,Handle<Object> locales)1968 MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
1969                                                   Handle<String> s,
1970                                                   bool to_upper,
1971                                                   Handle<Object> locales) {
1972   std::vector<std::string> requested_locales;
1973   if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
1974     return MaybeHandle<String>();
1975   }
1976   std::string requested_locale = requested_locales.size() == 0
1977                                      ? Intl::DefaultLocale(isolate)
1978                                      : requested_locales[0];
1979   size_t dash = requested_locale.find("-");
1980   if (dash != std::string::npos) {
1981     requested_locale = requested_locale.substr(0, dash);
1982   }
1983 
1984   // Primary language tag can be up to 8 characters long in theory.
1985   // https://tools.ietf.org/html/bcp47#section-2.2.1
1986   DCHECK_LE(requested_locale.length(), 8);
1987   s = String::Flatten(isolate, s);
1988 
1989   // All the languages requiring special-handling have two-letter codes.
1990   // Note that we have to check for '!= 2' here because private-use language
1991   // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
1992   // only 'x' or 'i' when they get here.
1993   if (V8_UNLIKELY(requested_locale.length() != 2)) {
1994     return ConvertCase(s, to_upper, isolate);
1995   }
1996   // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
1997   // in the root locale needs to be adjusted for az, lt and tr because even case
1998   // mapping of ASCII range characters are different in those locales.
1999   // Greek (el) does not require any adjustment.
2000   if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
2001                   (requested_locale == "lt") || (requested_locale == "az"))) {
2002     return LocaleConvertCase(s, isolate, to_upper, requested_locale.c_str());
2003   } else {
2004     return ConvertCase(s, to_upper, isolate);
2005   }
2006 }
2007 
StringLocaleCompare(Isolate * isolate,Handle<String> string1,Handle<String> string2,Handle<Object> locales,Handle<Object> options)2008 MaybeHandle<Object> Intl::StringLocaleCompare(Isolate* isolate,
2009                                               Handle<String> string1,
2010                                               Handle<String> string2,
2011                                               Handle<Object> locales,
2012                                               Handle<Object> options) {
2013   Factory* factory = isolate->factory();
2014   Handle<JSObject> collator;
2015   ASSIGN_RETURN_ON_EXCEPTION(
2016       isolate, collator,
2017       CachedOrNewService(isolate, factory->NewStringFromStaticChars("collator"),
2018                          locales, options, factory->undefined_value()),
2019       Object);
2020   CHECK(collator->IsJSCollator());
2021   return Intl::CompareStrings(isolate, Handle<JSCollator>::cast(collator),
2022                               string1, string2);
2023 }
2024 
2025 // ecma402/#sec-collator-comparestrings
CompareStrings(Isolate * isolate,Handle<JSCollator> collator,Handle<String> string1,Handle<String> string2)2026 Handle<Object> Intl::CompareStrings(Isolate* isolate,
2027                                     Handle<JSCollator> collator,
2028                                     Handle<String> string1,
2029                                     Handle<String> string2) {
2030   Factory* factory = isolate->factory();
2031   icu::Collator* icu_collator = collator->icu_collator()->raw();
2032   CHECK_NOT_NULL(icu_collator);
2033 
2034   string1 = String::Flatten(isolate, string1);
2035   string2 = String::Flatten(isolate, string2);
2036 
2037   UCollationResult result;
2038   UErrorCode status = U_ZERO_ERROR;
2039   {
2040     DisallowHeapAllocation no_gc;
2041     int32_t length1 = string1->length();
2042     int32_t length2 = string2->length();
2043     String::FlatContent flat1 = string1->GetFlatContent();
2044     String::FlatContent flat2 = string2->GetFlatContent();
2045     std::unique_ptr<uc16[]> sap1;
2046     std::unique_ptr<uc16[]> sap2;
2047     icu::UnicodeString string_val1(
2048         FALSE, GetUCharBufferFromFlat(flat1, &sap1, length1), length1);
2049     icu::UnicodeString string_val2(
2050         FALSE, GetUCharBufferFromFlat(flat2, &sap2, length2), length2);
2051     result = icu_collator->compare(string_val1, string_val2, status);
2052   }
2053   DCHECK(U_SUCCESS(status));
2054 
2055   return factory->NewNumberFromInt(result);
2056 }
2057 
2058 // ecma402/#sup-properties-of-the-number-prototype-object
NumberToLocaleString(Isolate * isolate,Handle<Object> num,Handle<Object> locales,Handle<Object> options)2059 MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
2060                                                Handle<Object> num,
2061                                                Handle<Object> locales,
2062                                                Handle<Object> options) {
2063   Factory* factory = isolate->factory();
2064   Handle<JSObject> number_format_holder;
2065   // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
2066   ASSIGN_RETURN_ON_EXCEPTION(
2067       isolate, number_format_holder,
2068       CachedOrNewService(isolate,
2069                          factory->NewStringFromStaticChars("numberformat"),
2070                          locales, options, factory->undefined_value()),
2071       String);
2072   DCHECK(
2073       Intl::IsObjectOfType(isolate, number_format_holder, Intl::kNumberFormat));
2074   Handle<Object> number_obj;
2075   ASSIGN_RETURN_ON_EXCEPTION(isolate, number_obj,
2076                              Object::ToNumber(isolate, num), String);
2077 
2078   // Spec treats -0 and +0 as 0.
2079   double number = number_obj->Number() + 0;
2080   // Return FormatNumber(numberFormat, x).
2081   return NumberFormat::FormatNumber(isolate, number_format_holder, number);
2082 }
2083 
2084 // ecma402/#sec-defaultnumberoption
DefaultNumberOption(Isolate * isolate,Handle<Object> value,int min,int max,int fallback,Handle<String> property)2085 Maybe<int> Intl::DefaultNumberOption(Isolate* isolate, Handle<Object> value,
2086                                      int min, int max, int fallback,
2087                                      Handle<String> property) {
2088   // 2. Else, return fallback.
2089   if (value->IsUndefined()) return Just(fallback);
2090 
2091   // 1. If value is not undefined, then
2092   // a. Let value be ? ToNumber(value).
2093   Handle<Object> value_num;
2094   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
2095       isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
2096   DCHECK(value_num->IsNumber());
2097 
2098   // b. If value is NaN or less than minimum or greater than maximum, throw a
2099   // RangeError exception.
2100   if (value_num->IsNaN() || value_num->Number() < min ||
2101       value_num->Number() > max) {
2102     THROW_NEW_ERROR_RETURN_VALUE(
2103         isolate,
2104         NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
2105         Nothing<int>());
2106   }
2107 
2108   // The max and min arguments are integers and the above check makes
2109   // sure that we are within the integer range making this double to
2110   // int conversion safe.
2111   //
2112   // c. Return floor(value).
2113   return Just(FastD2I(floor(value_num->Number())));
2114 }
2115 
2116 // ecma402/#sec-getnumberoption
GetNumberOption(Isolate * isolate,Handle<JSReceiver> options,Handle<String> property,int min,int max,int fallback)2117 Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
2118                                  Handle<String> property, int min, int max,
2119                                  int fallback) {
2120   // 1. Let value be ? Get(options, property).
2121   Handle<Object> value;
2122   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
2123       isolate, value, JSReceiver::GetProperty(isolate, options, property),
2124       Nothing<int>());
2125 
2126   // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
2127   return DefaultNumberOption(isolate, value, min, max, fallback, property);
2128 }
2129 
GetNumberOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,int min,int max,int fallback)2130 Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
2131                                  const char* property, int min, int max,
2132                                  int fallback) {
2133   Handle<String> property_str =
2134       isolate->factory()->NewStringFromAsciiChecked(property);
2135   return GetNumberOption(isolate, options, property_str, min, max, fallback);
2136 }
2137 
SetNumberFormatDigitOptions(Isolate * isolate,icu::DecimalFormat * number_format,Handle<JSReceiver> options,int mnfd_default,int mxfd_default)2138 Maybe<bool> Intl::SetNumberFormatDigitOptions(Isolate* isolate,
2139                                               icu::DecimalFormat* number_format,
2140                                               Handle<JSReceiver> options,
2141                                               int mnfd_default,
2142                                               int mxfd_default) {
2143   CHECK_NOT_NULL(number_format);
2144 
2145   // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
2146   // 1).
2147   int mnid;
2148   if (!GetNumberOption(isolate, options, "minimumIntegerDigits", 1, 21, 1)
2149            .To(&mnid)) {
2150     return Nothing<bool>();
2151   }
2152 
2153   // 6. Let mnfd be ? GetNumberOption(options, "minimumFractionDigits", 0, 20,
2154   // mnfdDefault).
2155   int mnfd;
2156   if (!GetNumberOption(isolate, options, "minimumFractionDigits", 0, 20,
2157                        mnfd_default)
2158            .To(&mnfd)) {
2159     return Nothing<bool>();
2160   }
2161 
2162   // 7. Let mxfdActualDefault be max( mnfd, mxfdDefault ).
2163   int mxfd_actual_default = std::max(mnfd, mxfd_default);
2164 
2165   // 8. Let mxfd be ? GetNumberOption(options,
2166   // "maximumFractionDigits", mnfd, 20, mxfdActualDefault).
2167   int mxfd;
2168   if (!GetNumberOption(isolate, options, "maximumFractionDigits", mnfd, 20,
2169                        mxfd_actual_default)
2170            .To(&mxfd)) {
2171     return Nothing<bool>();
2172   }
2173 
2174   // 9.  Let mnsd be ? Get(options, "minimumSignificantDigits").
2175   Handle<Object> mnsd_obj;
2176   Handle<String> mnsd_str =
2177       isolate->factory()->NewStringFromStaticChars("minimumSignificantDigits");
2178   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
2179       isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
2180       Nothing<bool>());
2181 
2182   // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
2183   Handle<Object> mxsd_obj;
2184   Handle<String> mxsd_str =
2185       isolate->factory()->NewStringFromStaticChars("maximumSignificantDigits");
2186   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
2187       isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
2188       Nothing<bool>());
2189 
2190   // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
2191   number_format->setMinimumIntegerDigits(mnid);
2192 
2193   // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
2194   number_format->setMinimumFractionDigits(mnfd);
2195 
2196   // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
2197   number_format->setMaximumFractionDigits(mxfd);
2198 
2199   bool significant_digits_used = false;
2200   // 14. If mnsd is not undefined or mxsd is not undefined, then
2201   if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
2202     // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
2203     int mnsd;
2204     if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
2205       return Nothing<bool>();
2206     }
2207 
2208     // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
2209     int mxsd;
2210     if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
2211              .To(&mxsd)) {
2212       return Nothing<bool>();
2213     }
2214 
2215     significant_digits_used = true;
2216 
2217     // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
2218     number_format->setMinimumSignificantDigits(mnsd);
2219 
2220     // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
2221     number_format->setMaximumSignificantDigits(mxsd);
2222   }
2223 
2224   number_format->setSignificantDigitsUsed(significant_digits_used);
2225   number_format->setRoundingMode(icu::DecimalFormat::kRoundHalfUp);
2226   return Just(true);
2227 }
2228 
2229 namespace {
2230 
// ECMA 402 9.2.2 BestAvailableLocale(availableLocales, locale)
// https://tc39.github.io/ecma402/#sec-bestavailablelocale
//
// Returns the longest candidate derived from |locale| that is contained in
// |available_locales|. Candidates are produced by repeatedly cutting off the
// last "-"-separated subtag (and a preceding single-character subtag, if
// any). The empty string stands for the spec's "undefined" and is returned
// when no candidate matches.
//
// |available_locales| is taken by reference because it is only read; |locale|
// is taken by value because it serves as the mutable candidate.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                std::string locale) {
  const char separator = '-';

  // 1. Let candidate be locale.
  // 2. Repeat,
  while (true) {
    // 2.a. If availableLocales contains an element equal to candidate, return
    //      candidate.
    if (available_locales.count(locale) != 0) {
      return locale;
    }
    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur, return
    //      undefined.
    size_t pos = locale.rfind(separator);
    if (pos == std::string::npos) {
      return std::string();
    }
    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate,
    //      decrease pos by 2.
    if (pos >= 2 && locale[pos - 2] == separator) {
      pos -= 2;
    }
    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    // pos is always smaller than the current length, so this strictly
    // shortens the candidate and the loop terminates.
    locale.resize(pos);
  }
}
2262 
2263 #define ANY_EXTENSION_REGEXP "-[a-z0-9]{1}-.*"
2264 
GetAnyExtensionRegexpMatcher()2265 std::unique_ptr<icu::RegexMatcher> GetAnyExtensionRegexpMatcher() {
2266   UErrorCode status = U_ZERO_ERROR;
2267   std::unique_ptr<icu::RegexMatcher> matcher(new icu::RegexMatcher(
2268       icu::UnicodeString(ANY_EXTENSION_REGEXP, -1, US_INV), 0, status));
2269   DCHECK(U_SUCCESS(status));
2270   return matcher;
2271 }
2272 
2273 #undef ANY_EXTENSION_REGEXP
2274 
2275 // ECMA 402 9.2.7 LookupSupportedLocales(availableLocales, requestedLocales)
2276 // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
LookupSupportedLocales(std::set<std::string> available_locales,std::vector<std::string> requested_locales)2277 std::vector<std::string> LookupSupportedLocales(
2278     std::set<std::string> available_locales,
2279     std::vector<std::string> requested_locales) {
2280   std::unique_ptr<icu::RegexMatcher> matcher = GetAnyExtensionRegexpMatcher();
2281 
2282   // 1. Let subset be a new empty List.
2283   std::vector<std::string> subset;
2284 
2285   // 2. For each element locale of requestedLocales in List order, do
2286   for (auto locale : requested_locales) {
2287     // 2.a. Let noExtensionsLocale be the String value that is locale with all
2288     //      Unicode locale extension sequences removed.
2289     icu::UnicodeString locale_uni(locale.c_str(), -1, US_INV);
2290     // TODO(bstell): look at using uloc_forLanguageTag to convert the language
2291     // tag to locale id
2292     // TODO(bstell): look at using uloc_getBaseName to just get the name without
2293     // all the keywords
2294     matcher->reset(locale_uni);
2295     UErrorCode status = U_ZERO_ERROR;
2296     // TODO(bstell): need to determine if this is the correct behavior.
2297     // This matches the JS implementation but might not match the spec.
2298     // According to
2299     // https://tc39.github.io/ecma402/#sec-unicode-locale-extension-sequences:
2300     //
2301     //     This standard uses the term "Unicode locale extension sequence" for
2302     //     any substring of a language tag that is not part of a private use
2303     //     subtag sequence, starts with a separator  "-" and the singleton "u",
2304     //     and includes the maximum sequence of following non-singleton subtags
2305     //     and their preceding "-" separators.
2306     //
2307     // According to the spec a locale "en-t-aaa-u-bbb-v-ccc-x-u-ddd", should
2308     // remove only the "-u-bbb" part, and keep everything else, whereas this
2309     // regexp matcher would leave only the "en".
2310     icu::UnicodeString no_extensions_locale_uni =
2311         matcher->replaceAll("", status);
2312     DCHECK(U_SUCCESS(status));
2313     std::string no_extensions_locale;
2314     no_extensions_locale_uni.toUTF8String(no_extensions_locale);
2315     // 2.b. Let availableLocale be BestAvailableLocale(availableLocales,
2316     //      noExtensionsLocale).
2317     std::string available_locale =
2318         BestAvailableLocale(available_locales, no_extensions_locale);
2319     // 2.c. If availableLocale is not undefined, append locale to the end of
2320     //      subset.
2321     if (!available_locale.empty()) {
2322       subset.push_back(locale);
2323     }
2324   }
2325 
2326   // 3. Return subset.
2327   return subset;
2328 }
2329 
2330 // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
2331 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
BestFitSupportedLocales(std::set<std::string> available_locales,std::vector<std::string> requested_locales)2332 std::vector<std::string> BestFitSupportedLocales(
2333     std::set<std::string> available_locales,
2334     std::vector<std::string> requested_locales) {
2335   return LookupSupportedLocales(available_locales, requested_locales);
2336 }
2337 
// Locale matching algorithm selected through the "localeMatcher" option.
enum MatcherOption {
  kBestFit,  // "best fit"
  kLookup    // "lookup"
};
2339 
2340 // TODO(bstell): should this be moved somewhere where it is reusable?
2341 // Implement steps 5, 6, 7 for ECMA 402 9.2.9 SupportedLocales
2342 // https://tc39.github.io/ecma402/#sec-supportedlocales
CreateReadOnlyArray(Isolate * isolate,std::vector<std::string> elements)2343 MaybeHandle<JSObject> CreateReadOnlyArray(Isolate* isolate,
2344                                           std::vector<std::string> elements) {
2345   Factory* factory = isolate->factory();
2346   if (elements.size() >= kMaxUInt32) {
2347     THROW_NEW_ERROR(
2348         isolate, NewRangeError(MessageTemplate::kInvalidArrayLength), JSObject);
2349   }
2350 
2351   PropertyAttributes attr =
2352       static_cast<PropertyAttributes>(READ_ONLY | DONT_DELETE);
2353 
2354   // 5. Let subset be CreateArrayFromList(elements).
2355   // 6. Let keys be subset.[[OwnPropertyKeys]]().
2356   Handle<JSArray> subset = factory->NewJSArray(0);
2357 
2358   // 7. For each element P of keys in List order, do
2359   uint32_t length = static_cast<uint32_t>(elements.size());
2360   for (uint32_t i = 0; i < length; i++) {
2361     const std::string& part = elements[i];
2362     Handle<String> value =
2363         factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
2364     JSObject::AddDataElement(subset, i, value, attr);
2365   }
2366 
2367   // 7.a. Let desc be PropertyDescriptor { [[Configurable]]: false,
2368   //          [[Writable]]: false }.
2369   PropertyDescriptor desc;
2370   desc.set_writable(false);
2371   desc.set_configurable(false);
2372 
2373   // 7.b. Perform ! DefinePropertyOrThrow(subset, P, desc).
2374   JSArray::ArraySetLength(isolate, subset, &desc, kThrowOnError).ToChecked();
2375   return subset;
2376 }
2377 
2378 // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
2379 // https://tc39.github.io/ecma402/#sec-supportedlocales
SupportedLocales(Isolate * isolate,std::string service,std::set<std::string> available_locales,std::vector<std::string> requested_locales,Handle<Object> options)2380 MaybeHandle<JSObject> SupportedLocales(
2381     Isolate* isolate, std::string service,
2382     std::set<std::string> available_locales,
2383     std::vector<std::string> requested_locales, Handle<Object> options) {
2384   std::vector<std::string> supported_locales;
2385 
2386   // 1. If options is not undefined, then
2387   //    a. Let options be ? ToObject(options).
2388   //    b. Let matcher be ? GetOption(options, "localeMatcher", "string",
2389   //       « "lookup", "best fit" », "best fit").
2390   // 2. Else, let matcher be "best fit".
2391   MatcherOption matcher = kBestFit;
2392   if (!options->IsUndefined(isolate)) {
2393     Handle<JSReceiver> options_obj;
2394     ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
2395                                Object::ToObject(isolate, options), JSObject);
2396     std::unique_ptr<char[]> matcher_str = nullptr;
2397     std::vector<const char*> matcher_values = {"lookup", "best fit"};
2398     Maybe<bool> maybe_found_matcher =
2399         Intl::GetStringOption(isolate, options_obj, "localeMatcher",
2400                               matcher_values, service.c_str(), &matcher_str);
2401     MAYBE_RETURN(maybe_found_matcher, MaybeHandle<JSObject>());
2402     if (maybe_found_matcher.FromJust()) {
2403       DCHECK_NOT_NULL(matcher_str.get());
2404       if (strcmp(matcher_str.get(), "lookup") == 0) {
2405         matcher = kLookup;
2406       }
2407     }
2408   }
2409 
2410   // 3. If matcher is "best fit", then
2411   //    a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
2412   //       requestedLocales).
2413   if (matcher == kBestFit) {
2414     supported_locales =
2415         BestFitSupportedLocales(available_locales, requested_locales);
2416   } else {
2417     // 4. Else,
2418     //    a. Let supportedLocales be LookupSupportedLocales(availableLocales,
2419     //       requestedLocales).
2420     DCHECK_EQ(matcher, kLookup);
2421     supported_locales =
2422         LookupSupportedLocales(available_locales, requested_locales);
2423   }
2424 
2425   // TODO(jkummerow): Possibly revisit why the spec has the individual entries
2426   // readonly but the array is not frozen.
2427   // https://github.com/tc39/ecma402/issues/258
2428 
2429   // 5. Let subset be CreateArrayFromList(supportedLocales).
2430   // 6. Let keys be subset.[[OwnPropertyKeys]]().
2431   // 7. For each element P of keys in List order, do
2432   //    a. Let desc be PropertyDescriptor { [[Configurable]]: false,
2433   //       [[Writable]]: false }.
2434   //    b. Perform ! DefinePropertyOrThrow(subset, P, desc).
2435   MaybeHandle<JSObject> subset =
2436       CreateReadOnlyArray(isolate, supported_locales);
2437 
2438   // 8. Return subset.
2439   return subset;
2440 }
2441 }  // namespace
2442 
2443 // ECMA 402 10.2.2 Intl.Collator.supportedLocalesOf
2444 // https://tc39.github.io/ecma402/#sec-intl.collator.supportedlocalesof
2445 // of Intl::SupportedLocalesOf thru JS
SupportedLocalesOf(Isolate * isolate,Handle<String> service,Handle<Object> locales_in,Handle<Object> options_in)2446 MaybeHandle<JSObject> Intl::SupportedLocalesOf(Isolate* isolate,
2447                                                Handle<String> service,
2448                                                Handle<Object> locales_in,
2449                                                Handle<Object> options_in) {
2450   // Let availableLocales be %Collator%.[[AvailableLocales]].
2451   IcuService icu_service = Intl::StringToIcuService(service);
2452   std::set<std::string> available_locales = GetAvailableLocales(icu_service);
2453   std::vector<std::string> requested_locales;
2454   // Let requestedLocales be ? CanonicalizeLocaleList(locales).
2455   bool got_requested_locales =
2456       CanonicalizeLocaleList(isolate, locales_in, false).To(&requested_locales);
2457   if (!got_requested_locales) {
2458     return MaybeHandle<JSObject>();
2459   }
2460 
2461   // Return ? SupportedLocales(availableLocales, requestedLocales, options).
2462   std::string service_str(service->ToCString().get());
2463   return SupportedLocales(isolate, service_str, available_locales,
2464                           requested_locales, options_in);
2465 }
2466 
2467 }  // namespace internal
2468 }  // namespace v8
2469