1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 
6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h"
8 
9 #include <memory>
10 
11 #include "src/api.h"
12 #include "src/api-natives.h"
13 #include "src/arguments.h"
14 #include "src/factory.h"
15 #include "src/i18n.h"
16 #include "src/isolate-inl.h"
17 #include "src/messages.h"
18 
19 #include "unicode/brkiter.h"
20 #include "unicode/calendar.h"
21 #include "unicode/coll.h"
22 #include "unicode/curramt.h"
23 #include "unicode/datefmt.h"
24 #include "unicode/dcfmtsym.h"
25 #include "unicode/decimfmt.h"
26 #include "unicode/dtfmtsym.h"
27 #include "unicode/dtptngen.h"
28 #include "unicode/fieldpos.h"
29 #include "unicode/fpositer.h"
30 #include "unicode/locid.h"
31 #include "unicode/normalizer2.h"
32 #include "unicode/numfmt.h"
33 #include "unicode/numsys.h"
34 #include "unicode/rbbi.h"
35 #include "unicode/smpdtfmt.h"
36 #include "unicode/timezone.h"
37 #include "unicode/translit.h"
38 #include "unicode/uchar.h"
39 #include "unicode/ucol.h"
40 #include "unicode/ucurr.h"
41 #include "unicode/uloc.h"
42 #include "unicode/unistr.h"
43 #include "unicode/unum.h"
44 #include "unicode/ustring.h"
45 #include "unicode/uversion.h"
46 
47 
48 namespace v8 {
49 namespace internal {
50 namespace {
51 
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<uc16[]> * dest,int32_t length)52 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
53                                     std::unique_ptr<uc16[]>* dest,
54                                     int32_t length) {
55   DCHECK(flat.IsFlat());
56   if (flat.IsOneByte()) {
57     if (!*dest) {
58       dest->reset(NewArray<uc16>(length));
59       CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
60     }
61     return reinterpret_cast<const UChar*>(dest->get());
62   } else {
63     return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
64   }
65 }
66 
67 }  // namespace
68 
69 // ECMA 402 6.2.3
// Canonicalizes the BCP 47 language tag in args[0] by round-tripping it
// through ICU: tag -> ICU locale id -> (strict) BCP 47 tag.
//
// Returns the canonical tag as a string, or the string "invalid-tag" when
// either conversion fails, nothing of the input could be parsed, or an
// intermediate result does not fit (with its terminator) into the fixed-size
// buffers used here.
RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);
  CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);

  v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));

  // Return value which denotes invalid language tag.
  // TODO(jshin): Can uloc_{for,to}LanguageTag fail even for structurally valid
  // language tags? If not, just add CHECK instead of returning 'invalid-tag'.
  const char* const kInvalidTag = "invalid-tag";

  UErrorCode error = U_ZERO_ERROR;
  char icu_result[ULOC_FULLNAME_CAPACITY];
  int icu_length = 0;

  uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
                      &icu_length, &error);
  // U_STRING_NOT_TERMINATED_WARNING is not a failure as far as U_FAILURE is
  // concerned, but it means |icu_result| has no trailing NUL and therefore
  // must not be handed to the next call as a C string.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING ||
      icu_length == 0) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  char result[ULOC_FULLNAME_CAPACITY];

  // Force strict BCP47 rules.
  uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);

  // Same reasoning as above: |result| is read as a NUL-terminated string
  // below, so an unterminated buffer has to be treated as an error.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  return *factory->NewStringFromAsciiChecked(result);
}
105 
106 
// Returns a plain JS object with one own property per locale that ICU reports
// as available for the service named by args[0] ("collator", "numberformat",
// "dateformat" or "breakiterator"). Property names are language tags produced
// by uloc_toLanguageTag; property values are the locale's index in ICU's list.
// Any other service name yields an empty object.
RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);
  CONVERT_ARG_HANDLE_CHECKED(String, service, 0);

  const icu::Locale* available_locales = nullptr;
  int32_t count = 0;

  if (service->IsUtf8EqualTo(CStrVector("collator"))) {
    available_locales = icu::Collator::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
    available_locales = icu::NumberFormat::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
    available_locales = icu::DateFormat::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
    available_locales = icu::BreakIterator::getAvailableLocales(count);
  }

  char result[ULOC_FULLNAME_CAPACITY];
  Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());

  for (int32_t i = 0; i < count; ++i) {
    const char* icu_name = available_locales[i].getName();

    UErrorCode error = U_ZERO_ERROR;
    // No need to force strict BCP47 rules.
    uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
    // Skip locales that could not be converted. The not-terminated warning is
    // treated like a failure because |result| is read as a C string below.
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      // This shouldn't happen, but lets not break the user.
      continue;
    }

    RETURN_FAILURE_ON_EXCEPTION(
        isolate, JSObject::SetOwnPropertyIgnoreAttributes(
                     locales, factory->NewStringFromAsciiChecked(result),
                     factory->NewNumber(i), NONE));
  }

  return *locales;
}
150 
151 
// Returns ICU's default locale as a language tag string, or "und" when the
// locale name cannot be converted into a NUL-terminated tag that fits the
// fixed-size buffer.
RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 0);

  // A default-constructed icu::Locale represents the current default locale.
  icu::Locale default_locale;

  // Convert the ICU locale name into a language tag.
  char result[ULOC_FULLNAME_CAPACITY];
  UErrorCode status = U_ZERO_ERROR;
  uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     FALSE, &status);
  // U_SUCCESS also accepts warnings; exclude the one that signals |result|
  // was filled completely and therefore lacks the terminating NUL that
  // NewStringFromAsciiChecked relies on.
  if (U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(result);
  }

  return *factory->NewStringFromStaticChars("und");
}
171 
172 
RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants)173 RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
174   HandleScope scope(isolate);
175   Factory* factory = isolate->factory();
176 
177   DCHECK(args.length() == 1);
178 
179   CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);
180 
181   uint32_t length = static_cast<uint32_t>(input->length()->Number());
182   // Set some limit to prevent fuzz tests from going OOM.
183   // Can be bumped when callers' requirements change.
184   if (length >= 100) return isolate->ThrowIllegalOperation();
185   Handle<FixedArray> output = factory->NewFixedArray(length);
186   Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
187   Handle<Name> base = factory->NewStringFromStaticChars("base");
188   for (unsigned int i = 0; i < length; ++i) {
189     Handle<Object> locale_id;
190     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
191         isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
192     if (!locale_id->IsString()) {
193       return isolate->Throw(*factory->illegal_argument_string());
194     }
195 
196     v8::String::Utf8Value utf8_locale_id(
197         v8::Utils::ToLocal(Handle<String>::cast(locale_id)));
198 
199     UErrorCode error = U_ZERO_ERROR;
200 
201     // Convert from BCP47 to ICU format.
202     // de-DE-u-co-phonebk -> de_DE@collation=phonebook
203     char icu_locale[ULOC_FULLNAME_CAPACITY];
204     int icu_locale_length = 0;
205     uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
206                         &icu_locale_length, &error);
207     if (U_FAILURE(error) || icu_locale_length == 0) {
208       return isolate->Throw(*factory->illegal_argument_string());
209     }
210 
211     // Maximize the locale.
212     // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
213     char icu_max_locale[ULOC_FULLNAME_CAPACITY];
214     uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
215                           &error);
216 
217     // Remove extensions from maximized locale.
218     // de_Latn_DE@collation=phonebook -> de_Latn_DE
219     char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
220     uloc_getBaseName(icu_max_locale, icu_base_max_locale,
221                      ULOC_FULLNAME_CAPACITY, &error);
222 
223     // Get original name without extensions.
224     // de_DE@collation=phonebook -> de_DE
225     char icu_base_locale[ULOC_FULLNAME_CAPACITY];
226     uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
227                      &error);
228 
229     // Convert from ICU locale format to BCP47 format.
230     // de_Latn_DE -> de-Latn-DE
231     char base_max_locale[ULOC_FULLNAME_CAPACITY];
232     uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
233                        ULOC_FULLNAME_CAPACITY, FALSE, &error);
234 
235     // de_DE -> de-DE
236     char base_locale[ULOC_FULLNAME_CAPACITY];
237     uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
238                        FALSE, &error);
239 
240     if (U_FAILURE(error)) {
241       return isolate->Throw(*factory->illegal_argument_string());
242     }
243 
244     Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
245     Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
246     JSObject::AddProperty(result, maximized, value, NONE);
247     value = factory->NewStringFromAsciiChecked(base_locale);
248     JSObject::AddProperty(result, base, value, NONE);
249     output->set(i, *result);
250   }
251 
252   Handle<JSArray> result = factory->NewJSArrayWithElements(output);
253   result->set_length(Smi::FromInt(length));
254   return *result;
255 }
256 
257 
// Returns true iff args[0] is a JSObject whose data property keyed by the
// intl-initialized marker symbol is anything other than undefined.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  if (input->IsJSObject()) {
    Handle<JSObject> candidate = Handle<JSObject>::cast(input);
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value =
        JSReceiver::GetDataProperty(candidate, marker_symbol);
    const bool is_marked = !marker_value->IsUndefined(isolate);
    return isolate->heap()->ToBoolean(is_marked);
  }

  // Non-objects can never carry the marker.
  return isolate->heap()->false_value();
}
272 
273 
// Returns true iff args[0] is a JSObject whose intl-initialized marker
// property holds a string equal to the string in args[1].
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);

  if (input->IsJSObject()) {
    Handle<JSObject> candidate = Handle<JSObject>::cast(input);
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value =
        JSReceiver::GetDataProperty(candidate, marker_symbol);

    // Only a string marker can match; anything else counts as "not of type".
    bool type_matches = false;
    if (marker_value->IsString()) {
      type_matches = String::cast(*marker_value)->Equals(*expected_type);
    }
    return isolate->heap()->ToBoolean(type_matches);
  }

  return isolate->heap()->false_value();
}
290 
291 
// Tags the object in args[0] as an initialized Intl object: stores the type
// string (args[1]) under the intl-initialized marker symbol and the
// implementation object (args[2]) under the intl-impl-object symbol.
// Returns undefined.
RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2);

  Factory* factory = isolate->factory();

  // Record which kind of Intl object this is.
  Handle<Symbol> type_marker = factory->intl_initialized_marker_symbol();
  JSObject::SetProperty(input, type_marker, type, STRICT).Assert();

  // Record the object that carries the actual implementation.
  Handle<Symbol> impl_marker = factory->intl_impl_object_symbol();
  JSObject::SetProperty(input, impl_marker, impl, STRICT).Assert();

  return isolate->heap()->undefined_value();
}
309 
310 
// Returns the implementation object stored on args[0] under the
// intl-impl-object symbol. Throws a TypeError (kNotIntlObject) when that data
// property is not a JSObject.
RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  // |input| is converted as a JSObject here, so no additional IsJSObject()
  // test or Handle<JSObject> cast is needed afterwards.
  CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);

  Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol();

  Handle<Object> impl = JSReceiver::GetDataProperty(input, marker);
  if (!impl->IsJSObject()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kNotIntlObject, input));
  }
  return *impl;
}
334 
335 
// Creates the JS wrapper for an ICU date/time formatter.
//   args[0]: locale string, args[1]: options object, args[2]: object that
//   DateFormat::InitializeDateTimeFormat receives as |resolved|.
// The formatter pointer is stored in internal field 0 of the wrapper, and the
// wrapper gets a "dateFormat" = "valid" property. Throws an illegal-operation
// error when no formatter could be created.
RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the (still empty) wrapper object from the shared template.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU formatter and park it in the wrapper's first internal field.
  icu::SimpleDateFormat* icu_formatter =
      DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
  if (icu_formatter == nullptr) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_formatter));

  Factory* factory = isolate->factory();
  Handle<String> tag_name = factory->NewStringFromStaticChars("dateFormat");
  Handle<String> tag_value = factory->NewStringFromStaticChars("valid");
  JSObject::AddProperty(holder, tag_name, tag_value, NONE);

  // Register a weak global handle so DateFormat::DeleteDateFormat gets a
  // chance to delete the formatter once the wrapper is garbage collected.
  Handle<Object> weak_ref = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_ref.location(), weak_ref.location(),
                          DateFormat::DeleteDateFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
373 
374 
// Formats the JSDate in args[1] with the ICU formatter unpacked from the
// holder object in args[0] and returns the result as a two-byte string.
// Throws an illegal-operation error if the holder carries no formatter.
RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  formatter->format(time_value->Number(), formatted);

  // Copy ICU's UTF-16 buffer into a V8 heap string.
  const uint16_t* chars =
      reinterpret_cast<const uint16_t*>(formatted.getBuffer());
  Vector<const uint16_t> utf16(chars, formatted.length());
  RETURN_RESULT_OR_FAILURE(isolate,
                           isolate->factory()->NewStringFromTwoByte(utf16));
}
398 
399 namespace {
400 // The list comes from third_party/icu/source/i18n/unicode/udat.h.
401 // They're mapped to DateTimeFormat components listed at
402 // https://tc39.github.io/ecma402/#sec-datetimeformat-abstracts .
403 
IcuDateFieldIdToDateType(int32_t field_id,Isolate * isolate)404 Handle<String> IcuDateFieldIdToDateType(int32_t field_id, Isolate* isolate) {
405   switch (field_id) {
406     case -1:
407       return isolate->factory()->literal_string();
408     case UDAT_YEAR_FIELD:
409     case UDAT_EXTENDED_YEAR_FIELD:
410     case UDAT_YEAR_NAME_FIELD:
411       return isolate->factory()->year_string();
412     case UDAT_MONTH_FIELD:
413     case UDAT_STANDALONE_MONTH_FIELD:
414       return isolate->factory()->month_string();
415     case UDAT_DATE_FIELD:
416       return isolate->factory()->day_string();
417     case UDAT_HOUR_OF_DAY1_FIELD:
418     case UDAT_HOUR_OF_DAY0_FIELD:
419     case UDAT_HOUR1_FIELD:
420     case UDAT_HOUR0_FIELD:
421       return isolate->factory()->hour_string();
422     case UDAT_MINUTE_FIELD:
423       return isolate->factory()->minute_string();
424     case UDAT_SECOND_FIELD:
425       return isolate->factory()->second_string();
426     case UDAT_DAY_OF_WEEK_FIELD:
427     case UDAT_DOW_LOCAL_FIELD:
428     case UDAT_STANDALONE_DAY_FIELD:
429       return isolate->factory()->weekday_string();
430     case UDAT_AM_PM_FIELD:
431       return isolate->factory()->dayperiod_string();
432     case UDAT_TIMEZONE_FIELD:
433     case UDAT_TIMEZONE_RFC_FIELD:
434     case UDAT_TIMEZONE_GENERIC_FIELD:
435     case UDAT_TIMEZONE_SPECIAL_FIELD:
436     case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD:
437     case UDAT_TIMEZONE_ISO_FIELD:
438     case UDAT_TIMEZONE_ISO_LOCAL_FIELD:
439       return isolate->factory()->timeZoneName_string();
440     case UDAT_ERA_FIELD:
441       return isolate->factory()->era_string();
442     default:
443       // Other UDAT_*_FIELD's cannot show up because there is no way to specify
444       // them via options of Intl.DateTimeFormat.
445       UNREACHABLE();
446       // To prevent MSVC from issuing C4715 warning.
447       return Handle<String>();
448   }
449 }
450 
AddElement(Handle<JSArray> array,int index,int32_t field_id,const icu::UnicodeString & formatted,int32_t begin,int32_t end,Isolate * isolate)451 bool AddElement(Handle<JSArray> array, int index, int32_t field_id,
452                 const icu::UnicodeString& formatted, int32_t begin, int32_t end,
453                 Isolate* isolate) {
454   HandleScope scope(isolate);
455   Factory* factory = isolate->factory();
456   Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
457   Handle<String> value = IcuDateFieldIdToDateType(field_id, isolate);
458   JSObject::AddProperty(element, factory->type_string(), value, NONE);
459 
460   icu::UnicodeString field(formatted.tempSubStringBetween(begin, end));
461   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
462       isolate, value, factory->NewStringFromTwoByte(Vector<const uint16_t>(
463                           reinterpret_cast<const uint16_t*>(field.getBuffer()),
464                           field.length())),
465       false);
466 
467   JSObject::AddProperty(element, factory->value_string(), value, NONE);
468   RETURN_ON_EXCEPTION_VALUE(
469       isolate, JSObject::AddDataElement(array, index, element, NONE), false);
470   return true;
471 }
472 
473 }  // namespace
474 
// Formats the JSDate in args[1] with the ICU formatter from the holder in
// args[0] and returns a JS array of {type, value} parts covering the whole
// formatted string. Text not covered by any ICU field is emitted as a part
// with field id -1 (literal). Returns undefined if ICU formatting fails or a
// part cannot be added; throws an illegal-operation error if the holder has
// no formatter.
RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  icu::FieldPositionIterator field_iter;
  icu::FieldPosition field;
  UErrorCode status = U_ZERO_ERROR;
  formatter->format(time_value->Number(), formatted, &field_iter, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  Handle<JSArray> parts = factory->NewJSArray(0);
  const int32_t total_length = formatted.length();
  if (total_length == 0) return *parts;

  int next_index = 0;
  // End offset of the last part emitted so far.
  int32_t covered_until = 0;
  while (field_iter.next(field)) {
    const int32_t field_begin = field.getBeginIndex();
    const int32_t field_end = field.getEndIndex();

    // Emit any text between the previous field and this one as a literal.
    if (covered_until < field_begin) {
      if (!AddElement(parts, next_index, -1, formatted, covered_until,
                      field_begin, isolate)) {
        return isolate->heap()->undefined_value();
      }
      next_index++;
    }

    if (!AddElement(parts, next_index, field.getField(), formatted,
                    field_begin, field_end, isolate)) {
      return isolate->heap()->undefined_value();
    }
    next_index++;
    covered_until = field_end;
  }

  // Trailing text after the last field is a literal as well.
  if (covered_until < total_length) {
    if (!AddElement(parts, next_index, -1, formatted, covered_until,
                    total_length, isolate)) {
      return isolate->heap()->undefined_value();
    }
  }

  JSObject::ValidateElements(parts);
  return *parts;
}
531 
// Parses the string in args[1] with the ICU formatter from the holder in
// args[0]. Returns a new JSDate on success and undefined when ICU reports a
// parse failure; throws an illegal-operation error if the holder has no
// formatter.
RUNTIME_FUNCTION(Runtime_InternalDateParse) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1);

  // Hand the text to ICU via UTF-8.
  v8::String::Utf8Value utf8_text(v8::Utils::ToLocal(date_string));
  icu::UnicodeString icu_text(icu::UnicodeString::fromUTF8(*utf8_text));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  UErrorCode status = U_ZERO_ERROR;
  const UDate parsed = formatter->parse(icu_text, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  const double time_value = static_cast<double>(parsed);
  RETURN_RESULT_OR_FAILURE(
      isolate, JSDate::New(isolate->date_function(), isolate->date_function(),
                           time_value));
}
554 
555 
// Creates the JS wrapper for an ICU number formatter.
//   args[0]: locale string, args[1]: options object, args[2]: object that
//   NumberFormat::InitializeNumberFormat receives as |resolved|.
// The formatter pointer is stored in internal field 0 of the wrapper, and the
// wrapper gets a "numberFormat" = "valid" property. Throws an
// illegal-operation error when no formatter could be created.
RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the (still empty) wrapper object from the shared template.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU formatter and park it in the wrapper's first internal field.
  icu::DecimalFormat* icu_formatter =
      NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
  if (icu_formatter == nullptr) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_formatter));

  Factory* factory = isolate->factory();
  Handle<String> tag_name = factory->NewStringFromStaticChars("numberFormat");
  Handle<String> tag_value = factory->NewStringFromStaticChars("valid");
  JSObject::AddProperty(holder, tag_name, tag_value, NONE);

  // Register a weak global handle with NumberFormat::DeleteNumberFormat as
  // its callback (presumably releasing the formatter when the wrapper dies).
  Handle<Object> weak_ref = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_ref.location(), weak_ref.location(),
                          NumberFormat::DeleteNumberFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
593 
594 
// Converts args[1] to a number, formats it with the ICU formatter from the
// holder in args[0], and returns the result as a two-byte string. Throws an
// illegal-operation error if the holder carries no formatter.
RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);

  Handle<Object> numeric_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, numeric_value,
                                     Object::ToNumber(number));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  formatter->format(numeric_value->Number(), formatted);

  // Copy ICU's UTF-16 buffer into a V8 heap string.
  const uint16_t* chars =
      reinterpret_cast<const uint16_t*>(formatted.getBuffer());
  Vector<const uint16_t> utf16(chars, formatted.length());
  RETURN_RESULT_OR_FAILURE(isolate,
                           isolate->factory()->NewStringFromTwoByte(utf16));
}
618 
619 
// Parses the string in args[1] with the ICU number formatter from the holder
// in args[0] and records a kIntlV8Parse use count. Returns a JS number for
// double, long and int64 results; returns undefined when parsing fails or
// ICU yields any other result type. Throws an illegal-operation error if the
// holder carries no formatter.
RUNTIME_FUNCTION(Runtime_InternalNumberParse) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1);

  isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse);

  // Hand the text to ICU via UTF-8.
  v8::String::Utf8Value utf8_text(v8::Utils::ToLocal(number_string));
  icu::UnicodeString icu_text(icu::UnicodeString::fromUTF8(*utf8_text));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  UErrorCode status = U_ZERO_ERROR;
  icu::Formattable parsed;
  // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49
  // to be part of Chrome.
  // TODO(cira): Include currency parsing code using parseCurrency call.
  // We need to check if the formatter parses all currencies or only the
  // one it was constructed with (it will impact the API - how to return ISO
  // code and the value).
  formatter->parse(icu_text, parsed, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  Factory* factory = isolate->factory();
  const icu::Formattable::Type parsed_type = parsed.getType();
  if (parsed_type == icu::Formattable::kDouble) {
    return *factory->NewNumber(parsed.getDouble());
  }
  if (parsed_type == icu::Formattable::kLong) {
    return *factory->NewNumberFromInt(parsed.getLong());
  }
  if (parsed_type == icu::Formattable::kInt64) {
    return *factory->NewNumber(static_cast<double>(parsed.getInt64()));
  }
  // Non-numeric results are not representable as a JS number.
  return isolate->heap()->undefined_value();
}
659 
660 
// Creates the JS wrapper for an ICU collator.
//   args[0]: locale string, args[1]: options object, args[2]: object that
//   Collator::InitializeCollator receives as |resolved|.
// The collator pointer is stored in internal field 0 of the wrapper, and the
// wrapper gets a "collator" = "valid" property. Throws an illegal-operation
// error when no collator could be created.
RUNTIME_FUNCTION(Runtime_CreateCollator) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the (still empty) wrapper object from the shared template.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU collator and park it in the wrapper's first internal field.
  icu::Collator* icu_collator =
      Collator::InitializeCollator(isolate, locale, options, resolved);
  if (icu_collator == nullptr) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_collator));

  Factory* factory = isolate->factory();
  Handle<String> tag_name = factory->NewStringFromStaticChars("collator");
  Handle<String> tag_value = factory->NewStringFromStaticChars("valid");
  JSObject::AddProperty(holder, tag_name, tag_value, NONE);

  // Register a weak global handle with Collator::DeleteCollator as its
  // callback (presumably releasing the collator when the wrapper dies).
  Handle<Object> weak_ref = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_ref.location(), weak_ref.location(),
                          Collator::DeleteCollator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
696 
697 
// Compares the strings in args[1] and args[2] with the ICU collator from the
// holder in args[0] and returns the UCollationResult as a JS number. Throws
// an illegal-operation error if the holder carries no collator or ICU reports
// a failure.
RUNTIME_FUNCTION(Runtime_InternalCompare) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, lhs, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, rhs, 2);

  icu::Collator* icu_collator =
      Collator::UnpackCollator(isolate, collator_holder);
  if (icu_collator == nullptr) return isolate->ThrowIllegalOperation();

  // Flat content is required to obtain raw character pointers below.
  lhs = String::Flatten(lhs);
  rhs = String::Flatten(rhs);

  UCollationResult comparison;
  UErrorCode status = U_ZERO_ERROR;
  {
    // The raw character pointers are only used inside this no-allocation
    // scope.
    DisallowHeapAllocation no_gc;
    const int32_t lhs_length = lhs->length();
    const int32_t rhs_length = rhs->length();
    String::FlatContent lhs_flat = lhs->GetFlatContent();
    String::FlatContent rhs_flat = rhs->GetFlatContent();
    // Backing stores for one-byte strings that have to be widened.
    std::unique_ptr<uc16[]> lhs_buffer;
    std::unique_ptr<uc16[]> rhs_buffer;
    const UChar* lhs_chars =
        GetUCharBufferFromFlat(lhs_flat, &lhs_buffer, lhs_length);
    const UChar* rhs_chars =
        GetUCharBufferFromFlat(rhs_flat, &rhs_buffer, rhs_length);
    comparison = icu_collator->compare(lhs_chars, lhs_length, rhs_chars,
                                       rhs_length, status);
  }
  if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();

  return *isolate->factory()->NewNumberFromInt(comparison);
}
732 
733 
// Normalizes the string in args[0] using the normalization form selected by
// the integer in args[1], which indexes |normalizationForms| below.
// Returns the input string itself when it is already fully normalized, a new
// two-byte string otherwise, and undefined when ICU reports a failure.
RUNTIME_FUNCTION(Runtime_StringNormalize) {
  HandleScope scope(isolate);
  // (ICU data name, mode) pairs passed to icu::Normalizer2::getInstance.
  // NOTE(review): per the ICU documentation, "nfc"/"nfkc" combined with
  // UNORM2_DECOMPOSE select NFD/NFKD, so the entries presumably stand for
  // NFC, NFD, NFKC, NFKD in that order - confirm against the JS caller.
  static const struct {
    const char* name;
    UNormalization2Mode mode;
  } normalizationForms[] = {
      {"nfc", UNORM2_COMPOSE},
      {"nfc", UNORM2_DECOMPOSE},
      {"nfkc", UNORM2_COMPOSE},
      {"nfkc", UNORM2_DECOMPOSE},
  };

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
  // Guard the table lookup below against out-of-range form ids.
  CHECK(form_id >= 0 &&
        static_cast<size_t>(form_id) < arraysize(normalizationForms));

  int length = s->length();
  s = String::Flatten(s);
  icu::UnicodeString result;
  // Backing store for the widened copy of a one-byte input. Declared outside
  // the scope below, so it outlives |result|'s use of it.
  std::unique_ptr<uc16[]> sap;
  UErrorCode status = U_ZERO_ERROR;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent();
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
    // Wrap |src| in a UnicodeString without copying the characters.
    icu::UnicodeString input(false, src, length);
    // Getting a singleton. Should not free it.
    const icu::Normalizer2* normalizer =
        icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
                                      normalizationForms[form_id].mode, status);
    DCHECK(U_SUCCESS(status));
    CHECK(normalizer != nullptr);
    // Length of the leading part of |input| that needs no normalization.
    int32_t normalized_prefix_length =
        normalizer->spanQuickCheckYes(input, status);
    // Quick return if the input is already normalized.
    if (length == normalized_prefix_length) return *s;
    icu::UnicodeString unnormalized =
        input.tempSubString(normalized_prefix_length);
    // Read-only alias of the normalized prefix.
    result.setTo(false, input.getBuffer(), normalized_prefix_length);
    // copy-on-write; normalize the suffix and append to |result|.
    normalizer->normalizeSecondAndAppend(result, unnormalized, status);
  }

  // Covers failures from both spanQuickCheckYes and normalizeSecondAndAppend,
  // since |status| is shared between them.
  if (U_FAILURE(status)) {
    return isolate->heap()->undefined_value();
  }

  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
                   reinterpret_cast<const uint16_t*>(result.getBuffer()),
                   result.length())));
}
790 
791 
// Creates the JS wrapper for an ICU break iterator.
//   args[0]: locale string, args[1]: options object, args[2]: object that
//   BreakIterator::InitializeBreakIterator receives as |resolved|.
// Internal field 0 of the wrapper holds the iterator pointer, internal field
// 1 (the adopted text) starts out null, and the wrapper gets a
// "breakIterator" = "valid" property. Throws an illegal-operation error when
// no iterator could be created.
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the (still empty) wrapper object; break iterators use the
  // second template.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate2(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU break iterator and park it in the first internal field.
  icu::BreakIterator* icu_iterator = BreakIterator::InitializeBreakIterator(
      isolate, locale, options, resolved);
  if (icu_iterator == nullptr) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_iterator));
  // Make sure that the pointer to adopted text is NULL.
  holder->SetInternalField(1, static_cast<Smi*>(nullptr));

  Factory* factory = isolate->factory();
  Handle<String> tag_name = factory->NewStringFromStaticChars("breakIterator");
  Handle<String> tag_value = factory->NewStringFromStaticChars("valid");
  JSObject::AddProperty(holder, tag_name, tag_value, NONE);

  // Register a weak global handle so BreakIterator::DeleteBreakIterator gets
  // a chance to delete the break iterator once the wrapper is garbage
  // collected.
  Handle<Object> weak_ref = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_ref.location(), weak_ref.location(),
                          BreakIterator::DeleteBreakIterator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
833 
834 
// Runtime entry: replaces the text that a wrapped ICU break iterator works
// on.
//   args[0]: wrapper object holding the break iterator (JSObject)
//   args[1]: the new text (String)
// Returns undefined, or throws an illegal-operation error when no break
// iterator can be unpacked from the wrapper.
RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, text, 1);

  icu::BreakIterator* icu_iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (icu_iterator == nullptr) return isolate->ThrowIllegalOperation();

  // Free the text adopted by an earlier call (internal field 1); deleting a
  // null pointer is a no-op.
  icu::UnicodeString* adopted =
      reinterpret_cast<icu::UnicodeString*>(holder->GetInternalField(1));
  delete adopted;

  const int char_count = text->length();
  text = String::Flatten(text);
  DisallowHeapAllocation no_gc;
  String::FlatContent content = text->GetFlatContent();
  // Receives a widened copy when |content| is one-byte.
  std::unique_ptr<uc16[]> widened;
  const UChar* chars = GetUCharBufferFromFlat(content, &widened, char_count);

  // Remember the new text in internal field 1 so that it can be released by
  // the next call, then hand it to the iterator.
  adopted = new icu::UnicodeString(chars, char_count);
  holder->SetInternalField(1, reinterpret_cast<Smi*>(adopted));

  icu_iterator->setText(*adopted);

  return isolate->heap()->undefined_value();
}
864 
865 
// Runtime entry: calls first() on the ICU break iterator wrapped by args[0]
// (JSObject) and returns the resulting position as a number. Throws an
// illegal-operation error when no break iterator can be unpacked.
RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* icu_iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (icu_iterator == nullptr) return isolate->ThrowIllegalOperation();

  const int32_t position = icu_iterator->first();
  return *isolate->factory()->NewNumberFromInt(position);
}
879 
880 
// Runtime entry: calls next() on the ICU break iterator wrapped by args[0]
// (JSObject) and returns the resulting position as a number. Throws an
// illegal-operation error when no break iterator can be unpacked.
RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* icu_iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (icu_iterator == nullptr) return isolate->ThrowIllegalOperation();

  const int32_t position = icu_iterator->next();
  return *isolate->factory()->NewNumberFromInt(position);
}
894 
895 
// Runtime entry: calls current() on the ICU break iterator wrapped by
// args[0] (JSObject) and returns the resulting position as a number. Throws
// an illegal-operation error when no break iterator can be unpacked.
RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* icu_iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (icu_iterator == nullptr) return isolate->ThrowIllegalOperation();

  const int32_t position = icu_iterator->current();
  return *isolate->factory()->NewNumberFromInt(position);
}
909 
910 
// Runtime entry: classifies the rule status of the ICU break iterator
// wrapped by args[0] (JSObject). Returns one of the strings "none", "number",
// "letter", "kana", "ideo" or "unknown", depending on which UBRK_WORD_* range
// the status falls into. Throws an illegal-operation error when no break
// iterator can be unpacked.
RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* icu_iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (icu_iterator == nullptr) return isolate->ThrowIllegalOperation();

  // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
  const int32_t rule_status =
      static_cast<icu::RuleBasedBreakIterator*>(icu_iterator)->getRuleStatus();

  // True when the status lies in the half-open range [first, limit).
  const auto status_in = [rule_status](int32_t first, int32_t limit) {
    return rule_status >= first && rule_status < limit;
  };

  // Keep return values in sync with JavaScript BreakType enum.
  if (status_in(UBRK_WORD_NONE, UBRK_WORD_NONE_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("none");
  }
  if (status_in(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT)) {
    // "number" already exists as a heap constant.
    return isolate->heap()->number_string();
  }
  if (status_in(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("letter");
  }
  if (status_in(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("kana");
  }
  if (status_in(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("ideo");
  }
  return *isolate->factory()->NewStringFromStaticChars("unknown");
}
941 
942 namespace {
LocaleConvertCase(Handle<String> s,Isolate * isolate,bool is_to_upper,const char * lang)943 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
944                                           bool is_to_upper, const char* lang) {
945   auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
946   int32_t src_length = s->length();
947   int32_t dest_length = src_length;
948   UErrorCode status;
949   Handle<SeqTwoByteString> result;
950   std::unique_ptr<uc16[]> sap;
951 
952   // This is not a real loop. It'll be executed only once (no overflow) or
953   // twice (overflow).
954   for (int i = 0; i < 2; ++i) {
955     // Case conversion can increase the string length (e.g. sharp-S => SS) so
956     // that we have to handle RangeError exceptions here.
957     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
958         isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
959     DisallowHeapAllocation no_gc;
960     String::FlatContent flat = s->GetFlatContent();
961     const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
962     status = U_ZERO_ERROR;
963     dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
964                                  dest_length, src, src_length, lang, &status);
965     if (status != U_BUFFER_OVERFLOW_ERROR) break;
966   }
967 
968   // In most cases, the output will fill the destination buffer completely
969   // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
970   // Only in rare cases, it'll be shorter than the destination buffer and
971   // |result| has to be truncated.
972   DCHECK(U_SUCCESS(status));
973   if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
974     DCHECK(dest_length == result->length());
975     return *result;
976   }
977   if (U_SUCCESS(status)) {
978     DCHECK(dest_length < result->length());
979     return *Handle<SeqTwoByteString>::cast(
980         SeqString::Truncate(result, dest_length));
981   }
982   return *s;
983 }
984 
// Returns true iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  if (ch < 'A') return false;
  return ch <= 'Z';
}
986 
// Lower-case mapping for every Latin-1 code unit, indexed by the code unit.
// 'A'..'Z' map to 'a'..'z' and 0xC0..0xDE map to 0xE0..0xFE, except 0xD7,
// which maps to itself. All other entries are the identity.
const uint8_t kToLower[256] = {
    // 0x00 - 0x0F
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    // 0x10 - 0x1F
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    // 0x20 - 0x2F
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    // 0x30 - 0x3F
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40 - 0x4F: '@', then 'A'..'O' -> 'a'..'o'
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x50 - 0x5F: 'P'..'Z' -> 'p'..'z', then 0x5B..0x5F unchanged
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60 - 0x6F
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x70 - 0x7F
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    // 0x80 - 0x8F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    // 0x90 - 0x9F
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    // 0xA0 - 0xAF
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    // 0xB0 - 0xBF
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0 - 0xCF -> 0xE0 - 0xEF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xD0 - 0xDF -> 0xF0 - 0xFE, with 0xD7 and 0xDF unchanged
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0 - 0xEF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xF0 - 0xFF
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
1011 
ToLatin1Lower(uint16_t ch)1012 inline uint16_t ToLatin1Lower(uint16_t ch) {
1013   return static_cast<uint16_t>(kToLower[ch]);
1014 }
1015 
// Maps 'a'..'z' to 'A'..'Z' by clearing bit 5 (0x20); every other value is
// returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  const int case_bit = is_ascii_lower ? 0x20 : 0;
  return static_cast<uint16_t>(ch & ~case_bit);
}
1019 
1020 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)1021 inline uint16_t ToLatin1Upper(uint16_t ch) {
1022   DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
1023   return ch &
1024          ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
1025            << 5);
1026 }
1027 
1028 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)1029 bool ToUpperFastASCII(const Vector<const Char>& src,
1030                       Handle<SeqOneByteString> result) {
1031   // Do a faster loop for the case where all the characters are ASCII.
1032   uint16_t ored = 0;
1033   int32_t index = 0;
1034   for (auto it = src.begin(); it != src.end(); ++it) {
1035     uint16_t ch = static_cast<uint16_t>(*it);
1036     ored |= ch;
1037     result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
1038   }
1039   return !(ored & ~0x7F);
1040 }
1041 
// U+00DF, lower-case sharp-s; its upper-case form is the two letters "SS".
constexpr uint16_t sharp_s = 0x00DF;
1043 
1044 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,Handle<SeqOneByteString> result,int * sharp_s_count)1045 bool ToUpperOneByte(const Vector<const Char>& src,
1046                     Handle<SeqOneByteString> result, int* sharp_s_count) {
1047   // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
1048 
1049   // There are two special cases.
1050   //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
1051   //  2. Lower case sharp-S converts to "SS" (two characters)
1052   *sharp_s_count = 0;
1053   int32_t index = 0;
1054   for (auto it = src.begin(); it != src.end(); ++it) {
1055     uint16_t ch = static_cast<uint16_t>(*it);
1056     if (V8_UNLIKELY(ch == sharp_s)) {
1057       ++(*sharp_s_count);
1058       continue;
1059     }
1060     if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
1061       // Since this upper-cased character does not fit in an 8-bit string, we
1062       // need to take the 16-bit path.
1063       return false;
1064     }
1065     result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
1066   }
1067 
1068   return true;
1069 }
1070 
1071 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)1072 void ToUpperWithSharpS(const Vector<const Char>& src,
1073                        Handle<SeqOneByteString> result) {
1074   int32_t dest_index = 0;
1075   for (auto it = src.begin(); it != src.end(); ++it) {
1076     uint16_t ch = static_cast<uint16_t>(*it);
1077     if (ch == sharp_s) {
1078       result->SeqOneByteStringSet(dest_index++, 'S');
1079       result->SeqOneByteStringSet(dest_index++, 'S');
1080     } else {
1081       result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
1082     }
1083   }
1084 }
1085 
1086 }  // namespace
1087 
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N)1088 RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
1089   HandleScope scope(isolate);
1090   DCHECK_EQ(args.length(), 1);
1091   CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1092 
1093   int length = s->length();
1094   s = String::Flatten(s);
1095   // First scan the string for uppercase and non-ASCII characters:
1096   if (s->HasOnlyOneByteChars()) {
1097     int first_index_to_lower = length;
1098     for (int index = 0; index < length; ++index) {
1099       // Blink specializes this path for one-byte strings, so it
1100       // does not need to do a generic get, but can do the equivalent
1101       // of SeqOneByteStringGet.
1102       uint16_t ch = s->Get(index);
1103       if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
1104         first_index_to_lower = index;
1105         break;
1106       }
1107     }
1108 
1109     // Nothing to do if the string is all ASCII with no uppercase.
1110     if (first_index_to_lower == length) return *s;
1111 
1112     // We depend here on the invariant that the length of a Latin1
1113     // string is invariant under ToLowerCase, and the result always
1114     // fits in the Latin1 range in the *root locale*. It does not hold
1115     // for ToUpperCase even in the root locale.
1116     Handle<SeqOneByteString> result;
1117     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1118         isolate, result, isolate->factory()->NewRawOneByteString(length));
1119 
1120     DisallowHeapAllocation no_gc;
1121     String::FlatContent flat = s->GetFlatContent();
1122     if (flat.IsOneByte()) {
1123       const uint8_t* src = flat.ToOneByteVector().start();
1124       CopyChars(result->GetChars(), src,
1125                 static_cast<size_t>(first_index_to_lower));
1126       for (int index = first_index_to_lower; index < length; ++index) {
1127         uint16_t ch = static_cast<uint16_t>(src[index]);
1128         result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1129       }
1130     } else {
1131       const uint16_t* src = flat.ToUC16Vector().start();
1132       CopyChars(result->GetChars(), src,
1133                 static_cast<size_t>(first_index_to_lower));
1134       for (int index = first_index_to_lower; index < length; ++index) {
1135         uint16_t ch = src[index];
1136         result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1137       }
1138     }
1139 
1140     return *result;
1141   }
1142 
1143   // Blink had an additional case here for ASCII 2-byte strings, but
1144   // that is subsumed by the above code (assuming there isn't a false
1145   // negative for HasOnlyOneByteChars).
1146 
1147   // Do a slower implementation for cases that include non-ASCII characters.
1148   return LocaleConvertCase(s, isolate, false, "");
1149 }
1150 
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N)1151 RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
1152   HandleScope scope(isolate);
1153   DCHECK_EQ(args.length(), 1);
1154   CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1155 
1156   // This function could be optimized for no-op cases the way lowercase
1157   // counterpart is, but in empirical testing, few actual calls to upper()
1158   // are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
1159 
1160   int32_t length = s->length();
1161   s = String::Flatten(s);
1162 
1163   if (s->HasOnlyOneByteChars()) {
1164     Handle<SeqOneByteString> result;
1165     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1166         isolate, result, isolate->factory()->NewRawOneByteString(length));
1167 
1168     int sharp_s_count;
1169     bool is_result_single_byte;
1170     {
1171       DisallowHeapAllocation no_gc;
1172       String::FlatContent flat = s->GetFlatContent();
1173       // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
1174       // could be removed  because ToUpperOneByte is pretty fast now (it
1175       // does not call ICU API any more.).
1176       if (flat.IsOneByte()) {
1177         Vector<const uint8_t> src = flat.ToOneByteVector();
1178         if (ToUpperFastASCII(src, result)) return *result;
1179         is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1180       } else {
1181         DCHECK(flat.IsTwoByte());
1182         Vector<const uint16_t> src = flat.ToUC16Vector();
1183         if (ToUpperFastASCII(src, result)) return *result;
1184         is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1185       }
1186     }
1187 
1188     // Go to the full Unicode path if there are characters whose uppercase
1189     // is beyond the Latin-1 range (cannot be represented in OneByteString).
1190     if (V8_UNLIKELY(!is_result_single_byte)) {
1191       return LocaleConvertCase(s, isolate, true, "");
1192     }
1193 
1194     if (sharp_s_count == 0) return *result;
1195 
1196     // We have sharp_s_count sharp-s characters, but the result is still
1197     // in the Latin-1 range.
1198     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1199         isolate, result,
1200         isolate->factory()->NewRawOneByteString(length + sharp_s_count));
1201     DisallowHeapAllocation no_gc;
1202     String::FlatContent flat = s->GetFlatContent();
1203     if (flat.IsOneByte()) {
1204       ToUpperWithSharpS(flat.ToOneByteVector(), result);
1205     } else {
1206       ToUpperWithSharpS(flat.ToUC16Vector(), result);
1207     }
1208 
1209     return *result;
1210   }
1211 
1212   return LocaleConvertCase(s, isolate, true, "");
1213 }
1214 
// Runtime entry: converts the case of a string for a specific language.
//   args[0]: the string to convert (String)
//   args[1]: true for upper-casing, false for lower-casing (boolean)
//   args[2]: two-letter language code (SeqOneByteString)
// Returns whatever LocaleConvertCase returns for these inputs.
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 3);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
  CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);

  // All the languages requiring special handling ("az", "el", "lt", "tr")
  // have a 2-letter language code.
  DCHECK(lang->length() == 2);

  // Copy the code into a NUL-terminated buffer before flattening |s|.
  const uint8_t* code = lang->GetChars();
  uint8_t lang_code[3] = {code[0], code[1], 0};

  s = String::Flatten(s);
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment, though.
  const char* icu_lang = reinterpret_cast<const char*>(lang_code);
  return LocaleConvertCase(s, isolate, is_upper, icu_lang);
}
1236 
// Runtime entry: returns element 0 of the DATE_CACHE_VERSION eternal-handle
// singleton, a one-element FixedArray that is created on first use with the
// value Smi zero. Returns undefined while the serializer is enabled. Takes no
// arguments.
RUNTIME_FUNCTION(Runtime_DateCacheVersion) {
  HandleScope scope(isolate);
  DCHECK_EQ(0, args.length());
  if (isolate->serializer_enabled()) return isolate->heap()->undefined_value();

  EternalHandles* eternals = isolate->eternal_handles();
  if (!eternals->Exists(EternalHandles::DATE_CACHE_VERSION)) {
    // First call: create the tenured one-element array and register it.
    Handle<FixedArray> fresh_cell =
        isolate->factory()->NewFixedArray(1, TENURED);
    fresh_cell->set(0, Smi::kZero);
    eternals->CreateSingleton(isolate, *fresh_cell,
                              EternalHandles::DATE_CACHE_VERSION);
  }

  Handle<FixedArray> version_cell = Handle<FixedArray>::cast(
      eternals->GetSingleton(EternalHandles::DATE_CACHE_VERSION));
  return version_cell->get(0);
}
1253 
1254 }  // namespace internal
1255 }  // namespace v8
1256 
1257 #endif  // V8_I18N_SUPPORT
1258