1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5
6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h"
8
9 #include <memory>
10
11 #include "src/api.h"
12 #include "src/api-natives.h"
13 #include "src/arguments.h"
14 #include "src/factory.h"
15 #include "src/i18n.h"
16 #include "src/isolate-inl.h"
17 #include "src/messages.h"
18
19 #include "unicode/brkiter.h"
20 #include "unicode/calendar.h"
21 #include "unicode/coll.h"
22 #include "unicode/curramt.h"
23 #include "unicode/datefmt.h"
24 #include "unicode/dcfmtsym.h"
25 #include "unicode/decimfmt.h"
26 #include "unicode/dtfmtsym.h"
27 #include "unicode/dtptngen.h"
28 #include "unicode/fieldpos.h"
29 #include "unicode/fpositer.h"
30 #include "unicode/locid.h"
31 #include "unicode/normalizer2.h"
32 #include "unicode/numfmt.h"
33 #include "unicode/numsys.h"
34 #include "unicode/rbbi.h"
35 #include "unicode/smpdtfmt.h"
36 #include "unicode/timezone.h"
37 #include "unicode/translit.h"
38 #include "unicode/uchar.h"
39 #include "unicode/ucol.h"
40 #include "unicode/ucurr.h"
41 #include "unicode/uloc.h"
42 #include "unicode/unistr.h"
43 #include "unicode/unum.h"
44 #include "unicode/ustring.h"
45 #include "unicode/uversion.h"
46
47
48 namespace v8 {
49 namespace internal {
50 namespace {
51
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<uc16[]> * dest,int32_t length)52 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
53 std::unique_ptr<uc16[]>* dest,
54 int32_t length) {
55 DCHECK(flat.IsFlat());
56 if (flat.IsOneByte()) {
57 if (!*dest) {
58 dest->reset(NewArray<uc16>(length));
59 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
60 }
61 return reinterpret_cast<const UChar*>(dest->get());
62 } else {
63 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
64 }
65 }
66
67 } // namespace
68
69 // ECMA 402 6.2.3
// Canonicalizes a BCP47 language tag by round-tripping it through ICU's
// locale ID format. Returns the canonical tag, or the string "invalid-tag"
// when either conversion fails or produces an unusable result.
RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);
  CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);

  v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));

  // Return value which denotes invalid language tag.
  // TODO(jshin): Can uloc_{for,to}LanguageTag fail even for structurally valid
  // language tags? If not, just add CHECK instead of returning 'invalid-tag'.
  const char* const kInvalidTag = "invalid-tag";

  UErrorCode error = U_ZERO_ERROR;
  char icu_result[ULOC_FULLNAME_CAPACITY];
  int icu_length = 0;

  uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
                      &icu_length, &error);
  // U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it means the
  // output filled the buffer exactly and carries no terminating NUL. Such a
  // buffer must not be handed on as a C string.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING ||
      icu_length == 0) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  char result[ULOC_FULLNAME_CAPACITY];

  // Force strict BCP47 rules.
  uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);

  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  return *factory->NewStringFromAsciiChecked(result);
}
105
106
// Returns a plain object whose own property names are the language tags of
// the locales ICU reports as available for |service| ("collator",
// "numberformat", "dateformat" or "breakiterator"). Each property's value is
// the index of that locale in ICU's list. An unrecognized service name yields
// an empty object.
RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);
  CONVERT_ARG_HANDLE_CHECKED(String, service, 0);

  int32_t locale_count = 0;
  const icu::Locale* icu_locales = nullptr;

  if (service->IsUtf8EqualTo(CStrVector("collator"))) {
    icu_locales = icu::Collator::getAvailableLocales(locale_count);
  } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
    icu_locales = icu::NumberFormat::getAvailableLocales(locale_count);
  } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
    icu_locales = icu::DateFormat::getAvailableLocales(locale_count);
  } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
    icu_locales = icu::BreakIterator::getAvailableLocales(locale_count);
  }

  char language_tag[ULOC_FULLNAME_CAPACITY];
  Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());

  for (int32_t i = 0; i < locale_count; ++i) {
    UErrorCode status = U_ZERO_ERROR;
    // No need to force strict BCP47 rules.
    uloc_toLanguageTag(icu_locales[i].getName(), language_tag,
                       ULOC_FULLNAME_CAPACITY, FALSE, &status);
    // A conversion failure shouldn't happen; if it does, the locale is simply
    // left out instead of breaking the user.
    if (U_SUCCESS(status)) {
      RETURN_FAILURE_ON_EXCEPTION(
          isolate, JSObject::SetOwnPropertyIgnoreAttributes(
                       locales, factory->NewStringFromAsciiChecked(language_tag),
                       factory->NewNumber(i), NONE));
    }
  }

  return *locales;
}
150
151
// Returns the language tag for a default-constructed icu::Locale, or "und"
// if ICU cannot convert that locale's name into a language tag.
RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 0);

  icu::Locale icu_default;

  // Convert the ICU locale name into a language tag (non-strict mode).
  UErrorCode status = U_ZERO_ERROR;
  char language_tag[ULOC_FULLNAME_CAPACITY];
  uloc_toLanguageTag(icu_default.getName(), language_tag,
                     ULOC_FULLNAME_CAPACITY, FALSE, &status);

  if (U_FAILURE(status)) {
    return *factory->NewStringFromStaticChars("und");
  }
  return *factory->NewStringFromAsciiChecked(language_tag);
}
171
172
// Given an array of BCP47 language tags, returns an array with one object per
// tag. Each object has two string properties:
//   "maximized": the tag with likely subtags added and extensions removed;
//   "base":      the tag with extensions removed.
// Throws an illegal-operation error if the array has 100 or more elements,
// and throws the illegal-argument string if an element is not a string or if
// any ICU conversion step fails or yields an unterminated buffer.
RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);

  uint32_t length = static_cast<uint32_t>(input->length()->Number());
  // Set some limit to prevent fuzz tests from going OOM.
  // Can be bumped when callers' requirements change.
  if (length >= 100) return isolate->ThrowIllegalOperation();
  Handle<FixedArray> output = factory->NewFixedArray(length);
  Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
  Handle<Name> base = factory->NewStringFromStaticChars("base");
  for (unsigned int i = 0; i < length; ++i) {
    Handle<Object> locale_id;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
    if (!locale_id->IsString()) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    v8::String::Utf8Value utf8_locale_id(
        v8::Utils::ToLocal(Handle<String>::cast(locale_id)));

    // Every ICU call below writes into a fixed-size buffer that the next call
    // reads as a C string. U_STRING_NOT_TERMINATED_WARNING is not a failure
    // code, yet it means the buffer was filled exactly and has no terminating
    // NUL, so it is treated as an error after each step.
    UErrorCode error = U_ZERO_ERROR;

    // Convert from BCP47 to ICU format.
    // de-DE-u-co-phonebk -> de_DE@collation=phonebook
    char icu_locale[ULOC_FULLNAME_CAPACITY];
    int icu_locale_length = 0;
    uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
                        &icu_locale_length, &error);
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING ||
        icu_locale_length == 0) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Maximize the locale.
    // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
    char icu_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
                          &error);
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Remove extensions from maximized locale.
    // de_Latn_DE@collation=phonebook -> de_Latn_DE
    char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_getBaseName(icu_max_locale, icu_base_max_locale,
                     ULOC_FULLNAME_CAPACITY, &error);
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Get original name without extensions.
    // de_DE@collation=phonebook -> de_DE
    char icu_base_locale[ULOC_FULLNAME_CAPACITY];
    uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
                     &error);
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Convert from ICU locale format to BCP47 format.
    // de_Latn_DE -> de-Latn-DE
    char base_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
                       ULOC_FULLNAME_CAPACITY, FALSE, &error);
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // de_DE -> de-DE
    char base_locale[ULOC_FULLNAME_CAPACITY];
    uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
                       FALSE, &error);
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
    Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
    JSObject::AddProperty(result, maximized, value, NONE);
    value = factory->NewStringFromAsciiChecked(base_locale);
    JSObject::AddProperty(result, base, value, NONE);
    output->set(i, *result);
  }

  Handle<JSArray> result = factory->NewJSArrayWithElements(output);
  result->set_length(Smi::FromInt(length));
  return *result;
}
256
257
// Returns true iff the argument is a JSObject whose data property keyed by
// the intl_initialized_marker symbol is not undefined.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  if (input->IsJSObject()) {
    Handle<JSObject> receiver = Handle<JSObject>::cast(input);
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value =
        JSReceiver::GetDataProperty(receiver, marker_symbol);
    return isolate->heap()->ToBoolean(!marker_value->IsUndefined(isolate));
  }

  // Non-objects can never carry the marker.
  return isolate->heap()->false_value();
}
272
273
// Returns true iff the first argument is a JSObject whose data property keyed
// by the intl_initialized_marker symbol is a string equal to |expected_type|.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);

  if (!input->IsJSObject()) return isolate->heap()->false_value();

  Handle<JSObject> receiver = Handle<JSObject>::cast(input);
  Handle<Symbol> marker_symbol =
      isolate->factory()->intl_initialized_marker_symbol();
  Handle<Object> marker_value =
      JSReceiver::GetDataProperty(receiver, marker_symbol);

  // The marker only counts when it is a string naming the expected type.
  bool matches = false;
  if (marker_value->IsString()) {
    matches = String::cast(*marker_value)->Equals(*expected_type);
  }
  return isolate->heap()->ToBoolean(matches);
}
290
291
// Stores |type| on |input| under the intl_initialized_marker symbol and
// |impl| under the intl_impl_object symbol. Both stores are asserted to
// succeed. Returns undefined.
RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2);

  Factory* factory = isolate->factory();

  // Record the type tag first, then the implementation object.
  Handle<Symbol> type_key = factory->intl_initialized_marker_symbol();
  JSObject::SetProperty(input, type_key, type, STRICT).Assert();

  Handle<Symbol> impl_key = factory->intl_impl_object_symbol();
  JSObject::SetProperty(input, impl_key, impl, STRICT).Assert();

  return isolate->heap()->undefined_value();
}
309
310
// Returns the implementation object stored on |input| under the
// intl_impl_object symbol. Throws a kNotIntlObject TypeError if |input| is not
// a JSObject or if the stored value is not a JSObject.
RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  // Accept any value here so that non-objects reach the TypeError below
  // instead of tripping the argument type check, which would make that branch
  // unreachable.
  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  if (!input->IsJSObject()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kNotIntlObject, input));
  }

  Handle<JSObject> obj = Handle<JSObject>::cast(input);

  Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol();

  Handle<Object> impl = JSReceiver::GetDataProperty(obj, marker);
  if (!impl->IsJSObject()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kNotIntlObject, obj));
  }
  return *impl;
}
334
335
// Creates a wrapper JSObject holding an icu::SimpleDateFormat (built from
// |locale|, |options| and |resolved|) in internal field 0, tagged with a
// "dateFormat": "valid" property. Throws an illegal-operation error if the
// formatter cannot be created.
RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate an empty wrapper from the shared i18n object template.
  Handle<JSObject> wrapper_object;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, wrapper_object,
      ApiNatives::InstantiateObject(I18N::GetTemplate(isolate)));

  // Build the ICU formatter and stash it in the wrapper's internal field.
  icu::SimpleDateFormat* formatter =
      DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  wrapper_object->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(wrapper_object,
                        factory->NewStringFromStaticChars("dateFormat"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Make object handle weak so we can delete the date format once GC kicks in.
  Handle<Object> weak_handle =
      isolate->global_handles()->Create(*wrapper_object);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          DateFormat::DeleteDateFormat,
                          WeakCallbackType::kInternalFields);
  return *wrapper_object;
}
373
374
// Formats |date| with the icu::SimpleDateFormat unpacked from
// |date_format_holder| and returns the result as a two-byte string. Throws an
// illegal-operation error if the holder yields no formatter.
RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  formatter->format(time_value->Number(), formatted);

  // View the ICU buffer as UTF-16 code units for the string factory.
  Vector<const uint16_t> code_units(
      reinterpret_cast<const uint16_t*>(formatted.getBuffer()),
      formatted.length());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(code_units));
}
398
399 namespace {
400 // The list comes from third_party/icu/source/i18n/unicode/udat.h.
401 // They're mapped to DateTimeFormat components listed at
402 // https://tc39.github.io/ecma402/#sec-datetimeformat-abstracts .
403
IcuDateFieldIdToDateType(int32_t field_id,Isolate * isolate)404 Handle<String> IcuDateFieldIdToDateType(int32_t field_id, Isolate* isolate) {
405 switch (field_id) {
406 case -1:
407 return isolate->factory()->literal_string();
408 case UDAT_YEAR_FIELD:
409 case UDAT_EXTENDED_YEAR_FIELD:
410 case UDAT_YEAR_NAME_FIELD:
411 return isolate->factory()->year_string();
412 case UDAT_MONTH_FIELD:
413 case UDAT_STANDALONE_MONTH_FIELD:
414 return isolate->factory()->month_string();
415 case UDAT_DATE_FIELD:
416 return isolate->factory()->day_string();
417 case UDAT_HOUR_OF_DAY1_FIELD:
418 case UDAT_HOUR_OF_DAY0_FIELD:
419 case UDAT_HOUR1_FIELD:
420 case UDAT_HOUR0_FIELD:
421 return isolate->factory()->hour_string();
422 case UDAT_MINUTE_FIELD:
423 return isolate->factory()->minute_string();
424 case UDAT_SECOND_FIELD:
425 return isolate->factory()->second_string();
426 case UDAT_DAY_OF_WEEK_FIELD:
427 case UDAT_DOW_LOCAL_FIELD:
428 case UDAT_STANDALONE_DAY_FIELD:
429 return isolate->factory()->weekday_string();
430 case UDAT_AM_PM_FIELD:
431 return isolate->factory()->dayperiod_string();
432 case UDAT_TIMEZONE_FIELD:
433 case UDAT_TIMEZONE_RFC_FIELD:
434 case UDAT_TIMEZONE_GENERIC_FIELD:
435 case UDAT_TIMEZONE_SPECIAL_FIELD:
436 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD:
437 case UDAT_TIMEZONE_ISO_FIELD:
438 case UDAT_TIMEZONE_ISO_LOCAL_FIELD:
439 return isolate->factory()->timeZoneName_string();
440 case UDAT_ERA_FIELD:
441 return isolate->factory()->era_string();
442 default:
443 // Other UDAT_*_FIELD's cannot show up because there is no way to specify
444 // them via options of Intl.DateTimeFormat.
445 UNREACHABLE();
446 // To prevent MSVC from issuing C4715 warning.
447 return Handle<String>();
448 }
449 }
450
AddElement(Handle<JSArray> array,int index,int32_t field_id,const icu::UnicodeString & formatted,int32_t begin,int32_t end,Isolate * isolate)451 bool AddElement(Handle<JSArray> array, int index, int32_t field_id,
452 const icu::UnicodeString& formatted, int32_t begin, int32_t end,
453 Isolate* isolate) {
454 HandleScope scope(isolate);
455 Factory* factory = isolate->factory();
456 Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
457 Handle<String> value = IcuDateFieldIdToDateType(field_id, isolate);
458 JSObject::AddProperty(element, factory->type_string(), value, NONE);
459
460 icu::UnicodeString field(formatted.tempSubStringBetween(begin, end));
461 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
462 isolate, value, factory->NewStringFromTwoByte(Vector<const uint16_t>(
463 reinterpret_cast<const uint16_t*>(field.getBuffer()),
464 field.length())),
465 false);
466
467 JSObject::AddProperty(element, factory->value_string(), value, NONE);
468 RETURN_ON_EXCEPTION_VALUE(
469 isolate, JSObject::AddDataElement(array, index, element, NONE), false);
470 return true;
471 }
472
473 } // namespace
474
// Formats |date| with the formatter unpacked from |date_format_holder| and
// returns an array of {type, value} part objects covering the whole formatted
// string. Text between ICU-reported fields is emitted as a part with field id
// -1. Returns undefined if ICU formatting fails or a part cannot be added.
RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  icu::FieldPositionIterator field_iter;
  UErrorCode status = U_ZERO_ERROR;
  formatter->format(time_value->Number(), formatted, &field_iter, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  Handle<JSArray> parts = factory->NewJSArray(0);
  const int32_t total_length = formatted.length();
  if (total_length == 0) return *parts;

  int next_index = 0;
  // End offset of the last part emitted so far.
  int32_t cursor = 0;
  icu::FieldPosition field;
  while (field_iter.next(field)) {
    const int32_t field_begin = field.getBeginIndex();
    const int32_t field_end = field.getEndIndex();

    // Emit the text between the previous field and this one, if any.
    if (cursor < field_begin &&
        !AddElement(parts, next_index++, -1, formatted, cursor, field_begin,
                    isolate)) {
      return isolate->heap()->undefined_value();
    }
    if (!AddElement(parts, next_index++, field.getField(), formatted,
                    field_begin, field_end, isolate)) {
      return isolate->heap()->undefined_value();
    }
    cursor = field_end;
  }

  // Emit whatever text follows the last field.
  if (cursor < total_length &&
      !AddElement(parts, next_index, -1, formatted, cursor, total_length,
                  isolate)) {
    return isolate->heap()->undefined_value();
  }

  JSObject::ValidateElements(parts);
  return *parts;
}
531
// Parses |date_string| with the formatter unpacked from |date_format_holder|
// and returns a new JSDate for the parsed time. Returns undefined if ICU
// reports a parse failure, and throws an illegal-operation error if the
// holder yields no formatter.
RUNTIME_FUNCTION(Runtime_InternalDateParse) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1);

  // Hand the input to ICU as a UnicodeString built from its UTF-8 form.
  v8::String::Utf8Value utf8_input(v8::Utils::ToLocal(date_string));
  icu::UnicodeString icu_input(icu::UnicodeString::fromUTF8(*utf8_input));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  UErrorCode status = U_ZERO_ERROR;
  const UDate parsed = formatter->parse(icu_input, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  RETURN_RESULT_OR_FAILURE(
      isolate, JSDate::New(isolate->date_function(), isolate->date_function(),
                           static_cast<double>(parsed)));
}
554
555
// Creates a wrapper JSObject holding an icu::DecimalFormat (built from
// |locale|, |options| and |resolved|) in internal field 0, tagged with a
// "numberFormat": "valid" property. Throws an illegal-operation error if the
// formatter cannot be created.
RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate an empty wrapper from the shared i18n object template.
  Handle<JSObject> wrapper_object;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, wrapper_object,
      ApiNatives::InstantiateObject(I18N::GetTemplate(isolate)));

  // Build the ICU formatter and stash it in the wrapper's internal field.
  icu::DecimalFormat* formatter =
      NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  wrapper_object->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(wrapper_object,
                        factory->NewStringFromStaticChars("numberFormat"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Register NumberFormat::DeleteNumberFormat as the weak callback for a
  // global handle to the wrapper.
  Handle<Object> weak_handle =
      isolate->global_handles()->Create(*wrapper_object);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          NumberFormat::DeleteNumberFormat,
                          WeakCallbackType::kInternalFields);
  return *wrapper_object;
}
593
594
// Converts |number| to a Number, formats it with the icu::DecimalFormat
// unpacked from |number_format_holder| and returns the result as a two-byte
// string. Throws an illegal-operation error if the holder yields no formatter.
RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);

  Handle<Object> numeric_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, numeric_value,
                                     Object::ToNumber(number));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  formatter->format(numeric_value->Number(), formatted);

  // View the ICU buffer as UTF-16 code units for the string factory.
  Vector<const uint16_t> code_units(
      reinterpret_cast<const uint16_t*>(formatted.getBuffer()),
      formatted.length());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(code_units));
}
618
619
// Parses |number_string| with the icu::DecimalFormat unpacked from
// |number_format_holder| and returns the parsed value as a Number. Returns
// undefined if ICU reports a parse failure or produces a non-numeric
// Formattable. Also bumps the kIntlV8Parse use counter.
RUNTIME_FUNCTION(Runtime_InternalNumberParse) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1);

  isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse);

  // Hand the input to ICU as a UnicodeString built from its UTF-8 form.
  v8::String::Utf8Value utf8_input(v8::Utils::ToLocal(number_string));
  icu::UnicodeString icu_input(icu::UnicodeString::fromUTF8(*utf8_input));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  UErrorCode status = U_ZERO_ERROR;
  icu::Formattable parsed;
  // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49
  // to be part of Chrome.
  // TODO(cira): Include currency parsing code using parseCurrency call.
  // We need to check if the formatter parses all currencies or only the
  // one it was constructed with (it will impact the API - how to return ISO
  // code and the value).
  formatter->parse(icu_input, parsed, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  Factory* factory = isolate->factory();
  switch (parsed.getType()) {
    case icu::Formattable::kDouble:
      return *factory->NewNumber(parsed.getDouble());
    case icu::Formattable::kLong:
      return *factory->NewNumberFromInt(parsed.getLong());
    case icu::Formattable::kInt64:
      return *factory->NewNumber(static_cast<double>(parsed.getInt64()));
    default:
      return isolate->heap()->undefined_value();
  }
}
659
660
// Creates a wrapper JSObject holding an icu::Collator (built from |locale|,
// |options| and |resolved|) in internal field 0, tagged with a
// "collator": "valid" property. Throws an illegal-operation error if the
// collator cannot be created.
RUNTIME_FUNCTION(Runtime_CreateCollator) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate an empty wrapper from the shared i18n object template.
  Handle<JSObject> wrapper_object;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, wrapper_object,
      ApiNatives::InstantiateObject(I18N::GetTemplate(isolate)));

  // Build the ICU collator and stash it in the wrapper's internal field.
  icu::Collator* icu_collator =
      Collator::InitializeCollator(isolate, locale, options, resolved);
  if (icu_collator == nullptr) return isolate->ThrowIllegalOperation();

  wrapper_object->SetInternalField(0, reinterpret_cast<Smi*>(icu_collator));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(wrapper_object,
                        factory->NewStringFromStaticChars("collator"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Register Collator::DeleteCollator as the weak callback for a global
  // handle to the wrapper.
  Handle<Object> weak_handle =
      isolate->global_handles()->Create(*wrapper_object);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          Collator::DeleteCollator,
                          WeakCallbackType::kInternalFields);
  return *wrapper_object;
}
696
697
// Compares |string1| and |string2| with the icu::Collator unpacked from
// |collator_holder| and returns the UCollationResult as a Number. Throws an
// illegal-operation error if the holder yields no collator or if ICU reports
// a failure.
RUNTIME_FUNCTION(Runtime_InternalCompare) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);

  icu::Collator* icu_collator =
      Collator::UnpackCollator(isolate, collator_holder);
  if (icu_collator == nullptr) return isolate->ThrowIllegalOperation();

  // Flatten before entering the no-allocation scope below.
  string1 = String::Flatten(string1);
  string2 = String::Flatten(string2);

  UErrorCode status = U_ZERO_ERROR;
  UCollationResult order;
  {
    DisallowHeapAllocation no_gc;
    const int32_t lhs_length = string1->length();
    const int32_t rhs_length = string2->length();
    String::FlatContent lhs_flat = string1->GetFlatContent();
    String::FlatContent rhs_flat = string2->GetFlatContent();
    // Backing storage used only when a string is one-byte and must be widened.
    std::unique_ptr<uc16[]> lhs_storage;
    std::unique_ptr<uc16[]> rhs_storage;
    const UChar* lhs_chars =
        GetUCharBufferFromFlat(lhs_flat, &lhs_storage, lhs_length);
    const UChar* rhs_chars =
        GetUCharBufferFromFlat(rhs_flat, &rhs_storage, rhs_length);
    order = icu_collator->compare(lhs_chars, lhs_length, rhs_chars, rhs_length,
                                  status);
  }
  if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();

  return *isolate->factory()->NewNumberFromInt(order);
}
732
733
// Normalizes string |s| using the form selected by the integer |form_id|,
// which indexes the table below. Returns |s| itself when it is already
// normalized, a new two-byte string otherwise, or undefined if ICU reports a
// failure.
RUNTIME_FUNCTION(Runtime_StringNormalize) {
  HandleScope scope(isolate);
  // (data name, mode) pairs passed to icu::Normalizer2::getInstance, indexed
  // by |form_id|.
  static const struct {
    const char* name;
    UNormalization2Mode mode;
  } normalizationForms[] = {
      {"nfc", UNORM2_COMPOSE},
      {"nfc", UNORM2_DECOMPOSE},
      {"nfkc", UNORM2_COMPOSE},
      {"nfkc", UNORM2_DECOMPOSE},
  };

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
  CHECK(form_id >= 0 &&
        static_cast<size_t>(form_id) < arraysize(normalizationForms));

  int input_length = s->length();
  s = String::Flatten(s);
  icu::UnicodeString normalized;
  // Declared outside the scope below so that a widened one-byte buffer stays
  // alive until |normalized| has been converted into the result string.
  std::unique_ptr<uc16[]> widened;
  UErrorCode status = U_ZERO_ERROR;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent();
    const UChar* source = GetUCharBufferFromFlat(flat, &widened, input_length);
    icu::UnicodeString input(false, source, input_length);
    // Getting a singleton. Should not free it.
    const icu::Normalizer2* normalizer =
        icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
                                      normalizationForms[form_id].mode, status);
    DCHECK(U_SUCCESS(status));
    CHECK(normalizer != nullptr);
    int32_t prefix_length = normalizer->spanQuickCheckYes(input, status);
    // Quick return if the input is already normalized.
    if (input_length == prefix_length) return *s;
    icu::UnicodeString suffix = input.tempSubString(prefix_length);
    // Read-only alias of the normalized prefix.
    normalized.setTo(false, input.getBuffer(), prefix_length);
    // copy-on-write; normalize the suffix and append to |normalized|.
    normalizer->normalizeSecondAndAppend(normalized, suffix, status);
  }

  if (U_FAILURE(status)) {
    return isolate->heap()->undefined_value();
  }

  Vector<const uint16_t> code_units(
      reinterpret_cast<const uint16_t*>(normalized.getBuffer()),
      normalized.length());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(code_units));
}
790
791
// Creates a wrapper JSObject holding an icu::BreakIterator (built from
// |locale|, |options| and |resolved|) in internal field 0 and a null adopted
// text pointer in internal field 1, tagged with a "breakIterator": "valid"
// property. Throws an illegal-operation error if the iterator cannot be
// created.
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate an empty wrapper from the I18N::GetTemplate2 object template.
  Handle<JSObject> wrapper_object;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, wrapper_object,
      ApiNatives::InstantiateObject(I18N::GetTemplate2(isolate)));

  // Build the ICU break iterator and stash it in the wrapper's first field.
  icu::BreakIterator* iterator = BreakIterator::InitializeBreakIterator(
      isolate, locale, options, resolved);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  wrapper_object->SetInternalField(0, reinterpret_cast<Smi*>(iterator));
  // Make sure that the pointer to adopted text is NULL.
  wrapper_object->SetInternalField(1, static_cast<Smi*>(nullptr));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(wrapper_object,
                        factory->NewStringFromStaticChars("breakIterator"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Make object handle weak so we can delete the break iterator once GC kicks
  // in.
  Handle<Object> weak_handle =
      isolate->global_handles()->Create(*wrapper_object);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          BreakIterator::DeleteBreakIterator,
                          WeakCallbackType::kInternalFields);
  return *wrapper_object;
}
833
834
// Replaces the text analyzed by the break iterator unpacked from
// |break_iterator_holder|. The previous icu::UnicodeString kept in internal
// field 1 is deleted, a new one is created from |text|, stored in that field
// and set on the iterator. Returns undefined.
RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, text, 1);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  // Release the previously adopted text, if any (deleting null is a no-op).
  delete reinterpret_cast<icu::UnicodeString*>(
      break_iterator_holder->GetInternalField(1));

  int text_length = text->length();
  text = String::Flatten(text);
  DisallowHeapAllocation no_gc;
  String::FlatContent flat = text->GetFlatContent();
  std::unique_ptr<uc16[]> widened;
  const UChar* text_chars = GetUCharBufferFromFlat(flat, &widened, text_length);
  icu::UnicodeString* adopted_text =
      new icu::UnicodeString(text_chars, text_length);
  break_iterator_holder->SetInternalField(
      1, reinterpret_cast<Smi*>(adopted_text));

  iterator->setText(*adopted_text);

  return isolate->heap()->undefined_value();
}
864
865
// Calls first() on the break iterator unpacked from |break_iterator_holder|
// and returns the result as a Number. Throws an illegal-operation error if
// the holder yields no iterator.
RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  const int32_t position = iterator->first();
  return *isolate->factory()->NewNumberFromInt(position);
}
879
880
// Calls next() on the ICU break iterator wrapped by args[0] and returns the
// resulting position as a number. If no break iterator can be unpacked from
// the holder, the result of ThrowIllegalOperation() is returned instead.
RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* const iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  const int32_t position = iterator->next();
  return *isolate->factory()->NewNumberFromInt(position);
}
894
895
// Calls current() on the ICU break iterator wrapped by args[0] and returns
// the resulting position as a number. If no break iterator can be unpacked
// from the holder, the result of ThrowIllegalOperation() is returned instead.
RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* const iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  const int32_t position = iterator->current();
  return *isolate->factory()->NewNumberFromInt(position);
}
909
910
// Maps the rule status of the ICU break iterator wrapped by args[0] to one of
// the strings "none", "number", "letter", "kana", "ideo" or "unknown".
// If no break iterator can be unpacked from the holder, the result of
// ThrowIllegalOperation() is returned instead.
RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, holder, 0);

  icu::BreakIterator* const iterator =
      BreakIterator::UnpackBreakIterator(isolate, holder);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
  const int32_t rule_status =
      static_cast<icu::RuleBasedBreakIterator*>(iterator)->getRuleStatus();

  // True when the rule status lies in the half-open range [first, limit).
  const auto in_range = [rule_status](int32_t first, int32_t limit) {
    return rule_status >= first && rule_status < limit;
  };

  // Keep return values in sync with JavaScript BreakType enum.
  if (in_range(UBRK_WORD_NONE, UBRK_WORD_NONE_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("none");
  }
  if (in_range(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT)) {
    return isolate->heap()->number_string();
  }
  if (in_range(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("letter");
  }
  if (in_range(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("kana");
  }
  if (in_range(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT)) {
    return *isolate->factory()->NewStringFromStaticChars("ideo");
  }
  return *isolate->factory()->NewStringFromStaticChars("unknown");
}
941
942 namespace {
LocaleConvertCase(Handle<String> s,Isolate * isolate,bool is_to_upper,const char * lang)943 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
944 bool is_to_upper, const char* lang) {
945 auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
946 int32_t src_length = s->length();
947 int32_t dest_length = src_length;
948 UErrorCode status;
949 Handle<SeqTwoByteString> result;
950 std::unique_ptr<uc16[]> sap;
951
952 // This is not a real loop. It'll be executed only once (no overflow) or
953 // twice (overflow).
954 for (int i = 0; i < 2; ++i) {
955 // Case conversion can increase the string length (e.g. sharp-S => SS) so
956 // that we have to handle RangeError exceptions here.
957 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
958 isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
959 DisallowHeapAllocation no_gc;
960 String::FlatContent flat = s->GetFlatContent();
961 const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
962 status = U_ZERO_ERROR;
963 dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
964 dest_length, src, src_length, lang, &status);
965 if (status != U_BUFFER_OVERFLOW_ERROR) break;
966 }
967
968 // In most cases, the output will fill the destination buffer completely
969 // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
970 // Only in rare cases, it'll be shorter than the destination buffer and
971 // |result| has to be truncated.
972 DCHECK(U_SUCCESS(status));
973 if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
974 DCHECK(dest_length == result->length());
975 return *result;
976 }
977 if (U_SUCCESS(status)) {
978 DCHECK(dest_length < result->length());
979 return *Handle<SeqTwoByteString>::cast(
980 SeqString::Truncate(result, dest_length));
981 }
982 return *s;
983 }
984
// Returns true iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  return 'A' <= ch && ch <= 'Z';
}
986
// Lower-case mapping for every Latin-1 code unit, indexed by the code unit.
// 'A'..'Z' (0x41..0x5A) map to 'a'..'z' and 0xC0..0xDE map to 0xE0..0xFE,
// except 0xD7, which maps to itself. Every other entry maps to itself.
// The trailing comment on each row is the index of its first entry.
const uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // 0x00
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,  // 0x08
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,  // 0x10
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,  // 0x18
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,  // 0x20
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,  // 0x28
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,  // 0x30
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,  // 0x38
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x40 (A-G lowered)
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x48 (H-O lowered)
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x50 (P-W lowered)
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,  // 0x58 (X-Z lowered)
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x60
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x68
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x70
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,  // 0x78
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x80
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,  // 0x88
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,  // 0x90
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,  // 0x98
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,  // 0xA0
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,  // 0xA8
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,  // 0xB0
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,  // 0xB8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xC0 (lowered)
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xC8 (lowered)
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,  // 0xD0 (0xD7 unchanged)
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,  // 0xD8 (0xDF unchanged)
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xE0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xE8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,  // 0xF0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,  // 0xF8
};
1011
ToLatin1Lower(uint16_t ch)1012 inline uint16_t ToLatin1Lower(uint16_t ch) {
1013 return static_cast<uint16_t>(kToLower[ch]);
1014 }
1015
// Upper-cases |ch| if it is an ASCII letter 'a'..'z' and returns every other
// value unchanged. Written without a branch: bit 5 (0x20) is cleared exactly
// when |ch| is a lower-case ASCII letter.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  const int case_bit = is_ascii_lower << 5;  // 0x20 or 0.
  return ch & ~case_bit;
}
1019
1020 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)1021 inline uint16_t ToLatin1Upper(uint16_t ch) {
1022 DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
1023 return ch &
1024 ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
1025 << 5);
1026 }
1027
1028 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)1029 bool ToUpperFastASCII(const Vector<const Char>& src,
1030 Handle<SeqOneByteString> result) {
1031 // Do a faster loop for the case where all the characters are ASCII.
1032 uint16_t ored = 0;
1033 int32_t index = 0;
1034 for (auto it = src.begin(); it != src.end(); ++it) {
1035 uint16_t ch = static_cast<uint16_t>(*it);
1036 ored |= ch;
1037 result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
1038 }
1039 return !(ored & ~0x7F);
1040 }
1041
// Code unit of the lower-case sharp-s (U+00DF).
const uint16_t sharp_s = 0x00DF;
1043
1044 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,Handle<SeqOneByteString> result,int * sharp_s_count)1045 bool ToUpperOneByte(const Vector<const Char>& src,
1046 Handle<SeqOneByteString> result, int* sharp_s_count) {
1047 // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
1048
1049 // There are two special cases.
1050 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
1051 // 2. Lower case sharp-S converts to "SS" (two characters)
1052 *sharp_s_count = 0;
1053 int32_t index = 0;
1054 for (auto it = src.begin(); it != src.end(); ++it) {
1055 uint16_t ch = static_cast<uint16_t>(*it);
1056 if (V8_UNLIKELY(ch == sharp_s)) {
1057 ++(*sharp_s_count);
1058 continue;
1059 }
1060 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
1061 // Since this upper-cased character does not fit in an 8-bit string, we
1062 // need to take the 16-bit path.
1063 return false;
1064 }
1065 result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
1066 }
1067
1068 return true;
1069 }
1070
1071 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)1072 void ToUpperWithSharpS(const Vector<const Char>& src,
1073 Handle<SeqOneByteString> result) {
1074 int32_t dest_index = 0;
1075 for (auto it = src.begin(); it != src.end(); ++it) {
1076 uint16_t ch = static_cast<uint16_t>(*it);
1077 if (ch == sharp_s) {
1078 result->SeqOneByteStringSet(dest_index++, 'S');
1079 result->SeqOneByteStringSet(dest_index++, 'S');
1080 } else {
1081 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
1082 }
1083 }
1084 }
1085
1086 } // namespace
1087
// Lower-cases the string in args[0] without locale tailoring.
//
// Strings that report only one-byte characters are handled here with the
// kToLower table. The input itself is returned when it is already all ASCII
// without capitals. Everything else goes through LocaleConvertCase() with an
// empty locale.
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);

  const int length = s->length();
  s = String::Flatten(s);

  // Blink had an additional case here for ASCII 2-byte strings, but that is
  // subsumed by the fast path below (assuming there isn't a false negative
  // for HasOnlyOneByteChars). Strings with characters beyond Latin-1 take the
  // slower ICU-based implementation.
  if (!s->HasOnlyOneByteChars()) {
    return LocaleConvertCase(s, isolate, false, "");
  }

  // Find the first character that is an ASCII capital or not ASCII at all.
  // Blink specializes this scan for one-byte strings so that it does not need
  // a generic get, but can do the equivalent of SeqOneByteStringGet.
  int first_index_to_lower = 0;
  while (first_index_to_lower < length) {
    const uint16_t ch = s->Get(first_index_to_lower);
    if (V8_UNLIKELY(IsASCIIUpper(ch) || (ch & ~0x7F) != 0)) break;
    ++first_index_to_lower;
  }

  // All ASCII and no capitals: the input already is its own lower case.
  if (first_index_to_lower == length) return *s;

  // This relies on the invariant that lower-casing a Latin1 string in the
  // *root locale* neither changes its length nor leaves the Latin1 range.
  // The same is not true for ToUpperCase, even in the root locale.
  Handle<SeqOneByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, isolate->factory()->NewRawOneByteString(length));

  DisallowHeapAllocation no_gc;
  String::FlatContent content = s->GetFlatContent();
  const size_t unchanged_prefix = static_cast<size_t>(first_index_to_lower);
  if (content.IsOneByte()) {
    const uint8_t* chars = content.ToOneByteVector().start();
    // The prefix before the first offending character is copied verbatim.
    CopyChars(result->GetChars(), chars, unchanged_prefix);
    for (int i = first_index_to_lower; i < length; ++i) {
      const uint16_t ch = static_cast<uint16_t>(chars[i]);
      result->SeqOneByteStringSet(i, ToLatin1Lower(ch));
    }
  } else {
    const uint16_t* chars = content.ToUC16Vector().start();
    CopyChars(result->GetChars(), chars, unchanged_prefix);
    for (int i = first_index_to_lower; i < length; ++i) {
      const uint16_t ch = chars[i];
      result->SeqOneByteStringSet(i, ToLatin1Lower(ch));
    }
  }

  return *result;
}
1150
// Upper-cases the string in args[0] without locale tailoring.
//
// Strings that report only one-byte characters are tried on three paths in
// turn: pure ASCII, Latin-1 without sharp-s, and Latin-1 with sharp-s (which
// lengthens the result). Input containing U+00B5 or U+00FF, and any string
// with characters beyond Latin-1, goes through LocaleConvertCase() with an
// empty locale.
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);

  // Unlike the lowercase counterpart, no pre-scan for the no-op case is done:
  // in empirical testing few actual calls to upper() are no-ops, so the extra
  // pass would not pay off.

  const int32_t length = s->length();
  s = String::Flatten(s);

  if (!s->HasOnlyOneByteChars()) {
    return LocaleConvertCase(s, isolate, true, "");
  }

  Handle<SeqOneByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, isolate->factory()->NewRawOneByteString(length));

  int sharp_s_count;
  bool fits_in_one_byte;
  {
    // Scoped so that allocation is permitted again for the paths below.
    DisallowHeapAllocation no_gc;
    String::FlatContent content = s->GetFlatContent();
    // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
    // could be removed because ToUpperOneByte is pretty fast now (it does not
    // call ICU API any more.).
    if (content.IsOneByte()) {
      Vector<const uint8_t> chars = content.ToOneByteVector();
      if (ToUpperFastASCII(chars, result)) return *result;
      fits_in_one_byte = ToUpperOneByte(chars, result, &sharp_s_count);
    } else {
      DCHECK(content.IsTwoByte());
      Vector<const uint16_t> chars = content.ToUC16Vector();
      if (ToUpperFastASCII(chars, result)) return *result;
      fits_in_one_byte = ToUpperOneByte(chars, result, &sharp_s_count);
    }
  }

  // Some character upper-cases to something beyond the Latin-1 range, which a
  // one-byte string cannot hold: take the full Unicode path.
  if (V8_UNLIKELY(!fits_in_one_byte)) {
    return LocaleConvertCase(s, isolate, true, "");
  }

  if (sharp_s_count == 0) return *result;

  // Each of the sharp_s_count sharp-s characters becomes two characters, but
  // the result still fits in the Latin-1 range. Redo the conversion into a
  // string that is long enough.
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
      isolate->factory()->NewRawOneByteString(length + sharp_s_count));
  DisallowHeapAllocation no_gc;
  String::FlatContent content = s->GetFlatContent();
  if (content.IsOneByte()) {
    ToUpperWithSharpS(content.ToOneByteVector(), result);
  } else {
    ToUpperWithSharpS(content.ToUC16Vector(), result);
  }

  return *result;
}
1214
// Case-converts the string in args[0] with locale-specific rules.
//   args[1]: boolean, true for upper case and false for lower case.
//   args[2]: one-byte string holding a two-letter language code.
// The work is done by LocaleConvertCase(), whose result is returned.
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 3);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
  CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);

  // All the languages requiring special handling ("az", "el", "lt", "tr")
  // have a 2-letter language code.
  DCHECK(lang->length() == 2);

  // Copy the code into a NUL-terminated buffer before anything below can
  // allocate.
  char lang_code[3];
  memcpy(lang_code, lang->GetChars(), 2);
  lang_code[2] = '\0';

  s = String::Flatten(s);
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment, though.
  return LocaleConvertCase(s, isolate, is_upper, lang_code);
}
1236
// Returns the value in slot 0 of the DATE_CACHE_VERSION eternal-handle
// singleton. The singleton is a one-element tenured FixedArray that is created
// on first use with Smi zero in its slot. While the serializer is enabled,
// undefined is returned and no singleton is created.
RUNTIME_FUNCTION(Runtime_DateCacheVersion) {
  HandleScope scope(isolate);
  DCHECK_EQ(0, args.length());
  if (isolate->serializer_enabled()) return isolate->heap()->undefined_value();

  EternalHandles* const eternal_handles = isolate->eternal_handles();
  if (!eternal_handles->Exists(EternalHandles::DATE_CACHE_VERSION)) {
    Handle<FixedArray> initial_version =
        isolate->factory()->NewFixedArray(1, TENURED);
    initial_version->set(0, Smi::kZero);
    eternal_handles->CreateSingleton(isolate, *initial_version,
                                     EternalHandles::DATE_CACHE_VERSION);
  }

  Handle<FixedArray> date_cache_version = Handle<FixedArray>::cast(
      eternal_handles->GetSingleton(EternalHandles::DATE_CACHE_VERSION));
  return date_cache_version->get(0);
}
1253
1254 } // namespace internal
1255 } // namespace v8
1256
1257 #endif // V8_I18N_SUPPORT
1258