%PDF- %PDF-
Direktori : /home/vacivi36/vittasync.vacivitta.com.br/vittasync/node/deps/v8/src/objects/ |
Current File : /home/vacivi36/vittasync.vacivitta.com.br/vittasync/node/deps/v8/src/objects/intl-objects.cc |
// Copyright 2013 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_INTL_SUPPORT #error Internationalization is expected to be enabled. #endif // V8_INTL_SUPPORT #include "src/objects/intl-objects.h" #include <algorithm> #include <memory> #include <string> #include <vector> #include "src/api/api-inl.h" #include "src/base/strings.h" #include "src/date/date.h" #include "src/execution/isolate.h" #include "src/execution/local-isolate.h" #include "src/handles/global-handles.h" #include "src/heap/factory.h" #include "src/objects/js-collator-inl.h" #include "src/objects/js-date-time-format-inl.h" #include "src/objects/js-locale-inl.h" #include "src/objects/js-locale.h" #include "src/objects/js-number-format-inl.h" #include "src/objects/js-temporal-objects.h" #include "src/objects/managed-inl.h" #include "src/objects/objects-inl.h" #include "src/objects/option-utils.h" #include "src/objects/property-descriptor.h" #include "src/objects/smi.h" #include "src/objects/string.h" #include "src/strings/string-case.h" #include "unicode/basictz.h" #include "unicode/brkiter.h" #include "unicode/calendar.h" #include "unicode/coll.h" #include "unicode/datefmt.h" #include "unicode/decimfmt.h" #include "unicode/formattedvalue.h" #include "unicode/localebuilder.h" #include "unicode/localematcher.h" #include "unicode/locid.h" #include "unicode/normalizer2.h" #include "unicode/numberformatter.h" #include "unicode/numfmt.h" #include "unicode/numsys.h" #include "unicode/timezone.h" #include "unicode/ures.h" #include "unicode/ustring.h" #include "unicode/uvernum.h" // U_ICU_VERSION_MAJOR_NUM #define XSTR(s) STR(s) #define STR(s) #s static_assert( V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM, "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up"); #undef STR #undef XSTR namespace v8 { namespace internal { namespace { constexpr uint8_t kToLower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, }; inline constexpr uint16_t ToLatin1Lower(uint16_t ch) { return static_cast<uint16_t>(kToLower[ch]); } // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF. inline constexpr uint16_t ToLatin1Upper(uint16_t ch) { DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF); return ch & ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5); } template <typename Char> bool ToUpperFastASCII(base::Vector<const Char> src, Handle<SeqOneByteString> result) { // Do a faster loop for the case where all the characters are ASCII. uint16_t ored = 0; int32_t index = 0; for (auto it = src.begin(); it != src.end(); ++it) { uint16_t ch = static_cast<uint16_t>(*it); ored |= ch; result->SeqOneByteStringSet(index++, ToAsciiUpper(ch)); } return !(ored & ~0x7F); } const uint16_t sharp_s = 0xDF; template <typename Char> bool ToUpperOneByte(base::Vector<const Char> src, uint8_t* dest, int* sharp_s_count) { // Still pretty-fast path for the input with non-ASCII Latin-1 characters. // There are two special cases. // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF. // 2. Lower case sharp-S converts to "SS" (two characters) *sharp_s_count = 0; for (auto it = src.begin(); it != src.end(); ++it) { uint16_t ch = static_cast<uint16_t>(*it); if (V8_UNLIKELY(ch == sharp_s)) { ++(*sharp_s_count); continue; } if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) { // Since this upper-cased character does not fit in an 8-bit string, we // need to take the 16-bit path. return false; } *dest++ = ToLatin1Upper(ch); } return true; } template <typename Char> void ToUpperWithSharpS(base::Vector<const Char> src, Handle<SeqOneByteString> result) { int32_t dest_index = 0; for (auto it = src.begin(); it != src.end(); ++it) { uint16_t ch = static_cast<uint16_t>(*it); if (ch == sharp_s) { result->SeqOneByteStringSet(dest_index++, 'S'); result->SeqOneByteStringSet(dest_index++, 'S'); } else { result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch)); } } } inline int FindFirstUpperOrNonAscii(Tagged<String> s, int length) { for (int index = 0; index < length; ++index) { uint16_t ch = s->Get(index); if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) { return index; } } return length; } const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, std::unique_ptr<base::uc16[]>* dest, int32_t length) { DCHECK(flat.IsFlat()); if (flat.IsOneByte()) { if (!*dest) { dest->reset(NewArray<base::uc16>(length)); CopyChars(dest->get(), flat.ToOneByteVector().begin(), length); } return reinterpret_cast<const UChar*>(dest->get()); } else { return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin()); } } template <typename T> MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor, Handle<Object> locales, Handle<Object> options, const char* method_name) { Handle<Map> map; ASSIGN_RETURN_ON_EXCEPTION( isolate, map, JSFunction::GetDerivedMap(isolate, constructor, constructor), T); return T::New(isolate, map, locales, options, method_name); } } // namespace const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; } icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate, Handle<String> string, int offset) { DCHECK(string->IsFlat()); DisallowGarbageCollection no_gc; std::unique_ptr<base::uc16[]> sap; // Short one-byte strings can be expanded on the stack to avoid allocating a // temporary buffer. constexpr int kShortStringSize = 80; UChar short_string_buffer[kShortStringSize]; const UChar* uchar_buffer = nullptr; const String::FlatContent& flat = string->GetFlatContent(no_gc); int32_t length = string->length(); DCHECK_LE(offset, length); if (flat.IsOneByte() && length <= kShortStringSize) { CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length); uchar_buffer = short_string_buffer; } else { uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length); } return icu::UnicodeString(uchar_buffer + offset, length - offset); } namespace { icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string, int offset = 0) { DCHECK(string->IsFlat()); DisallowGarbageCollection no_gc; const String::FlatContent& flat = string->GetFlatContent(no_gc); if (!flat.IsOneByte()) return icu::StringPiece(); int32_t length = string->length(); const char* char_buffer = reinterpret_cast<const char*>(flat.ToOneByteVector().begin()); if (!String::IsAscii(char_buffer, length)) { return icu::StringPiece(); } return icu::StringPiece(char_buffer + offset, length - offset); } MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s, bool is_to_upper, const char* lang) { auto case_converter = is_to_upper ? u_strToUpper : u_strToLower; int32_t src_length = s->length(); int32_t dest_length = src_length; UErrorCode status; Handle<SeqTwoByteString> result; std::unique_ptr<base::uc16[]> sap; if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle(); // This is not a real loop. It'll be executed only once (no overflow) or // twice (overflow). for (int i = 0; i < 2; ++i) { // Case conversion can increase the string length (e.g. sharp-S => SS) so // that we have to handle RangeError exceptions here. ASSIGN_RETURN_ON_EXCEPTION( isolate, result, isolate->factory()->NewRawTwoByteString(dest_length), String); DisallowGarbageCollection no_gc; DCHECK(s->IsFlat()); String::FlatContent flat = s->GetFlatContent(no_gc); const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length); status = U_ZERO_ERROR; dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)), dest_length, src, src_length, lang, &status); if (status != U_BUFFER_OVERFLOW_ERROR) break; } // In most cases, the output will fill the destination buffer completely // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING). // Only in rare cases, it'll be shorter than the destination buffer and // |result| has to be truncated. DCHECK(U_SUCCESS(status)); if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) { DCHECK(dest_length == result->length()); return result; } DCHECK(dest_length < result->length()); return SeqString::Truncate(isolate, result, dest_length); } } // namespace // A stripped-down version of ConvertToLower that can only handle flat one-byte // strings and does not allocate. Note that {src} could still be, e.g., a // one-byte sliced string with a two-byte parent string. // Called from TF builtins. Tagged<String> Intl::ConvertOneByteToLower(Tagged<String> src, Tagged<String> dst) { DCHECK_EQ(src->length(), dst->length()); DCHECK(src->IsOneByteRepresentation()); DCHECK(src->IsFlat()); DCHECK(IsSeqOneByteString(dst)); DisallowGarbageCollection no_gc; const int length = src->length(); String::FlatContent src_flat = src->GetFlatContent(no_gc); uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars(no_gc); if (src_flat.IsOneByte()) { const uint8_t* src_data = src_flat.ToOneByteVector().begin(); bool has_changed_character = false; int index_to_first_unprocessed = FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data), reinterpret_cast<const char*>(src_data), length, &has_changed_character); if (index_to_first_unprocessed == length) { return has_changed_character ? dst : src; } // If not ASCII, we keep the result up to index_to_first_unprocessed and // process the rest. for (int index = index_to_first_unprocessed; index < length; ++index) { dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index])); } } else { DCHECK(src_flat.IsTwoByte()); int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length); if (index_to_first_unprocessed == length) return src; const uint16_t* src_data = src_flat.ToUC16Vector().begin(); CopyChars(dst_data, src_data, index_to_first_unprocessed); for (int index = index_to_first_unprocessed; index < length; ++index) { dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index])); } } return dst; } MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) { if (!s->IsOneByteRepresentation()) { // Use a slower implementation for strings with characters beyond U+00FF. return LocaleConvertCase(isolate, s, false, ""); } int length = s->length(); // We depend here on the invariant that the length of a Latin1 // string is invariant under ToLowerCase, and the result always // fits in the Latin1 range in the *root locale*. It does not hold // for ToUpperCase even in the root locale. // Scan the string for uppercase and non-ASCII characters for strings // shorter than a machine-word without any memory allocation overhead. // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() // to two parts, one for scanning the prefix with no change and the other for // handling ASCII-only characters. bool is_short = length < static_cast<int>(sizeof(uintptr_t)); if (is_short) { bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length; if (is_lower_ascii) return s; } Handle<SeqOneByteString> result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate); } MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) { int32_t length = s->length(); if (s->IsOneByteRepresentation() && length > 0) { Handle<SeqOneByteString> result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); DCHECK(s->IsFlat()); int sharp_s_count; bool is_result_single_byte; { DisallowGarbageCollection no_gc; String::FlatContent flat = s->GetFlatContent(no_gc); uint8_t* dest = result->GetChars(no_gc); if (flat.IsOneByte()) { base::Vector<const uint8_t> src = flat.ToOneByteVector(); bool has_changed_character = false; int index_to_first_unprocessed = FastAsciiConvert<false>( reinterpret_cast<char*>(result->GetChars(no_gc)), reinterpret_cast<const char*>(src.begin()), length, &has_changed_character); if (index_to_first_unprocessed == length) { return has_changed_character ? result : s; } // If not ASCII, we keep the result up to index_to_first_unprocessed and // process the rest. is_result_single_byte = ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length), dest + index_to_first_unprocessed, &sharp_s_count); } else { DCHECK(flat.IsTwoByte()); base::Vector<const uint16_t> src = flat.ToUC16Vector(); if (ToUpperFastASCII(src, result)) return result; is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count); } } // Go to the full Unicode path if there are characters whose uppercase // is beyond the Latin-1 range (cannot be represented in OneByteString). if (V8_UNLIKELY(!is_result_single_byte)) { return LocaleConvertCase(isolate, s, true, ""); } if (sharp_s_count == 0) return result; // We have sharp_s_count sharp-s characters, but the result is still // in the Latin-1 range. ASSIGN_RETURN_ON_EXCEPTION( isolate, result, isolate->factory()->NewRawOneByteString(length + sharp_s_count), String); DisallowGarbageCollection no_gc; String::FlatContent flat = s->GetFlatContent(no_gc); if (flat.IsOneByte()) { ToUpperWithSharpS(flat.ToOneByteVector(), result); } else { ToUpperWithSharpS(flat.ToUC16Vector(), result); } return result; } return LocaleConvertCase(isolate, s, true, ""); } std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) { // Ugly hack. ICU doesn't expose numbering system in any way, so we have // to assume that for given locale NumberingSystem constructor produces the // same digits as NumberFormat/Calendar would. UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::NumberingSystem> numbering_system( icu::NumberingSystem::createInstance(icu_locale, status)); if (U_SUCCESS(status) && !numbering_system->isAlgorithmic()) { return numbering_system->getName(); } return "latn"; } namespace { Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) { DisallowGarbageCollection no_gc; // Convert BCP47 into ICU locale format. UErrorCode status = U_ZERO_ERROR; icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status); if (U_FAILURE(status) || icu_locale.isBogus()) { return Nothing<icu::Locale>(); } return Just(icu_locale); } } // anonymous namespace // static MaybeHandle<String> Intl::ToString(Isolate* isolate, const icu::UnicodeString& string) { return isolate->factory()->NewStringFromTwoByte(base::Vector<const uint16_t>( reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length())); } MaybeHandle<String> Intl::ToString(Isolate* isolate, const icu::UnicodeString& string, int32_t begin, int32_t end) { return Intl::ToString(isolate, string.tempSubStringBetween(begin, end)); } namespace { Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array, int index, Handle<String> field_type_string, Handle<String> value) { // let element = $array[$index] = { // type: $field_type_string, // value: $value // } // return element; Factory* factory = isolate->factory(); Handle<JSObject> element = factory->NewJSObject(isolate->object_function()); JSObject::AddProperty(isolate, element, factory->type_string(), field_type_string, NONE); JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE); // TODO(victorgomes): Temporarily forcing a fatal error here in case of // overflow, until Intl::AddElement can handle exceptions. if (JSObject::AddDataElement(array, index, element, NONE).IsNothing()) { FATAL("Fatal JavaScript invalid size error when adding element"); UNREACHABLE(); } return element; } } // namespace void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index, Handle<String> field_type_string, Handle<String> value) { // Same as $array[$index] = {type: $field_type_string, value: $value}; InnerAddElement(isolate, array, index, field_type_string, value); } void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index, Handle<String> field_type_string, Handle<String> value, Handle<String> additional_property_name, Handle<String> additional_property_value) { // Same as $array[$index] = { // type: $field_type_string, value: $value, // $additional_property_name: $additional_property_value // } Handle<JSObject> element = InnerAddElement(isolate, array, index, field_type_string, value); JSObject::AddProperty(isolate, element, additional_property_name, additional_property_value, NONE); } namespace { // Build the shortened locale; eg, convert xx_Yyyy_ZZ to xx_ZZ. // // If locale has a script tag then return true and the locale without the // script else return false and an empty string. bool RemoveLocaleScriptTag(const std::string& icu_locale, std::string* locale_less_script) { icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str()); const char* icu_script = new_locale.getScript(); if (icu_script == nullptr || strlen(icu_script) == 0) { *locale_less_script = std::string(); return false; } const char* icu_language = new_locale.getLanguage(); const char* icu_country = new_locale.getCountry(); icu::Locale short_locale = icu::Locale(icu_language, icu_country); *locale_less_script = short_locale.getName(); return true; } bool ValidateResource(const icu::Locale locale, const char* path, const char* key) { bool result = false; UErrorCode status = U_ZERO_ERROR; UResourceBundle* bundle = ures_open(path, locale.getName(), &status); if (bundle != nullptr && status == U_ZERO_ERROR) { if (key == nullptr) { result = true; } else { UResourceBundle* key_bundle = ures_getByKey(bundle, key, nullptr, &status); result = key_bundle != nullptr && (status == U_ZERO_ERROR); ures_close(key_bundle); } } ures_close(bundle); if (!result) { if ((locale.getCountry()[0] != '\0') && (locale.getScript()[0] != '\0')) { // Fallback to try without country. std::string without_country(locale.getLanguage()); without_country = without_country.append("-").append(locale.getScript()); return ValidateResource(without_country.c_str(), path, key); } else if ((locale.getCountry()[0] != '\0') || (locale.getScript()[0] != '\0')) { // Fallback to try with only language. std::string language(locale.getLanguage()); return ValidateResource(language.c_str(), path, key); } } return result; } } // namespace std::set<std::string> Intl::BuildLocaleSet( const std::vector<std::string>& icu_available_locales, const char* path, const char* validate_key) { std::set<std::string> locales; for (const std::string& locale : icu_available_locales) { if (path != nullptr || validate_key != nullptr) { if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) { // FIXME(chromium:1215606) Find a beter fix for nb->no fallback if (locale != "nb") { continue; } // Try no for nb if (!ValidateResource(icu::Locale("no"), path, validate_key)) { continue; } } } locales.insert(locale); std::string shortened_locale; if (RemoveLocaleScriptTag(locale, &shortened_locale)) { std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-'); locales.insert(shortened_locale); } } return locales; } Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) { UErrorCode status = U_ZERO_ERROR; std::string res = locale.toLanguageTag<std::string>(status); if (U_FAILURE(status)) { return Nothing<std::string>(); } DCHECK(U_SUCCESS(status)); return Just(res); } // See ecma402/#legacy-constructor. MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate, Handle<JSReceiver> receiver, Handle<JSFunction> constructor, bool has_initialized_slot) { Handle<Object> obj_ordinary_has_instance; ASSIGN_RETURN_ON_EXCEPTION( isolate, obj_ordinary_has_instance, Object::OrdinaryHasInstance(isolate, constructor, receiver), Object); bool ordinary_has_instance = Object::BooleanValue(*obj_ordinary_has_instance, isolate); // 2. If receiver does not have an [[Initialized...]] internal slot // and ? OrdinaryHasInstance(constructor, receiver) is true, then if (!has_initialized_slot && ordinary_has_instance) { // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]). Handle<Object> new_receiver; ASSIGN_RETURN_ON_EXCEPTION( isolate, new_receiver, JSReceiver::GetProperty(isolate, receiver, isolate->factory()->intl_fallback_symbol()), Object); return new_receiver; } return receiver; } namespace { bool IsTwoLetterLanguage(const std::string& locale) { // Two letters, both in range 'a'-'z'... return locale.length() == 2 && IsAsciiLower(locale[0]) && IsAsciiLower(locale[1]); } bool IsDeprecatedOrLegacyLanguage(const std::string& locale) { // Check if locale is one of the deprecated language tags: return locale == "in" || locale == "iw" || locale == "ji" || locale == "jw" || locale == "mo" || // Check if locale is one of the legacy language tags: locale == "sh" || locale == "tl" || locale == "no"; } bool IsStructurallyValidLanguageTag(const std::string& tag) { return JSLocale::StartsWithUnicodeLanguageId(tag); } // Canonicalize the locale. // https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag, // including type check and structural validity check. Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate, const std::string& locale_in) { std::string locale = locale_in; if (locale.length() == 0 || !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError( MessageTemplate::kInvalidLanguageTag, isolate->factory()->NewStringFromAsciiChecked(locale.c_str())), Nothing<std::string>()); } // Optimize for the most common case: a 2-letter language code in the // canonical form/lowercase that is not one of the deprecated codes // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language // codes. Instead, let them be handled by ICU in the slow path. However, // fast-track 'fil' (3-letter canonical code). if ((IsTwoLetterLanguage(locale) && !IsDeprecatedOrLegacyLanguage(locale)) || locale == "fil") { return Just(locale); } // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase // the input before any more check. std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower); // // ECMA 402 6.2.3 // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid // language tag if it's too long (much longer than 100 chars). Even if we // allocate a longer buffer, ICU will still fail if it's too long. Either // propose to Ecma 402 to put a limit on the locale length or change ICU to // handle long locale names better. See // https://unicode-org.atlassian.net/browse/ICU-13417 UErrorCode error = U_ZERO_ERROR; // uloc_forLanguageTag checks the structrual validity. If the input BCP47 // language tag is parsed all the way to the end, it indicates that the input // is structurally valid. Due to a couple of bugs, we can't use it // without Chromium patches or ICU 62 or earlier. icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error); if (U_FAILURE(error) || icu_locale.isBogus()) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError( MessageTemplate::kInvalidLanguageTag, isolate->factory()->NewStringFromAsciiChecked(locale.c_str())), Nothing<std::string>()); } // Use LocaleBuilder to validate locale. icu_locale = icu::LocaleBuilder().setLocale(icu_locale).build(error); icu_locale.canonicalize(error); if (U_FAILURE(error) || icu_locale.isBogus()) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError( MessageTemplate::kInvalidLanguageTag, isolate->factory()->NewStringFromAsciiChecked(locale.c_str())), Nothing<std::string>()); } Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale); if (maybe_to_language_tag.IsNothing()) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError( MessageTemplate::kInvalidLanguageTag, isolate->factory()->NewStringFromAsciiChecked(locale.c_str())), Nothing<std::string>()); } return maybe_to_language_tag; } Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate, Handle<Object> locale_in) { Handle<String> locale_str; // This does part of the validity checking spec'ed in CanonicalizeLocaleList: // 7c ii. If Type(kValue) is not String or Object, throw a TypeError // exception. // 7c iii. Let tag be ? ToString(kValue). // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a // RangeError exception. if (IsString(*locale_in)) { locale_str = Handle<String>::cast(locale_in); } else if (IsJSReceiver(*locale_in)) { ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str, Object::ToString(isolate, locale_in), Nothing<std::string>()); } else { THROW_NEW_ERROR_RETURN_VALUE(isolate, NewTypeError(MessageTemplate::kLanguageID), Nothing<std::string>()); } std::string locale(locale_str->ToCString().get()); if (!IsStructurallyValidLanguageTag(locale)) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kLocaleBadParameters), Nothing<std::string>()); } return CanonicalizeLanguageTag(isolate, locale); } } // anonymous namespace Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList( Isolate* isolate, Handle<Object> locales, bool only_return_one_result) { // 1. If locales is undefined, then if (IsUndefined(*locales, isolate)) { // 1a. Return a new empty List. return Just(std::vector<std::string>()); } // 2. Let seen be a new empty List. std::vector<std::string> seen; // 3. If Type(locales) is String or locales has an [[InitializedLocale]] // internal slot, then if (IsJSLocale(*locales)) { // Since this value came from JSLocale, which is already went though the // CanonializeLanguageTag process once, therefore there are no need to // call CanonializeLanguageTag again. seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales))); return Just(seen); } if (IsString(*locales)) { // 3a. Let O be CreateArrayFromList(« locales »). // Instead of creating a one-element array and then iterating over it, // we inline the body of the iteration: std::string canonicalized_tag; if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) { return Nothing<std::vector<std::string>>(); } seen.push_back(canonicalized_tag); return Just(seen); } // 4. Else, // 4a. Let O be ? ToObject(locales). Handle<JSReceiver> o; ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o, Object::ToObject(isolate, locales), Nothing<std::vector<std::string>>()); // 5. Let len be ? ToLength(? Get(O, "length")). Handle<Object> length_obj; ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj, Object::GetLengthFromArrayLike(isolate, o), Nothing<std::vector<std::string>>()); // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably // don't happen in practice (and would be very slow if they do), we'll keep // the code simple for now by using a saturating to-uint32 conversion. double raw_length = Object::Number(*length_obj); uint32_t len = raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length); // 6. Let k be 0. // 7. Repeat, while k < len for (uint32_t k = 0; k < len; k++) { // 7a. Let Pk be ToString(k). // 7b. Let kPresent be ? HasProperty(O, Pk). LookupIterator it(isolate, o, k); Maybe<bool> maybe_found = JSReceiver::HasProperty(&it); MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>()); // 7c. If kPresent is true, then if (!maybe_found.FromJust()) continue; // 7c i. Let kValue be ? Get(O, Pk). Handle<Object> k_value; ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it), Nothing<std::vector<std::string>>()); // 7c ii. If Type(kValue) is not String or Object, throw a TypeError // exception. // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] // internal slot, then std::string canonicalized_tag; if (IsJSLocale(*k_value)) { // 7c iii. 1. Let tag be kValue.[[Locale]]. canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value)); // 7c iv. Else, } else { // 7c iv 1. Let tag be ? ToString(kValue). // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a // RangeError exception. // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag). if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) { return Nothing<std::vector<std::string>>(); } } // 7c vi. If canonicalizedTag is not an element of seen, append // canonicalizedTag as the last element of seen. if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) { seen.push_back(canonicalized_tag); } // 7d. Increase k by 1. (See loop header.) // Optimization: some callers only need one result. if (only_return_one_result) return Just(seen); } // 8. Return seen. return Just(seen); } // ecma402 #sup-string.prototype.tolocalelowercase // ecma402 #sup-string.prototype.tolocaleuppercase MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate, Handle<String> s, bool to_upper, Handle<Object> locales) { std::vector<std::string> requested_locales; if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) { return MaybeHandle<String>(); } std::string requested_locale = requested_locales.size() == 0 ? isolate->DefaultLocale() : requested_locales[0]; size_t dash = requested_locale.find('-'); if (dash != std::string::npos) { requested_locale = requested_locale.substr(0, dash); } // Primary language tag can be up to 8 characters long in theory. // https://tools.ietf.org/html/bcp47#section-2.2.1 DCHECK_LE(requested_locale.length(), 8); s = String::Flatten(isolate, s); // All the languages requiring special-handling have two-letter codes. // Note that we have to check for '!= 2' here because private-use language // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have // only 'x' or 'i' when they get here. if (V8_UNLIKELY(requested_locale.length() != 2)) { if (to_upper) { return ConvertToUpper(isolate, s); } return ConvertToLower(isolate, s); } // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath // in the root locale needs to be adjusted for az, lt and tr because even case // mapping of ASCII range characters are different in those locales. // Greek (el) does not require any adjustment. if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") || (requested_locale == "lt") || (requested_locale == "az"))) { return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str()); } else { if (to_upper) { return ConvertToUpper(isolate, s); } return ConvertToLower(isolate, s); } } // static template <class IsolateT> Intl::CompareStringsOptions Intl::CompareStringsOptionsFor( IsolateT* isolate, Handle<Object> locales, Handle<Object> options) { if (!IsUndefined(*options, isolate)) { return CompareStringsOptions::kNone; } // Lists all of the available locales that are statically known to fulfill // fast path conditions. See the StringLocaleCompareFastPath test as a // starting point to update this list. // // Locale entries are roughly sorted s.t. common locales come first. // // The actual conditions are verified in debug builds in // CollatorAllowsFastComparison. static const char* const kFastLocales[] = { "en-US", "en", "fr", "es", "de", "pt", "it", "ca", "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro", "sl", "sv", "sw", "vi", "en-DE", "en-GB", }; if (IsUndefined(*locales, isolate)) { const std::string& default_locale = isolate->DefaultLocale(); for (const char* fast_locale : kFastLocales) { if (strcmp(fast_locale, default_locale.c_str()) == 0) { return CompareStringsOptions::kTryFastPath; } } return CompareStringsOptions::kNone; } if (!IsString(*locales)) return CompareStringsOptions::kNone; Handle<String> locales_string = Handle<String>::cast(locales); for (const char* fast_locale : kFastLocales) { if (locales_string->IsEqualTo(base::CStrVector(fast_locale), isolate)) { return CompareStringsOptions::kTryFastPath; } } return CompareStringsOptions::kNone; } // Instantiations. template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor( Isolate*, Handle<Object>, Handle<Object>); template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor( LocalIsolate*, Handle<Object>, Handle<Object>); base::Optional<int> Intl::StringLocaleCompare( Isolate* isolate, Handle<String> string1, Handle<String> string2, Handle<Object> locales, Handle<Object> options, const char* method_name) { // We only cache the instance when locales is a string/undefined and // options is undefined, as that is the only case when the specified // side-effects of examining those arguments are unobservable. const bool can_cache = (IsString(*locales) || IsUndefined(*locales, isolate)) && IsUndefined(*options, isolate); // We may be able to take the fast path, depending on the `locales` and // `options` arguments. const CompareStringsOptions compare_strings_options = CompareStringsOptionsFor(isolate, locales, options); if (can_cache) { // Both locales and options are undefined, check the cache. icu::Collator* cached_icu_collator = static_cast<icu::Collator*>(isolate->get_cached_icu_object( Isolate::ICUObjectCacheType::kDefaultCollator, locales)); // We may use the cached icu::Collator for a fast path. if (cached_icu_collator != nullptr) { return Intl::CompareStrings(isolate, *cached_icu_collator, string1, string2, compare_strings_options); } } Handle<JSFunction> constructor = Handle<JSFunction>( JSFunction::cast( isolate->context()->native_context()->intl_collator_function()), isolate); Handle<JSCollator> collator; MaybeHandle<JSCollator> maybe_collator = New<JSCollator>(isolate, constructor, locales, options, method_name); if (!maybe_collator.ToHandle(&collator)) return {}; if (can_cache) { isolate->set_icu_object_in_cache( Isolate::ICUObjectCacheType::kDefaultCollator, locales, std::static_pointer_cast<icu::UMemory>( collator->icu_collator()->get())); } icu::Collator* icu_collator = collator->icu_collator()->raw(); return Intl::CompareStrings(isolate, *icu_collator, string1, string2, compare_strings_options); } namespace { // Weights for the Unicode Collation Algorithm for charcodes [0x00,0x7F]. // https://unicode.org/reports/tr10/. // // Generated from: // // $ wget http://www.unicode.org/Public/UCA/latest/allkeys.txt // $ cat ~/allkeys.txt | grep '^00[0-7]. ;' | sort | sed 's/[*.]/ /g' |\ // sed 's/.*\[ \(.*\)\].*/\1/' | python ~/gen_weights.py // // Where gen_weights.py does an ordinal rank s.t. weights fit in a uint8_t: // // import sys // // def to_ordinal(ws): // weight_map = {} // weights_uniq_sorted = sorted(set(ws)) // for i in range(0, len(weights_uniq_sorted)): // weight_map[weights_uniq_sorted[i]] = i // return [weight_map[x] for x in ws] // // def print_weight_list(array_name, ws): // print("constexpr uint8_t %s[256] = {" % array_name, end = "") // i = 0 // for w in ws: // if (i % 16) == 0: // print("\n ", end = "") // print("%3d," % w, end = "") // i += 1 // print("\n};\n") // // if __name__ == "__main__": // l1s = [] // l3s = [] // for line in sys.stdin: // weights = line.split() // l1s.append(int(weights[0], 16)) // l3s.append(int(weights[2], 16)) // print_weight_list("kCollationWeightsL1", to_ordinal(l1s)) // print_weight_list("kCollationWeightsL3", to_ordinal(l3s)) // clang-format off constexpr uint8_t kCollationWeightsL1[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32, 9, 8, 14, 25, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13, 23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7, 30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37, 0, }; constexpr uint8_t kCollationWeightsL3[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, }; constexpr int kCollationWeightsLength = arraysize(kCollationWeightsL1); static_assert(kCollationWeightsLength == arraysize(kCollationWeightsL3)); // clang-format on // Normalize a comparison delta (usually `lhs - rhs`) to UCollationResult // values. constexpr UCollationResult ToUCollationResult(int delta) { return delta < 0 ? UCollationResult::UCOL_LESS : (delta > 0 ? UCollationResult::UCOL_GREATER : UCollationResult::UCOL_EQUAL); } struct FastCompareStringsData { UCollationResult l1_result = UCollationResult::UCOL_EQUAL; UCollationResult l3_result = UCollationResult::UCOL_EQUAL; int processed_until = 0; int first_diff_at = 0; // The first relevant diff (L1 if exists, else L3). bool has_diff = false; base::Optional<UCollationResult> FastCompareFailed( int* processed_until_out) const { if (has_diff) { // Found some difference, continue there to ensure the generic algorithm // picks it up. *processed_until_out = first_diff_at; } else { // No difference found, reprocess the last processed character since it // may be followed by a unicode combining character (which alters it's // meaning). *processed_until_out = std::max(processed_until - 1, 0); } return {}; } }; template <class CharT> constexpr bool CanFastCompare(CharT c) { return c < kCollationWeightsLength && kCollationWeightsL1[c] != 0; } template <class Char1T, class Char2T> bool FastCompareFlatString(const Char1T* lhs, const Char2T* rhs, int length, FastCompareStringsData* d) { for (int i = 0; i < length; i++) { const Char1T l = lhs[i]; const Char2T r = rhs[i]; if (!CanFastCompare(l) || !CanFastCompare(r)) { d->processed_until = i; return false; } UCollationResult l1_result = ToUCollationResult(kCollationWeightsL1[l] - kCollationWeightsL1[r]); if (l1_result != UCollationResult::UCOL_EQUAL) { d->has_diff = true; d->first_diff_at = i; d->processed_until = i; d->l1_result = l1_result; return true; } if (l != r && d->l3_result == UCollationResult::UCOL_EQUAL) { // Collapse the two-pass algorithm into one: if we find a difference in // L1 weights, that is our result. If not, use the first L3 weight // difference. UCollationResult l3_result = ToUCollationResult(kCollationWeightsL3[l] - kCollationWeightsL3[r]); d->l3_result = l3_result; if (!d->has_diff) { d->has_diff = true; d->first_diff_at = i; } } } d->processed_until = length; return true; } bool FastCompareStringFlatContent(const String::FlatContent& lhs, const String::FlatContent& rhs, int length, FastCompareStringsData* d) { if (lhs.IsOneByte()) { base::Vector<const uint8_t> l = lhs.ToOneByteVector(); if (rhs.IsOneByte()) { base::Vector<const uint8_t> r = rhs.ToOneByteVector(); return FastCompareFlatString(l.data(), r.data(), length, d); } else { base::Vector<const uint16_t> r = rhs.ToUC16Vector(); return FastCompareFlatString(l.data(), r.data(), length, d); } } else { base::Vector<const uint16_t> l = lhs.ToUC16Vector(); if (rhs.IsOneByte()) { base::Vector<const uint8_t> r = rhs.ToOneByteVector(); return FastCompareFlatString(l.data(), r.data(), length, d); } else { base::Vector<const uint16_t> r = rhs.ToUC16Vector(); return FastCompareFlatString(l.data(), r.data(), length, d); } } UNREACHABLE(); } bool CharIsAsciiOrOutOfBounds(const String::FlatContent& string, int string_length, int index) { DCHECK_EQ(string.length(), string_length); return index >= string_length || isascii(string.Get(index)); } bool CharCanFastCompareOrOutOfBounds(const String::FlatContent& string, int string_length, int index) { DCHECK_EQ(string.length(), string_length); return index >= string_length || CanFastCompare(string.Get(index)); } #ifdef DEBUG bool USetContainsAllAsciiItem(USet* set) { static constexpr int kBufferSize = 64; UChar buffer[kBufferSize]; const int length = uset_getItemCount(set); for (int i = 0; i < length; i++) { UChar32 start, end; UErrorCode status = U_ZERO_ERROR; const int item_length = uset_getItem(set, i, &start, &end, buffer, kBufferSize, &status); CHECK(U_SUCCESS(status)); DCHECK_GE(item_length, 0); if (item_length == 0) { // Empty string or a range. if (isascii(start)) return true; } else { // A non-empty string. bool all_ascii = true; for (int j = 0; j < item_length; j++) { if (!isascii(buffer[j])) { all_ascii = false; break; } } if (all_ascii) return true; } } return false; } bool CollatorAllowsFastComparison(const icu::Collator& icu_collator) { UErrorCode status = U_ZERO_ERROR; icu::Locale icu_locale(icu_collator.getLocale(ULOC_VALID_LOCALE, status)); DCHECK(U_SUCCESS(status)); static constexpr int kBufferSize = 64; char buffer[kBufferSize]; const int collation_keyword_length = icu_locale.getKeywordValue("collation", buffer, kBufferSize, status); DCHECK(U_SUCCESS(status)); if (collation_keyword_length != 0) return false; // These attributes must be set to the expected value for fast comparisons. static constexpr struct { UColAttribute attribute; UColAttributeValue legal_value; } kAttributeChecks[] = { {UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE}, {UCOL_CASE_FIRST, UCOL_OFF}, {UCOL_CASE_LEVEL, UCOL_OFF}, {UCOL_FRENCH_COLLATION, UCOL_OFF}, {UCOL_NUMERIC_COLLATION, UCOL_OFF}, {UCOL_STRENGTH, UCOL_TERTIARY}, }; for (const auto& check : kAttributeChecks) { if (icu_collator.getAttribute(check.attribute, status) != check.legal_value) { return false; } DCHECK(U_SUCCESS(status)); } // No reordering codes are allowed. int num_reorder_codes = ucol_getReorderCodes(icu_collator.toUCollator(), nullptr, 0, &status); if (num_reorder_codes != 0) return false; DCHECK(U_SUCCESS(status)); // Must check *after* num_reorder_codes != 0. // No tailored rules are allowed. int32_t rules_length = 0; ucol_getRules(icu_collator.toUCollator(), &rules_length); if (rules_length != 0) return false; USet* tailored_set = ucol_getTailoredSet(icu_collator.toUCollator(), &status); DCHECK(U_SUCCESS(status)); if (USetContainsAllAsciiItem(tailored_set)) return false; uset_close(tailored_set); // No ASCII contractions or expansions are allowed. USet* contractions = uset_openEmpty(); USet* expansions = uset_openEmpty(); ucol_getContractionsAndExpansions(icu_collator.toUCollator(), contractions, expansions, true, &status); if (USetContainsAllAsciiItem(contractions)) return false; if (USetContainsAllAsciiItem(expansions)) return false; DCHECK(U_SUCCESS(status)); uset_close(contractions); uset_close(expansions); return true; } #endif // DEBUG // Fast comparison is implemented for charcodes for which the L1 collation // weight (see kCollactionWeightsL1 above) is not 0. // // Note it's possible to partially process strings as long as their leading // characters all satisfy the above criteria. In that case, and if the L3 // result is EQUAL, we set `processed_until_out` to the first non-processed // index - future processing can begin at that offset. // // This fast path looks somewhat complex; mostly because it combines multiple // passes into one. The pseudo-code for simplified multi-pass algorithm is: // // { // // We can only fast-compare a certain subset of the ASCII range. // // Additionally, unicode characters can change the meaning of preceding // // characters, for example: "o\u0308" is treated like "ö". // // // // Note, in the actual single-pass algorithm below, we tolerate non-ASCII // // contents outside the relevant range. // for (int i = 0; i < string1.length; i++) { // if (!CanFastCompare(string1[i])) return {}; // } // for (int i = 0; i < string2.length; i++) { // if (!CanFastCompare(string2[i])) return {}; // } // // // Apply L1 weights. // for (int i = 0; i < common_length; i++) { // Char1T c1 = string1[i]; // Char2T c2 = string2[i]; // if (L1Weight[c1] != L1Weight[c2]) { // return L1Weight[c1] - L1Weight[c2]; // } // } // // // Strings are L1-equal up to the common length; if lengths differ, the // // longer string is treated as 'greater'. // if (string1.length != string2.length) string1.length - string2.length; // // // Apply L3 weights. // for (int i = 0; i < common_length; i++) { // Char1T c1 = string1[i]; // Char2T c2 = string2[i]; // if (L3Weight[c1] != L3Weight[c2]) { // return L3Weight[c1] - L3Weight[c2]; // } // } // // return UCOL_EQUAL; // } base::Optional<UCollationResult> TryFastCompareStrings( Isolate* isolate, const icu::Collator& icu_collator, Handle<String> string1, Handle<String> string2, int* processed_until_out) { // TODO(jgruber): We could avoid the flattening (done by the caller) as well // by implementing comparison through string iteration. This has visible // performance benefits (e.g. 7% on CDJS) but complicates the code. Consider // doing this in the future. DCHECK(string1->IsFlat()); DCHECK(string2->IsFlat()); *processed_until_out = 0; #ifdef DEBUG // Checked by the caller, see CompareStringsOptionsFor. SLOW_DCHECK(CollatorAllowsFastComparison(icu_collator)); USE(CollatorAllowsFastComparison); #endif // DEBUG DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string1)); DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string2)); const int length1 = string1->length(); const int length2 = string2->length(); int common_length = std::min(length1, length2); FastCompareStringsData d; DisallowGarbageCollection no_gc; const String::FlatContent& flat1 = string1->GetFlatContent(no_gc); const String::FlatContent& flat2 = string2->GetFlatContent(no_gc); if (!FastCompareStringFlatContent(flat1, flat2, common_length, &d)) { DCHECK_EQ(d.l1_result, UCollationResult::UCOL_EQUAL); return d.FastCompareFailed(processed_until_out); } // The result is only valid if the last processed character is not followed // by a unicode combining character (we are overly strict and restrict to // ASCII). if (!CharIsAsciiOrOutOfBounds(flat1, length1, d.processed_until + 1) || !CharIsAsciiOrOutOfBounds(flat2, length2, d.processed_until + 1)) { return d.FastCompareFailed(processed_until_out); } if (d.l1_result != UCollationResult::UCOL_EQUAL) { return d.l1_result; } // Strings are L1-equal up to their common length, length differences win. UCollationResult length_result = ToUCollationResult(length1 - length2); if (length_result != UCollationResult::UCOL_EQUAL) { // Strings of different lengths may still compare as equal if the longer // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}". if (!CharCanFastCompareOrOutOfBounds(flat1, length1, common_length) || !CharCanFastCompareOrOutOfBounds(flat2, length2, common_length)) { return d.FastCompareFailed(processed_until_out); } return length_result; } // L1-equal and same length, the L3 result wins. return d.l3_result; } } // namespace // static const uint8_t* Intl::AsciiCollationWeightsL1() { return &kCollationWeightsL1[0]; } // static const uint8_t* Intl::AsciiCollationWeightsL3() { return &kCollationWeightsL3[0]; } // static const int Intl::kAsciiCollationWeightsLength = kCollationWeightsLength; // ecma402/#sec-collator-comparestrings int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator, Handle<String> string1, Handle<String> string2, CompareStringsOptions compare_strings_options) { // Early return for identical strings. if (string1.is_identical_to(string2)) { return UCollationResult::UCOL_EQUAL; } // We cannot return early for 0-length strings because of Unicode // ignorable characters. See also crbug.com/1347690. string1 = String::Flatten(isolate, string1); string2 = String::Flatten(isolate, string2); int processed_until = 0; if (compare_strings_options == CompareStringsOptions::kTryFastPath) { base::Optional<int> maybe_result = TryFastCompareStrings( isolate, icu_collator, string1, string2, &processed_until); if (maybe_result.has_value()) return maybe_result.value(); } UCollationResult result; UErrorCode status = U_ZERO_ERROR; icu::StringPiece string_piece1 = ToICUStringPiece(isolate, string1, processed_until); if (!string_piece1.empty()) { icu::StringPiece string_piece2 = ToICUStringPiece(isolate, string2, processed_until); if (!string_piece2.empty()) { result = icu_collator.compareUTF8(string_piece1, string_piece2, status); DCHECK(U_SUCCESS(status)); return result; } } icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1, processed_until); icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2, processed_until); result = icu_collator.compare(string_val1, string_val2, status); DCHECK(U_SUCCESS(status)); return result; } // ecma402/#sup-properties-of-the-number-prototype-object MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate, Handle<Object> num, Handle<Object> locales, Handle<Object> options, const char* method_name) { Handle<Object> numeric_obj; ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj, Object::ToNumeric(isolate, num), String); // We only cache the instance when locales is a string/undefined and // options is undefined, as that is the only case when the specified // side-effects of examining those arguments are unobservable. bool can_cache = (IsString(*locales) || IsUndefined(*locales, isolate)) && IsUndefined(*options, isolate); if (can_cache) { icu::number::LocalizedNumberFormatter* cached_number_format = static_cast<icu::number::LocalizedNumberFormatter*>( isolate->get_cached_icu_object( Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales)); // We may use the cached icu::NumberFormat for a fast path. if (cached_number_format != nullptr) { return JSNumberFormat::FormatNumeric(isolate, *cached_number_format, numeric_obj); } } Handle<JSFunction> constructor = Handle<JSFunction>( JSFunction::cast( isolate->context()->native_context()->intl_number_format_function()), isolate); Handle<JSNumberFormat> number_format; // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »). StackLimitCheck stack_check(isolate); // New<JSNumberFormat>() requires a lot of stack space. const int kStackSpaceRequiredForNewJSNumberFormat = 16 * KB; if (stack_check.JsHasOverflowed(kStackSpaceRequiredForNewJSNumberFormat)) { isolate->StackOverflow(); return MaybeHandle<String>(); } ASSIGN_RETURN_ON_EXCEPTION( isolate, number_format, New<JSNumberFormat>(isolate, constructor, locales, options, method_name), String); if (can_cache) { isolate->set_icu_object_in_cache( Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales, std::static_pointer_cast<icu::UMemory>( number_format->icu_number_formatter()->get())); } // Return FormatNumber(numberFormat, x). icu::number::LocalizedNumberFormatter* icu_number_format = number_format->icu_number_formatter()->raw(); return JSNumberFormat::FormatNumeric(isolate, *icu_number_format, numeric_obj); } namespace { // 22. is in « 1, 2, 5, 10, 20, 25, 50, 100, 200, 250, 500, 1000, 2000, 2500, // 5000 » bool IsValidRoundingIncrement(int value) { switch (value) { case 1: case 2: case 5: case 10: case 20: case 25: case 50: case 100: case 200: case 250: case 500: case 1000: case 2000: case 2500: case 5000: return true; default: return false; } } } // namespace Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions( Isolate* isolate, Handle<JSReceiver> options, int mnfd_default, int mxfd_default, bool notation_is_compact, const char* service) { Factory* factory = isolate->factory(); Intl::NumberFormatDigitOptions digit_options; // 1. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21, // 1). int mnid = 1; if (!GetNumberOption(isolate, options, factory->minimumIntegerDigits_string(), 1, 21, 1) .To(&mnid)) { return Nothing<NumberFormatDigitOptions>(); } // 2. Let mnfd be ? Get(options, "minimumFractionDigits"). Handle<Object> mnfd_obj; ASSIGN_RETURN_ON_EXCEPTION_VALUE( isolate, mnfd_obj, JSReceiver::GetProperty(isolate, options, factory->minimumFractionDigits_string()), Nothing<NumberFormatDigitOptions>()); // 3. Let mxfd be ? Get(options, "maximumFractionDigits"). Handle<Object> mxfd_obj; ASSIGN_RETURN_ON_EXCEPTION_VALUE( isolate, mxfd_obj, JSReceiver::GetProperty(isolate, options, factory->maximumFractionDigits_string()), Nothing<NumberFormatDigitOptions>()); // 4. Let mnsd be ? Get(options, "minimumSignificantDigits"). Handle<Object> mnsd_obj; ASSIGN_RETURN_ON_EXCEPTION_VALUE( isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, factory->minimumSignificantDigits_string()), Nothing<NumberFormatDigitOptions>()); // 5. Let mxsd be ? Get(options, "maximumSignificantDigits"). Handle<Object> mxsd_obj; ASSIGN_RETURN_ON_EXCEPTION_VALUE( isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, factory->maximumSignificantDigits_string()), Nothing<NumberFormatDigitOptions>()); digit_options.rounding_priority = RoundingPriority::kAuto; digit_options.minimum_significant_digits = 0; digit_options.maximum_significant_digits = 0; // 6. Set intlObj.[[MinimumIntegerDigits]] to mnid. digit_options.minimum_integer_digits = mnid; // 7. Let roundingPriority be ? GetOption(options, "roundingPriority", // "string", « "auto", "morePrecision", "lessPrecision" », "auto"). Maybe<RoundingPriority> maybe_rounding_priority = GetStringOption<RoundingPriority>( isolate, options, "roundingPriority", service, {"auto", "morePrecision", "lessPrecision"}, {RoundingPriority::kAuto, RoundingPriority::kMorePrecision, RoundingPriority::kLessPrecision}, RoundingPriority::kAuto); MAYBE_RETURN(maybe_rounding_priority, Nothing<NumberFormatDigitOptions>()); digit_options.rounding_priority = maybe_rounding_priority.FromJust(); // 8. Let roundingIncrement be ? GetNumberOption(options, "roundingIncrement", // 1, 5000, 1). Maybe<int> maybe_rounding_increment = GetNumberOption( isolate, options, factory->roundingIncrement_string(), 1, 5000, 1); if (!maybe_rounding_increment.To(&digit_options.rounding_increment)) { return Nothing<NumberFormatDigitOptions>(); } // 9. If roundingIncrement is not in « 1, 2, 5, 10, 20, 25, 50, 100, 200, 250, // 500, 1000, 2000, 2500, 5000 », throw a RangeError exception. if (!IsValidRoundingIncrement(digit_options.rounding_increment)) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kPropertyValueOutOfRange, factory->roundingIncrement_string()), Nothing<NumberFormatDigitOptions>()); } // 10. Let roundingMode be ? GetOption(options, "roundingMode", string, « // "ceil", "floor", "expand", "trunc", "halfCeil", "halfFloor", "halfExpand", // "halfTrunc", "halfEven" », "halfExpand"). Maybe<RoundingMode> maybe_rounding_mode = GetStringOption<RoundingMode>( isolate, options, "roundingMode", service, {"ceil", "floor", "expand", "trunc", "halfCeil", "halfFloor", "halfExpand", "halfTrunc", "halfEven"}, {RoundingMode::kCeil, RoundingMode::kFloor, RoundingMode::kExpand, RoundingMode::kTrunc, RoundingMode::kHalfCeil, RoundingMode::kHalfFloor, RoundingMode::kHalfExpand, RoundingMode::kHalfTrunc, RoundingMode::kHalfEven}, RoundingMode::kHalfExpand); MAYBE_RETURN(maybe_rounding_mode, Nothing<NumberFormatDigitOptions>()); digit_options.rounding_mode = maybe_rounding_mode.FromJust(); // 11. Let trailingZeroDisplay be ? GetOption(options, "trailingZeroDisplay", // string, « "auto", "stripIfInteger" », "auto"). Maybe<TrailingZeroDisplay> maybe_trailing_zero_display = GetStringOption<TrailingZeroDisplay>( isolate, options, "trailingZeroDisplay", service, {"auto", "stripIfInteger"}, {TrailingZeroDisplay::kAuto, TrailingZeroDisplay::kStripIfInteger}, TrailingZeroDisplay::kAuto); MAYBE_RETURN(maybe_trailing_zero_display, Nothing<NumberFormatDigitOptions>()); digit_options.trailing_zero_display = maybe_trailing_zero_display.FromJust(); // 12. NOTE: All fields required by SetNumberFormatDigitOptions have now been // read from options. The remainder of this AO interprets the options and may // throw exceptions. // 17. If mnsd is not undefined or mxsd is not undefined, then // a. Set hasSd to true. // 18. Else, // a. Set hasSd to false. bool has_sd = (!IsUndefined(*mnsd_obj, isolate)) || (!IsUndefined(*mxsd_obj, isolate)); // 19. If mnfd is not undefined or mxfd is not undefined, then // a. Set hasFd to true. // 22. Else, // a. Set hasFd to false. bool has_fd = (!IsUndefined(*mnfd_obj, isolate)) || (!IsUndefined(*mxfd_obj, isolate)); // 21. Let needSd be true. bool need_sd = true; // 22. Let needFd be true. bool need_fd = true; // 23. If roundingPriority is "auto", then if (RoundingPriority::kAuto == digit_options.rounding_priority) { // a. Set needSd to hasSd. need_sd = has_sd; // b. If needSd is true, or hasFd is false and notation is "compact", then if (need_sd || ((!has_fd) && notation_is_compact)) { // i. Set needFd to false. need_fd = false; } } // 24. If needSd is true, then if (need_sd) { // 24.a If hasSd is true, then if (has_sd) { // i. Set intlObj.[[MinimumSignificantDigits]] to ? // DefaultNumberOption(mnsd, 1, 21, 1). int mnsd; if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, factory->minimumSignificantDigits_string()) .To(&mnsd)) { return Nothing<NumberFormatDigitOptions>(); } digit_options.minimum_significant_digits = mnsd; // ii. Set intlObj.[[MaximumSignificantDigits]] to ? // DefaultNumberOption(mxsd, intlObj.[[MinimumSignificantDigits]], 21, // 21). int mxsd; if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, factory->maximumSignificantDigits_string()) .To(&mxsd)) { return Nothing<NumberFormatDigitOptions>(); } digit_options.maximum_significant_digits = mxsd; } else { // 24.b Else // 24.b.i Set intlObj.[[MinimumSignificantDigits]] to 1. digit_options.minimum_significant_digits = 1; // 24.b.ii Set intlObj.[[MaximumSignificantDigits]] to 21. digit_options.maximum_significant_digits = 21; } } Handle<String> mxfd_str = factory->maximumFractionDigits_string(); // 25. If needFd is true, then if (need_fd) { // a. If hasFd is true, then if (has_fd) { Handle<String> mnfd_str = factory->minimumFractionDigits_string(); // i. Let mnfd be ? DefaultNumberOption(mnfd, 0, 100, undefined). int mnfd; if (!DefaultNumberOption(isolate, mnfd_obj, 0, 100, -1, mnfd_str) .To(&mnfd)) { return Nothing<NumberFormatDigitOptions>(); } // ii. Let mxfd be ? DefaultNumberOption(mxfd, 0, 100, undefined). int mxfd; if (!DefaultNumberOption(isolate, mxfd_obj, 0, 100, -1, mxfd_str) .To(&mxfd)) { return Nothing<NumberFormatDigitOptions>(); } // iii. If mnfd is undefined, set mnfd to min(mnfdDefault, mxfd). if (IsUndefined(*mnfd_obj, isolate)) { mnfd = std::min(mnfd_default, mxfd); } else if (IsUndefined(*mxfd_obj, isolate)) { // iv. Else if mxfd is undefined, set mxfd to max(mxfdDefault, // mnfd). mxfd = std::max(mxfd_default, mnfd); } else if (mnfd > mxfd) { // v. Else if mnfd is greater than mxfd, throw a RangeError // exception. THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str), Nothing<NumberFormatDigitOptions>()); } // vi. Set intlObj.[[MinimumFractionDigits]] to mnfd. digit_options.minimum_fraction_digits = mnfd; // vii. Set intlObj.[[MaximumFractionDigits]] to mxfd. digit_options.maximum_fraction_digits = mxfd; } else { // b. Else // i. Set intlObj.[[MinimumFractionDigits]] to mnfdDefault. digit_options.minimum_fraction_digits = mnfd_default; // ii. Set intlObj.[[MaximumFractionDigits]] to mxfdDefault. digit_options.maximum_fraction_digits = mxfd_default; } } // 26. If needSd is false and needFd is false, then if ((!need_sd) && (!need_fd)) { // a. Set intlObj.[[MinimumFractionDigits]] to 0. digit_options.minimum_fraction_digits = 0; // b. Set intlObj.[[MaximumFractionDigits]] to 0. digit_options.maximum_fraction_digits = 0; // c. Set intlObj.[[MinimumSignificantDigits]] to 1. digit_options.minimum_significant_digits = 1; // d. Set intlObj.[[MaximumSignificantDigits]] to 2. digit_options.maximum_significant_digits = 2; // e. Set intlObj.[[RoundingType]] to morePrecision. digit_options.rounding_type = RoundingType::kMorePrecision; // 27. Else if roundingPriority is "morePrecision", then } else if (digit_options.rounding_priority == RoundingPriority::kMorePrecision) { // i. Set intlObj.[[RoundingType]] to morePrecision. digit_options.rounding_type = RoundingType::kMorePrecision; // 28. Else if roundingPriority is "lessPrecision", then } else if (digit_options.rounding_priority == RoundingPriority::kLessPrecision) { // i. Set intlObj.[[RoundingType]] to lessPrecision. digit_options.rounding_type = RoundingType::kLessPrecision; // 29. Else if hasSd, then } else if (has_sd) { // i. Set intlObj.[[RoundingType]] to significantDigits. digit_options.rounding_type = RoundingType::kSignificantDigits; // 30. Else, } else { // i.Set intlObj.[[RoundingType]] to fractionDigits. digit_options.rounding_type = RoundingType::kFractionDigits; } // 31. If roundingIncrement is not 1, then if (digit_options.rounding_increment != 1) { // a. If intlObj.[[RoundingType]] is not fractionDigits, throw a TypeError // exception. if (digit_options.rounding_type != RoundingType::kFractionDigits) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewTypeError(MessageTemplate::kBadRoundingType), Nothing<NumberFormatDigitOptions>()); } // b. If intlObj.[[MaximumFractionDigits]] is not equal to // intlObj.[[MinimumFractionDigits]], throw a RangeError exception. if (digit_options.maximum_fraction_digits != digit_options.minimum_fraction_digits) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str), Nothing<NumberFormatDigitOptions>()); } } return Just(digit_options); } namespace { // ecma402/#sec-bestavailablelocale std::string BestAvailableLocale(const std::set<std::string>& available_locales, const std::string& locale) { // 1. Let candidate be locale. std::string candidate = locale; // 2. Repeat, while (true) { // 2.a. If availableLocales contains an element equal to candidate, return // candidate. if (available_locales.find(candidate) != available_locales.end()) { return candidate; } // 2.b. Let pos be the character index of the last occurrence of "-" // (U+002D) within candidate. If that character does not occur, return // undefined. size_t pos = candidate.rfind('-'); if (pos == std::string::npos) { return std::string(); } // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, // decrease pos by 2. if (pos >= 2 && candidate[pos - 2] == '-') { pos -= 2; } // 2.d. Let candidate be the substring of candidate from position 0, // inclusive, to position pos, exclusive. candidate = candidate.substr(0, pos); } } struct ParsedLocale { std::string no_extensions_locale; std::string extension; }; // Returns a struct containing a bcp47 tag without unicode extensions // and the removed unicode extensions. // // For example, given 'en-US-u-co-emoji' returns 'en-US' and // 'u-co-emoji'. ParsedLocale ParseBCP47Locale(const std::string& locale) { size_t length = locale.length(); ParsedLocale parsed_locale; // Privateuse or grandfathered locales have no extension sequences. if ((length > 1) && (locale[1] == '-')) { // Check to make sure that this really is a grandfathered or // privateuse extension. ICU can sometimes mess up the // canonicalization. DCHECK(locale[0] == 'x' || locale[0] == 'i'); parsed_locale.no_extensions_locale = locale; return parsed_locale; } size_t unicode_extension_start = locale.find("-u-"); // No unicode extensions found. if (unicode_extension_start == std::string::npos) { parsed_locale.no_extensions_locale = locale; return parsed_locale; } size_t private_extension_start = locale.find("-x-"); // Unicode extensions found within privateuse subtags don't count. if (private_extension_start != std::string::npos && private_extension_start < unicode_extension_start) { parsed_locale.no_extensions_locale = locale; return parsed_locale; } const std::string beginning = locale.substr(0, unicode_extension_start); size_t unicode_extension_end = length; DCHECK_GT(length, 2); // Find the end of the extension production as per the bcp47 grammar // by looking for '-' followed by 2 chars and then another '-'. for (size_t i = unicode_extension_start + 1; i < length - 2; i++) { if (locale[i] != '-') continue; if (locale[i + 2] == '-') { unicode_extension_end = i; break; } i += 2; } const std::string end = locale.substr(unicode_extension_end); parsed_locale.no_extensions_locale = beginning + end; parsed_locale.extension = locale.substr( unicode_extension_start, unicode_extension_end - unicode_extension_start); return parsed_locale; } // ecma402/#sec-lookupsupportedlocales std::vector<std::string> LookupSupportedLocales( const std::set<std::string>& available_locales, const std::vector<std::string>& requested_locales) { // 1. Let subset be a new empty List. std::vector<std::string> subset; // 2. For each element locale of requestedLocales in List order, do for (const std::string& locale : requested_locales) { // 2. a. Let noExtensionsLocale be the String value that is locale // with all Unicode locale extension sequences removed. std::string no_extension_locale = ParseBCP47Locale(locale).no_extensions_locale; // 2. b. Let availableLocale be // BestAvailableLocale(availableLocales, noExtensionsLocale). std::string available_locale = BestAvailableLocale(available_locales, no_extension_locale); // 2. c. If availableLocale is not undefined, append locale to the // end of subset. if (!available_locale.empty()) { subset.push_back(locale); } } // 3. Return subset. return subset; } icu::LocaleMatcher BuildLocaleMatcher( Isolate* isolate, const std::set<std::string>& available_locales, UErrorCode* status) { icu::Locale default_locale = icu::Locale::forLanguageTag(isolate->DefaultLocale(), *status); icu::LocaleMatcher::Builder builder; if (U_FAILURE(*status)) { return builder.build(*status); } builder.setDefaultLocale(&default_locale); for (auto it = available_locales.begin(); it != available_locales.end(); ++it) { *status = U_ZERO_ERROR; icu::Locale l = icu::Locale::forLanguageTag(it->c_str(), *status); // skip invalid locale such as no-NO-NY if (U_SUCCESS(*status)) { builder.addSupportedLocale(l); } } return builder.build(*status); } class Iterator : public icu::Locale::Iterator { public: Iterator(std::vector<std::string>::const_iterator begin, std::vector<std::string>::const_iterator end) : iter_(begin), end_(end) {} ~Iterator() override = default; UBool hasNext() const override { return iter_ != end_; } const icu::Locale& next() override { UErrorCode status = U_ZERO_ERROR; locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status); DCHECK(U_SUCCESS(status)); ++iter_; return locale_; } private: std::vector<std::string>::const_iterator iter_; std::vector<std::string>::const_iterator end_; icu::Locale locale_; }; // ecma402/#sec-bestfitmatcher // The BestFitMatcher abstract operation compares requestedLocales, which must // be a List as returned by CanonicalizeLocaleList, against the locales in // availableLocales and determines the best available language to meet the // request. The algorithm is implementation dependent, but should produce // results that a typical user of the requested locales would perceive // as at least as good as those produced by the LookupMatcher abstract // operation. Options specified through Unicode locale extension sequences must // be ignored by the algorithm. Information about such subsequences is returned // separately. The abstract operation returns a record with a [[locale]] field, // whose value is the language tag of the selected locale, which must be an // element of availableLocales. If the language tag of the request locale that // led to the selected locale contained a Unicode locale extension sequence, // then the returned record also contains an [[extension]] field whose value is // the first Unicode locale extension sequence within the request locale // language tag. std::string BestFitMatcher(Isolate* isolate, const std::set<std::string>& available_locales, const std::vector<std::string>& requested_locales) { UErrorCode status = U_ZERO_ERROR; Iterator iter(requested_locales.cbegin(), requested_locales.cend()); std::string bestfit = BuildLocaleMatcher(isolate, available_locales, &status) .getBestMatchResult(iter, status) .makeResolvedLocale(status) .toLanguageTag<std::string>(status); DCHECK(U_SUCCESS(status)); return bestfit; } // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales) // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales std::vector<std::string> BestFitSupportedLocales( Isolate* isolate, const std::set<std::string>& available_locales, const std::vector<std::string>& requested_locales) { UErrorCode status = U_ZERO_ERROR; icu::LocaleMatcher matcher = BuildLocaleMatcher(isolate, available_locales, &status); std::vector<std::string> result; if (U_SUCCESS(status)) { for (auto it = requested_locales.cbegin(); it != requested_locales.cend(); it++) { status = U_ZERO_ERROR; icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status); icu::LocaleMatcher::Result matched = matcher.getBestMatchResult(desired, status); if (U_FAILURE(status)) continue; if (matched.getSupportedIndex() < 0) continue; // The BestFitSupportedLocales abstract operation returns the *SUBSET* of // the provided BCP 47 language priority list requestedLocales for which // availableLocales has a matching locale when using the Best Fit Matcher // algorithm. Locales appear in the same order in the returned list as in // requestedLocales. The steps taken are implementation dependent. std::string bestfit = desired.toLanguageTag<std::string>(status); if (U_FAILURE(status)) continue; result.push_back(bestfit); } } return result; } // ecma262 #sec-createarrayfromlist MaybeHandle<JSArray> CreateArrayFromList(Isolate* isolate, std::vector<std::string> elements, PropertyAttributes attr) { Factory* factory = isolate->factory(); // Let array be ! ArrayCreate(0). Handle<JSArray> array = factory->NewJSArray(0); uint32_t length = static_cast<uint32_t>(elements.size()); // 3. Let n be 0. // 4. For each element e of elements, do for (uint32_t i = 0; i < length; i++) { // a. Let status be CreateDataProperty(array, ! ToString(n), e). const std::string& part = elements[i]; Handle<String> value = factory->NewStringFromUtf8(base::CStrVector(part.c_str())) .ToHandleChecked(); MAYBE_RETURN(JSObject::AddDataElement(array, i, value, attr), MaybeHandle<JSArray>()); } // 5. Return array. return MaybeHandle<JSArray>(array); } // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options) // https://tc39.github.io/ecma402/#sec-supportedlocales MaybeHandle<JSObject> SupportedLocales( Isolate* isolate, const char* method_name, const std::set<std::string>& available_locales, const std::vector<std::string>& requested_locales, Handle<Object> options) { std::vector<std::string> supported_locales; // 1. Set options to ? CoerceOptionsToObject(options). Handle<JSReceiver> options_obj; ASSIGN_RETURN_ON_EXCEPTION( isolate, options_obj, CoerceOptionsToObject(isolate, options, method_name), JSObject); // 2. Let matcher be ? GetOption(options, "localeMatcher", "string", // « "lookup", "best fit" », "best fit"). Maybe<Intl::MatcherOption> maybe_locale_matcher = Intl::GetLocaleMatcher(isolate, options_obj, method_name); MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>()); Intl::MatcherOption matcher = maybe_locale_matcher.FromJust(); // 3. If matcher is "best fit", then // a. Let supportedLocales be BestFitSupportedLocales(availableLocales, // requestedLocales). if (matcher == Intl::MatcherOption::kBestFit && v8_flags.harmony_intl_best_fit_matcher) { supported_locales = BestFitSupportedLocales(isolate, available_locales, requested_locales); } else { // 4. Else, // a. Let supportedLocales be LookupSupportedLocales(availableLocales, // requestedLocales). supported_locales = LookupSupportedLocales(available_locales, requested_locales); } // 5. Return CreateArrayFromList(supportedLocales). return CreateArrayFromList(isolate, supported_locales, PropertyAttributes::NONE); } } // namespace // ecma-402 #sec-intl.getcanonicallocales MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate, Handle<Object> locales) { // 1. Let ll be ? CanonicalizeLocaleList(locales). Maybe<std::vector<std::string>> maybe_ll = CanonicalizeLocaleList(isolate, locales, false); MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>()); // 2. Return CreateArrayFromList(ll). return CreateArrayFromList(isolate, maybe_ll.FromJust(), PropertyAttributes::NONE); } namespace { MaybeHandle<JSArray> AvailableCollations(Isolate* isolate) { UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::StringEnumeration> enumeration( icu::Collator::getKeywordValues("collation", status)); if (U_FAILURE(status)) { THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError), JSArray); } return Intl::ToJSArray(isolate, "co", enumeration.get(), Intl::RemoveCollation, true); } MaybeHandle<JSArray> VectorToJSArray(Isolate* isolate, const std::vector<std::string>& array) { Factory* factory = isolate->factory(); Handle<FixedArray> fixed_array = factory->NewFixedArray(static_cast<int32_t>(array.size())); int32_t index = 0; for (std::string item : array) { Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str()); fixed_array->set(index++, *str); } return factory->NewJSArrayWithElements(fixed_array); } namespace { class ResourceAvailableCurrencies { public: ResourceAvailableCurrencies() { UErrorCode status = U_ZERO_ERROR; UEnumeration* uenum = ucurr_openISOCurrencies(UCURR_COMMON | UCURR_NON_DEPRECATED, &status); DCHECK(U_SUCCESS(status)); const char* next = nullptr; while (U_SUCCESS(status) && (next = uenum_next(uenum, nullptr, &status)) != nullptr) { // Work around the issue that we do not support VEF currency code // in DisplayNames by not reporting it. if (strcmp(next, "VEF") == 0) continue; AddIfAvailable(next); } // Work around the issue that we do support the following currency codes // in DisplayNames but the ICU API is not reporting it. AddIfAvailable("SVC"); AddIfAvailable("XDR"); AddIfAvailable("XSU"); AddIfAvailable("ZWL"); std::sort(list_.begin(), list_.end()); uenum_close(uenum); } const std::vector<std::string>& Get() const { return list_; } void AddIfAvailable(const char* currency) { icu::UnicodeString code(currency, -1, US_INV); UErrorCode status = U_ZERO_ERROR; int32_t len = 0; const UChar* result = ucurr_getName(code.getTerminatedBuffer(), "en", UCURR_LONG_NAME, nullptr, &len, &status); if (U_SUCCESS(status) && u_strcmp(result, code.getTerminatedBuffer()) != 0) { list_.push_back(currency); } } private: std::vector<std::string> list_; }; const std::vector<std::string>& GetAvailableCurrencies() { static base::LazyInstance<ResourceAvailableCurrencies>::type available_currencies = LAZY_INSTANCE_INITIALIZER; return available_currencies.Pointer()->Get(); } } // namespace MaybeHandle<JSArray> AvailableCurrencies(Isolate* isolate) { return VectorToJSArray(isolate, GetAvailableCurrencies()); } MaybeHandle<JSArray> AvailableNumberingSystems(Isolate* isolate) { UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::StringEnumeration> enumeration( icu::NumberingSystem::getAvailableNames(status)); if (U_FAILURE(status)) { THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError), JSArray); } // Need to filter out isAlgorithmic return Intl::ToJSArray( isolate, "nu", enumeration.get(), [](const char* value) { UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::NumberingSystem> numbering_system( icu::NumberingSystem::createInstanceByName(value, status)); // Skip algorithmic one since chrome filter out the resource. return U_FAILURE(status) || numbering_system->isAlgorithmic(); }, true); } MaybeHandle<JSArray> AvailableTimeZones(Isolate* isolate) { UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::StringEnumeration> enumeration( icu::TimeZone::createTimeZoneIDEnumeration( UCAL_ZONE_TYPE_CANONICAL_LOCATION, nullptr, nullptr, status)); if (U_FAILURE(status)) { THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError), JSArray); } return Intl::ToJSArray(isolate, nullptr, enumeration.get(), nullptr, true); } MaybeHandle<JSArray> AvailableUnits(Isolate* isolate) { Factory* factory = isolate->factory(); std::set<std::string> sanctioned(Intl::SanctionedSimpleUnits()); Handle<FixedArray> fixed_array = factory->NewFixedArray(static_cast<int32_t>(sanctioned.size())); int32_t index = 0; for (std::string item : sanctioned) { Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str()); fixed_array->set(index++, *str); } return factory->NewJSArrayWithElements(fixed_array); } } // namespace // ecma-402 #sec-intl.supportedvaluesof MaybeHandle<JSArray> Intl::SupportedValuesOf(Isolate* isolate, Handle<Object> key_obj) { Factory* factory = isolate->factory(); // 1. 1. Let key be ? ToString(key). Handle<String> key_str; ASSIGN_RETURN_ON_EXCEPTION(isolate, key_str, Object::ToString(isolate, key_obj), JSArray); // 2. If key is "calendar", then if (factory->calendar_string()->Equals(*key_str)) { // a. Let list be ! AvailableCalendars( ). return Intl::AvailableCalendars(isolate); } // 3. Else if key is "collation", then if (factory->collation_string()->Equals(*key_str)) { // a. Let list be ! AvailableCollations( ). return AvailableCollations(isolate); } // 4. Else if key is "currency", then if (factory->currency_string()->Equals(*key_str)) { // a. Let list be ! AvailableCurrencies( ). return AvailableCurrencies(isolate); } // 5. Else if key is "numberingSystem", then if (factory->numberingSystem_string()->Equals(*key_str)) { // a. Let list be ! AvailableNumberingSystems( ). return AvailableNumberingSystems(isolate); } // 6. Else if key is "timeZone", then if (factory->timeZone_string()->Equals(*key_str)) { // a. Let list be ! AvailableTimeZones( ). return AvailableTimeZones(isolate); } // 7. Else if key is "unit", then if (factory->unit_string()->Equals(*key_str)) { // a. Let list be ! AvailableUnits( ). return AvailableUnits(isolate); } // 8. Else, // a. Throw a RangeError exception. // 9. Return ! CreateArrayFromList( list ). THROW_NEW_ERROR( isolate, NewRangeError(MessageTemplate::kInvalid, factory->NewStringFromStaticChars("key"), key_str), JSArray); } // ECMA 402 Intl.*.supportedLocalesOf MaybeHandle<JSObject> Intl::SupportedLocalesOf( Isolate* isolate, const char* method_name, const std::set<std::string>& available_locales, Handle<Object> locales, Handle<Object> options) { // Let availableLocales be %Collator%.[[AvailableLocales]]. // Let requestedLocales be ? CanonicalizeLocaleList(locales). Maybe<std::vector<std::string>> requested_locales = CanonicalizeLocaleList(isolate, locales, false); MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>()); // Return ? SupportedLocales(availableLocales, requestedLocales, options). return SupportedLocales(isolate, method_name, available_locales, requested_locales.FromJust(), options); } namespace { template <typename T> bool IsValidExtension(const icu::Locale& locale, const char* key, const std::string& value) { const char* legacy_type = uloc_toLegacyType(key, value.c_str()); if (legacy_type == nullptr) { return false; } UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::StringEnumeration> enumeration( T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()), false, status)); if (U_FAILURE(status)) { return false; } int32_t length; for (const char* item = enumeration->next(&length, status); U_SUCCESS(status) && item != nullptr; item = enumeration->next(&length, status)) { if (strcmp(legacy_type, item) == 0) { return true; } } return false; } } // namespace bool Intl::IsValidCollation(const icu::Locale& locale, const std::string& value) { std::set<std::string> invalid_values = {"standard", "search"}; if (invalid_values.find(value) != invalid_values.end()) return false; return IsValidExtension<icu::Collator>(locale, "collation", value); } bool Intl::IsWellFormedCalendar(const std::string& value) { return JSLocale::Is38AlphaNumList(value); } // ecma402/#sec-iswellformedcurrencycode bool Intl::IsWellFormedCurrency(const std::string& currency) { return JSLocale::Is3Alpha(currency); } bool Intl::IsValidCalendar(const icu::Locale& locale, const std::string& value) { return IsValidExtension<icu::Calendar>(locale, "calendar", value); } bool Intl::IsValidNumberingSystem(const std::string& value) { std::set<std::string> invalid_values = {"native", "traditio", "finance"}; if (invalid_values.find(value) != invalid_values.end()) return false; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::NumberingSystem> numbering_system( icu::NumberingSystem::createInstanceByName(value.c_str(), status)); return U_SUCCESS(status) && numbering_system.get() != nullptr && !numbering_system->isAlgorithmic(); } namespace { bool IsWellFormedNumberingSystem(const std::string& value) { return JSLocale::Is38AlphaNumList(value); } std::map<std::string, std::string> LookupAndValidateUnicodeExtensions( icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) { std::map<std::string, std::string> extensions; UErrorCode status = U_ZERO_ERROR; icu::LocaleBuilder builder; builder.setLocale(*icu_locale).clearExtensions(); std::unique_ptr<icu::StringEnumeration> keywords( icu_locale->createKeywords(status)); if (U_FAILURE(status)) return extensions; if (!keywords) return extensions; char value[ULOC_FULLNAME_CAPACITY]; int32_t length; status = U_ZERO_ERROR; for (const char* keyword = keywords->next(&length, status); keyword != nullptr; keyword = keywords->next(&length, status)) { // Ignore failures in ICU and skip to the next keyword. // // This is fine.™ if (U_FAILURE(status)) { status = U_ZERO_ERROR; continue; } icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status); // Ignore failures in ICU and skip to the next keyword. // // This is fine.™ if (U_FAILURE(status)) { status = U_ZERO_ERROR; continue; } const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword); if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) { const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value); bool is_valid_value = false; // 8.h.ii.1.a If keyLocaleData contains requestedValue, then if (strcmp("ca", bcp47_key) == 0) { is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value); } else if (strcmp("co", bcp47_key) == 0) { is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value); } else if (strcmp("hc", bcp47_key) == 0) { // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"}; is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); } else if (strcmp("lb", bcp47_key) == 0) { // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml std::set<std::string> valid_values = {"strict", "normal", "loose"}; is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); } else if (strcmp("kn", bcp47_key) == 0) { // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml std::set<std::string> valid_values = {"true", "false"}; is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); } else if (strcmp("kf", bcp47_key) == 0) { // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml std::set<std::string> valid_values = {"upper", "lower", "false"}; is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); } else if (strcmp("nu", bcp47_key) == 0) { is_valid_value = Intl::IsValidNumberingSystem(bcp47_value); } if (is_valid_value) { extensions.insert( std::pair<std::string, std::string>(bcp47_key, bcp47_value)); builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value); } } } status = U_ZERO_ERROR; *icu_locale = builder.build(status); return extensions; } // ecma402/#sec-lookupmatcher std::string LookupMatcher(Isolate* isolate, const std::set<std::string>& available_locales, const std::vector<std::string>& requested_locales) { // 1. Let result be a new Record. std::string result; // 2. For each element locale of requestedLocales in List order, do for (const std::string& locale : requested_locales) { // 2. a. Let noExtensionsLocale be the String value that is locale // with all Unicode locale extension sequences removed. ParsedLocale parsed_locale = ParseBCP47Locale(locale); std::string no_extensions_locale = parsed_locale.no_extensions_locale; // 2. b. Let availableLocale be // BestAvailableLocale(availableLocales, noExtensionsLocale). std::string available_locale = BestAvailableLocale(available_locales, no_extensions_locale); // 2. c. If availableLocale is not undefined, append locale to the // end of subset. if (!available_locale.empty()) { // Note: The following steps are not performed here because we // can use ICU to parse the unicode locale extension sequence // as part of Intl::ResolveLocale. // // There's no need to separate the unicode locale extensions // right here. Instead just return the available locale with the // extensions. // // 2. c. i. Set result.[[locale]] to availableLocale. // 2. c. ii. If locale and noExtensionsLocale are not the same // String value, then // 2. c. ii. 1. Let extension be the String value consisting of // the first substring of locale that is a Unicode locale // extension sequence. // 2. c. ii. 2. Set result.[[extension]] to extension. // 2. c. iii. Return result. return available_locale + parsed_locale.extension; } } // 3. Let defLocale be DefaultLocale(); // 4. Set result.[[locale]] to defLocale. // 5. Return result. return isolate->DefaultLocale(); } } // namespace // This function doesn't correspond exactly with the spec. Instead // we use ICU to do all the string manipulations that the spec // peforms. // // The spec uses this function to normalize values for various // relevant extension keys (such as disallowing "search" for // collation). Instead of doing this here, we let the callers of // this method perform such normalization. // // ecma402/#sec-resolvelocale Maybe<Intl::ResolvedLocale> Intl::ResolveLocale( Isolate* isolate, const std::set<std::string>& available_locales, const std::vector<std::string>& requested_locales, MatcherOption matcher, const std::set<std::string>& relevant_extension_keys) { std::string locale; if (matcher == Intl::MatcherOption::kBestFit && v8_flags.harmony_intl_best_fit_matcher) { locale = BestFitMatcher(isolate, available_locales, requested_locales); } else { locale = LookupMatcher(isolate, available_locales, requested_locales); } Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale); MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>()); icu::Locale icu_locale = maybe_icu_locale.FromJust(); std::map<std::string, std::string> extensions = LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys); std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust(); // TODO(gsathya): Remove privateuse subtags from extensions. return Just( Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions}); } Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator( Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) { text = String::Flatten(isolate, text); icu::UnicodeString* u_text = static_cast<icu::UnicodeString*>( Intl::ToICUUnicodeString(isolate, text).clone()); Handle<Managed<icu::UnicodeString>> new_u_text = Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text); break_iterator->setText(*u_text); return new_u_text; } // ecma262 #sec-string.prototype.normalize MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string, Handle<Object> form_input) { const char* form_name; UNormalization2Mode form_mode; if (IsUndefined(*form_input, isolate)) { // default is FNC form_name = "nfc"; form_mode = UNORM2_COMPOSE; } else { Handle<String> form; ASSIGN_RETURN_ON_EXCEPTION(isolate, form, Object::ToString(isolate, form_input), String); if (String::Equals(isolate, form, isolate->factory()->NFC_string())) { form_name = "nfc"; form_mode = UNORM2_COMPOSE; } else if (String::Equals(isolate, form, isolate->factory()->NFD_string())) { form_name = "nfc"; form_mode = UNORM2_DECOMPOSE; } else if (String::Equals(isolate, form, isolate->factory()->NFKC_string())) { form_name = "nfkc"; form_mode = UNORM2_COMPOSE; } else if (String::Equals(isolate, form, isolate->factory()->NFKD_string())) { form_name = "nfkc"; form_mode = UNORM2_DECOMPOSE; } else { Handle<String> valid_forms = isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD"); THROW_NEW_ERROR( isolate, NewRangeError(MessageTemplate::kNormalizationForm, valid_forms), String); } } int length = string->length(); string = String::Flatten(isolate, string); icu::UnicodeString result; std::unique_ptr<base::uc16[]> sap; UErrorCode status = U_ZERO_ERROR; icu::UnicodeString input = ToICUUnicodeString(isolate, string); // Getting a singleton. Should not free it. const icu::Normalizer2* normalizer = icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status); DCHECK(U_SUCCESS(status)); DCHECK_NOT_NULL(normalizer); int32_t normalized_prefix_length = normalizer->spanQuickCheckYes(input, status); // Quick return if the input is already normalized. if (length == normalized_prefix_length) return string; icu::UnicodeString unnormalized = input.tempSubString(normalized_prefix_length); // Read-only alias of the normalized prefix. result.setTo(false, input.getBuffer(), normalized_prefix_length); // copy-on-write; normalize the suffix and append to |result|. normalizer->normalizeSecondAndAppend(result, unnormalized, status); if (U_FAILURE(status)) { THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String); } return Intl::ToString(isolate, result); } // ICUTimezoneCache calls out to ICU for TimezoneCache // functionality in a straightforward way. class ICUTimezoneCache : public base::TimezoneCache { public: ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); } ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); } const char* LocalTimezone(double time_ms) override; double DaylightSavingsOffset(double time_ms) override; double LocalTimeOffset(double time_ms, bool is_utc) override; void Clear(TimeZoneDetection time_zone_detection) override; private: icu::TimeZone* GetTimeZone(); bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset, int32_t* dst_offset); icu::TimeZone* timezone_; std::string timezone_name_; std::string dst_timezone_name_; }; const char* ICUTimezoneCache::LocalTimezone(double time_ms) { bool is_dst = DaylightSavingsOffset(time_ms) != 0; std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_; if (name->empty()) { icu::UnicodeString result; GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result); result += '\0'; icu::StringByteSink<std::string> byte_sink(name); result.toUTF8(byte_sink); } DCHECK(!name->empty()); return name->c_str(); } icu::TimeZone* ICUTimezoneCache::GetTimeZone() { if (timezone_ == nullptr) { timezone_ = icu::TimeZone::createDefault(); } return timezone_; } bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset, int32_t* dst_offset) { UErrorCode status = U_ZERO_ERROR; if (is_utc) { GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status); } else { // Note that casting TimeZone to BasicTimeZone is safe because we know that // icu::TimeZone used here is a BasicTimeZone. static_cast<const icu::BasicTimeZone*>(GetTimeZone()) ->getOffsetFromLocal(time_ms, UCAL_TZ_LOCAL_FORMER, UCAL_TZ_LOCAL_FORMER, *raw_offset, *dst_offset, status); } return U_SUCCESS(status); } double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) { int32_t raw_offset, dst_offset; if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0; return dst_offset; } double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) { int32_t raw_offset, dst_offset; if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0; return raw_offset + dst_offset; } void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) { delete timezone_; timezone_ = nullptr; timezone_name_.clear(); dst_timezone_name_.clear(); if (time_zone_detection == TimeZoneDetection::kRedetect) { icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone()); } } base::TimezoneCache* Intl::CreateTimeZoneCache() { return v8_flags.icu_timezone_data ? new ICUTimezoneCache() : base::OS::CreateTimezoneCache(); } Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate, Handle<JSReceiver> options, const char* method_name) { return GetStringOption<Intl::MatcherOption>( isolate, options, "localeMatcher", method_name, {"best fit", "lookup"}, {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup}, Intl::MatcherOption::kBestFit); } Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate, Handle<JSReceiver> options, const char* method_name, std::unique_ptr<char[]>* result) { const std::vector<const char*> empty_values = {}; Maybe<bool> maybe = GetStringOption(isolate, options, "numberingSystem", empty_values, method_name, result); MAYBE_RETURN(maybe, Nothing<bool>()); if (maybe.FromJust() && *result != nullptr) { if (!IsWellFormedNumberingSystem(result->get())) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError( MessageTemplate::kInvalid, isolate->factory()->numberingSystem_string(), isolate->factory()->NewStringFromAsciiChecked(result->get())), Nothing<bool>()); } return Just(true); } return Just(false); } const std::set<std::string>& Intl::GetAvailableLocales() { static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales = LAZY_INSTANCE_INITIALIZER; return available_locales.Pointer()->Get(); } namespace { struct CheckCalendar { static const char* key() { return "calendar"; } static const char* path() { return nullptr; } }; } // namespace const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() { static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type available_locales = LAZY_INSTANCE_INITIALIZER; return available_locales.Pointer()->Get(); } constexpr uint16_t kInfinityChar = 0x221e; Handle<String> Intl::NumberFieldToType(Isolate* isolate, const NumberFormatSpan& part, const icu::UnicodeString& text, bool is_nan) { switch (static_cast<UNumberFormatFields>(part.field_id)) { case UNUM_INTEGER_FIELD: if (is_nan) return isolate->factory()->nan_string(); if (text.charAt(part.begin_pos) == kInfinityChar || // en-US-POSIX output "INF" for Infinity (part.end_pos - part.begin_pos == 3 && text.tempSubString(part.begin_pos, 3) == "INF")) { return isolate->factory()->infinity_string(); } return isolate->factory()->integer_string(); case UNUM_FRACTION_FIELD: return isolate->factory()->fraction_string(); case UNUM_DECIMAL_SEPARATOR_FIELD: return isolate->factory()->decimal_string(); case UNUM_GROUPING_SEPARATOR_FIELD: return isolate->factory()->group_string(); case UNUM_CURRENCY_FIELD: return isolate->factory()->currency_string(); case UNUM_PERCENT_FIELD: return isolate->factory()->percentSign_string(); case UNUM_SIGN_FIELD: return (text.charAt(part.begin_pos) == '+') ? isolate->factory()->plusSign_string() : isolate->factory()->minusSign_string(); case UNUM_EXPONENT_SYMBOL_FIELD: return isolate->factory()->exponentSeparator_string(); case UNUM_EXPONENT_SIGN_FIELD: return isolate->factory()->exponentMinusSign_string(); case UNUM_EXPONENT_FIELD: return isolate->factory()->exponentInteger_string(); case UNUM_PERMILL_FIELD: // We're not creating any permill formatter, and it's not even clear how // that would be possible with the ICU API. UNREACHABLE(); case UNUM_COMPACT_FIELD: return isolate->factory()->compact_string(); case UNUM_MEASURE_UNIT_FIELD: return isolate->factory()->unit_string(); case UNUM_APPROXIMATELY_SIGN_FIELD: return isolate->factory()->approximatelySign_string(); default: UNREACHABLE(); } } // A helper function to convert the FormattedValue for several Intl objects. MaybeHandle<String> Intl::FormattedToString( Isolate* isolate, const icu::FormattedValue& formatted) { UErrorCode status = U_ZERO_ERROR; icu::UnicodeString result = formatted.toString(status); if (U_FAILURE(status)) { THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String); } return Intl::ToString(isolate, result); } MaybeHandle<JSArray> Intl::ToJSArray( Isolate* isolate, const char* unicode_key, icu::StringEnumeration* enumeration, const std::function<bool(const char*)>& removes, bool sort) { UErrorCode status = U_ZERO_ERROR; std::vector<std::string> array; for (const char* item = enumeration->next(nullptr, status); U_SUCCESS(status) && item != nullptr; item = enumeration->next(nullptr, status)) { if (unicode_key != nullptr) { item = uloc_toUnicodeLocaleType(unicode_key, item); } if (removes == nullptr || !(removes)(item)) { array.push_back(item); } } if (sort) { std::sort(array.begin(), array.end()); } return VectorToJSArray(isolate, array); } bool Intl::RemoveCollation(const char* collation) { return strcmp("standard", collation) == 0 || strcmp("search", collation) == 0; } // See the list in ecma402 #sec-issanctionedsimpleunitidentifier std::set<std::string> Intl::SanctionedSimpleUnits() { return std::set<std::string>( {"acre", "bit", "byte", "celsius", "centimeter", "day", "degree", "fahrenheit", "fluid-ounce", "foot", "gallon", "gigabit", "gigabyte", "gram", "hectare", "hour", "inch", "kilobit", "kilobyte", "kilogram", "kilometer", "liter", "megabit", "megabyte", "meter", "microsecond", "mile", "mile-scandinavian", "millimeter", "milliliter", "millisecond", "minute", "month", "nanosecond", "ounce", "percent", "petabyte", "pound", "second", "stone", "terabit", "terabyte", "week", "yard", "year"}); } // ecma-402/#sec-isvalidtimezonename namespace { bool IsUnicodeStringValidTimeZoneName(const icu::UnicodeString& id) { UErrorCode status = U_ZERO_ERROR; icu::UnicodeString canonical; icu::TimeZone::getCanonicalID(id, canonical, status); return U_SUCCESS(status) && canonical != icu::UnicodeString("Etc/Unknown", -1, US_INV); } } // namespace MaybeHandle<String> Intl::CanonicalizeTimeZoneName(Isolate* isolate, Handle<String> identifier) { UErrorCode status = U_ZERO_ERROR; std::string time_zone = JSDateTimeFormat::CanonicalizeTimeZoneID(identifier->ToCString().get()); icu::UnicodeString time_zone_ustring = icu::UnicodeString(time_zone.c_str(), -1, US_INV); icu::UnicodeString canonical; icu::TimeZone::getCanonicalID(time_zone_ustring, canonical, status); CHECK(U_SUCCESS(status)); return JSDateTimeFormat::TimeZoneIdToString(isolate, canonical); } bool Intl::IsValidTimeZoneName(Isolate* isolate, Handle<String> id) { std::string time_zone = JSDateTimeFormat::CanonicalizeTimeZoneID(id->ToCString().get()); icu::UnicodeString time_zone_ustring = icu::UnicodeString(time_zone.c_str(), -1, US_INV); return IsUnicodeStringValidTimeZoneName(time_zone_ustring); } bool Intl::IsValidTimeZoneName(const icu::TimeZone& tz) { icu::UnicodeString id; tz.getID(id); return IsUnicodeStringValidTimeZoneName(id); } // Function to support Temporal std::string Intl::TimeZoneIdFromIndex(int32_t index) { if (index == JSTemporalTimeZone::kUTCTimeZoneIndex) { return "UTC"; } std::unique_ptr<icu::StringEnumeration> enumeration( icu::TimeZone::createEnumeration()); int32_t curr = 0; const char* id; UErrorCode status = U_ZERO_ERROR; while (U_SUCCESS(status) && curr < index && ((id = enumeration->next(nullptr, status)) != nullptr)) { CHECK(U_SUCCESS(status)); curr++; } CHECK(U_SUCCESS(status)); CHECK(id != nullptr); return id; } int32_t Intl::GetTimeZoneIndex(Isolate* isolate, Handle<String> identifier) { if (identifier->Equals(*isolate->factory()->UTC_string())) { return 0; } std::string identifier_str(identifier->ToCString().get()); std::unique_ptr<icu::TimeZone> tz( icu::TimeZone::createTimeZone(identifier_str.c_str())); if (!IsValidTimeZoneName(*tz)) { return -1; } std::unique_ptr<icu::StringEnumeration> enumeration( icu::TimeZone::createEnumeration()); int32_t curr = 0; const char* id; UErrorCode status = U_ZERO_ERROR; while (U_SUCCESS(status) && (id = enumeration->next(nullptr, status)) != nullptr) { curr++; if (identifier_str == id) { return curr; } } CHECK(U_SUCCESS(status)); // We should not reach here, the !IsValidTimeZoneName should return earlier UNREACHABLE(); } Intl::FormatRangeSourceTracker::FormatRangeSourceTracker() { start_[0] = start_[1] = limit_[0] = limit_[1] = 0; } void Intl::FormatRangeSourceTracker::Add(int32_t field, int32_t start, int32_t limit) { DCHECK_LT(field, 2); start_[field] = start; limit_[field] = limit; } Intl::FormatRangeSource Intl::FormatRangeSourceTracker::GetSource( int32_t start, int32_t limit) const { FormatRangeSource source = FormatRangeSource::kShared; if (FieldContains(0, start, limit)) { source = FormatRangeSource::kStartRange; } else if (FieldContains(1, start, limit)) { source = FormatRangeSource::kEndRange; } return source; } bool Intl::FormatRangeSourceTracker::FieldContains(int32_t field, int32_t start, int32_t limit) const { DCHECK_LT(field, 2); return (start_[field] <= start) && (start <= limit_[field]) && (start_[field] <= limit) && (limit <= limit_[field]); } Handle<String> Intl::SourceString(Isolate* isolate, FormatRangeSource source) { switch (source) { case FormatRangeSource::kShared: return ReadOnlyRoots(isolate).shared_string_handle(); case FormatRangeSource::kStartRange: return ReadOnlyRoots(isolate).startRange_string_handle(); case FormatRangeSource::kEndRange: return ReadOnlyRoots(isolate).endRange_string_handle(); } } Handle<String> Intl::DefaultTimeZone(Isolate* isolate) { icu::UnicodeString id; { std::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createDefault()); tz->getID(id); } UErrorCode status = U_ZERO_ERROR; icu::UnicodeString canonical; icu::TimeZone::getCanonicalID(id, canonical, status); DCHECK(U_SUCCESS(status)); return JSDateTimeFormat::TimeZoneIdToString(isolate, canonical) .ToHandleChecked(); } namespace { const icu::BasicTimeZone* CreateBasicTimeZoneFromIndex( int32_t time_zone_index) { DCHECK_NE(time_zone_index, 0); return static_cast<const icu::BasicTimeZone*>( icu::TimeZone::createTimeZone(icu::UnicodeString( Intl::TimeZoneIdFromIndex(time_zone_index).c_str(), -1, US_INV))); } // ICU only support TimeZone information in millisecond but Temporal require // nanosecond. For most of the case, we find a approximate millisecond by // floor to the millisecond just past the nanosecond_epoch. For negative epoch // value, the BigInt Divide will floor closer to zero so we need to minus 1 if // the remainder is not zero. For the case of finding previous transition, we // need to ceil to the millisecond in the near future of the nanosecond_epoch. enum class Direction { kPast, kFuture }; int64_t ApproximateMillisecondEpoch(Isolate* isolate, Handle<BigInt> nanosecond_epoch, Direction direction = Direction::kPast) { Handle<BigInt> one_million = BigInt::FromUint64(isolate, 1000000); int64_t ms = BigInt::Divide(isolate, nanosecond_epoch, one_million) .ToHandleChecked() ->AsInt64(); Handle<BigInt> remainder = BigInt::Remainder(isolate, nanosecond_epoch, one_million) .ToHandleChecked(); // If the nanosecond_epoch is not on the exact millisecond if (remainder->ToBoolean()) { if (direction == Direction::kPast) { if (remainder->IsNegative()) { // If the remaninder is negative, we know we have an negative epoch // We need to decrease one millisecond. // Move to the previous millisecond ms -= 1; } } else { if (!remainder->IsNegative()) { // Move to the future millisecond ms += 1; } } } return ms; } // Helper function to convert the milliseconds in int64_t // to a BigInt in nanoseconds. Handle<BigInt> MillisecondToNanosecond(Isolate* isolate, int64_t ms) { return BigInt::Multiply(isolate, BigInt::FromInt64(isolate, ms), BigInt::FromUint64(isolate, 1000000)) .ToHandleChecked(); } } // namespace Handle<Object> Intl::GetTimeZoneOffsetTransitionNanoseconds( Isolate* isolate, int32_t time_zone_index, Handle<BigInt> nanosecond_epoch, Intl::Transition transition) { std::unique_ptr<const icu::BasicTimeZone> basic_time_zone( CreateBasicTimeZoneFromIndex(time_zone_index)); icu::TimeZoneTransition icu_transition; UBool has_transition; switch (transition) { case Intl::Transition::kNext: has_transition = basic_time_zone->getNextTransition( ApproximateMillisecondEpoch(isolate, nanosecond_epoch), false, icu_transition); break; case Intl::Transition::kPrevious: has_transition = basic_time_zone->getPreviousTransition( ApproximateMillisecondEpoch(isolate, nanosecond_epoch, Direction::kFuture), false, icu_transition); break; } if (!has_transition) { return isolate->factory()->null_value(); } // #sec-temporal-getianatimezonenexttransition and // #sec-temporal-getianatimezoneprevioustransition states: // "The operation returns null if no such transition exists for which t ≤ // ℤ(nsMaxInstant)." and "The operation returns null if no such transition // exists for which t ≥ ℤ(nsMinInstant)." // // nsMinInstant = -nsMaxInstant = -8.64 × 10^21 => msMinInstant = -8.64 x // 10^15 constexpr int64_t kMsMinInstant = -8.64e15; // nsMaxInstant = 10^8 × nsPerDay = 8.64 × 10^21 => msMaxInstant = 8.64 x // 10^15 constexpr int64_t kMsMaxInstant = 8.64e15; int64_t time_ms = static_cast<int64_t>(icu_transition.getTime()); if (time_ms < kMsMinInstant || time_ms > kMsMaxInstant) { return isolate->factory()->null_value(); } return MillisecondToNanosecond(isolate, time_ms); } std::vector<Handle<BigInt>> Intl::GetTimeZonePossibleOffsetNanoseconds( Isolate* isolate, int32_t time_zone_index, Handle<BigInt> nanosecond_epoch) { std::unique_ptr<const icu::BasicTimeZone> basic_time_zone( CreateBasicTimeZoneFromIndex(time_zone_index)); int64_t time_ms = ApproximateMillisecondEpoch(isolate, nanosecond_epoch); int32_t raw_offset; int32_t dst_offset; UErrorCode status = U_ZERO_ERROR; basic_time_zone->getOffsetFromLocal(time_ms, UCAL_TZ_LOCAL_FORMER, UCAL_TZ_LOCAL_FORMER, raw_offset, dst_offset, status); DCHECK(U_SUCCESS(status)); // offset for time_ms interpretted as before a time zone // transition int64_t offset_former = raw_offset + dst_offset; basic_time_zone->getOffsetFromLocal(time_ms, UCAL_TZ_LOCAL_LATTER, UCAL_TZ_LOCAL_LATTER, raw_offset, dst_offset, status); DCHECK(U_SUCCESS(status)); // offset for time_ms interpretted as after a time zone // transition int64_t offset_latter = raw_offset + dst_offset; std::vector<Handle<BigInt>> result; if (offset_former == offset_latter) { // For most of the time, when either interpretation are the same, we are not // in a moment of offset transition based on rule changing: Just return that // value. result.push_back(MillisecondToNanosecond(isolate, offset_former)); } else if (offset_former > offset_latter) { // When the input represents a local time repeating multiple times at a // negative time zone transition (e.g. when the daylight saving time ends // or the time zone offset is decreased due to a time zone rule change). result.push_back(MillisecondToNanosecond(isolate, offset_former)); result.push_back(MillisecondToNanosecond(isolate, offset_latter)); } else { // If the offset after the transition is greater than the offset before the // transition, that mean it is in the moment the time "skip" an hour, or two // (or six in a Time Zone in south pole) in that case there are no possible // Time Zone offset for that moment and nothing will be added to the result. } return result; } int64_t Intl::GetTimeZoneOffsetNanoseconds(Isolate* isolate, int32_t time_zone_index, Handle<BigInt> nanosecond_epoch) { std::unique_ptr<const icu::BasicTimeZone> basic_time_zone( CreateBasicTimeZoneFromIndex(time_zone_index)); int64_t time_ms = ApproximateMillisecondEpoch(isolate, nanosecond_epoch); int32_t raw_offset; int32_t dst_offset; UErrorCode status = U_ZERO_ERROR; basic_time_zone->getOffset(time_ms, false, raw_offset, dst_offset, status); DCHECK(U_SUCCESS(status)); // Turn ms into ns return static_cast<int64_t>(raw_offset + dst_offset) * 1000000; } } // namespace internal } // namespace v8