/* * Copyright (C) 2015 Andy VanWagoner (thetalecrafter@gmail.com) * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "IntlCollator.h" #if ENABLE(INTL) #include "Error.h" #include "IntlCollatorConstructor.h" #include "IntlObject.h" #include "JSBoundFunction.h" #include "JSCJSValueInlines.h" #include "JSCellInlines.h" #include "ObjectConstructor.h" #include "SlotVisitorInlines.h" #include "StructureInlines.h" #include #include namespace JSC { const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, 0, CREATE_METHOD_TABLE(IntlCollator) }; // FIXME: Implement kf (caseFirst). static const char* const relevantExtensionKeys[2] = { "co", "kn" }; static const size_t indexOfExtensionKeyCo = 0; static const size_t indexOfExtensionKeyKn = 1; void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const { if (collator) ucol_close(collator); } IntlCollator* IntlCollator::create(VM& vm, Structure* structure) { IntlCollator* format = new (NotNull, allocateCell(vm.heap)) IntlCollator(vm, structure); format->finishCreation(vm); return format; } Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) { return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); } IntlCollator::IntlCollator(VM& vm, Structure* structure) : JSDestructibleObject(vm, structure) { } void IntlCollator::finishCreation(VM& vm) { Base::finishCreation(vm); ASSERT(inherits(info())); } void IntlCollator::destroy(JSCell* cell) { static_cast(cell)->IntlCollator::~IntlCollator(); } void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor) { IntlCollator* thisObject = jsCast(cell); ASSERT_GC_OBJECT_INHERITS(thisObject, info()); Base::visitChildren(thisObject, visitor); visitor.append(&thisObject->m_boundCompare); } static Vector sortLocaleData(const String& locale, size_t keyIndex) { // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) Vector keyLocaleData; switch (keyIndex) { case indexOfExtensionKeyCo: { // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." keyLocaleData.append({ }); UErrorCode status = U_ZERO_ERROR; UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status); if (U_SUCCESS(status)) { const char* collation; while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) { // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array." if (!strcmp(collation, "standard") || !strcmp(collation, "search")) continue; // Map keyword values to BCP 47 equivalents. if (!strcmp(collation, "dictionary")) collation = "dict"; else if (!strcmp(collation, "gb2312han")) collation = "gb2312"; else if (!strcmp(collation, "phonebook")) collation = "phonebk"; else if (!strcmp(collation, "traditional")) collation = "trad"; keyLocaleData.append(collation); } uenum_close(enumeration); } break; } case indexOfExtensionKeyKn: keyLocaleData.reserveInitialCapacity(2); keyLocaleData.uncheckedAppend(ASCIILiteral("false")); keyLocaleData.uncheckedAppend(ASCIILiteral("true")); break; default: ASSERT_NOT_REACHED(); } return keyLocaleData; } static Vector searchLocaleData(const String&, size_t keyIndex) { // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) Vector keyLocaleData; switch (keyIndex) { case indexOfExtensionKeyCo: // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." keyLocaleData.reserveInitialCapacity(1); keyLocaleData.append({ }); break; case indexOfExtensionKeyKn: keyLocaleData.reserveInitialCapacity(2); keyLocaleData.uncheckedAppend(ASCIILiteral("false")); keyLocaleData.uncheckedAppend(ASCIILiteral("true")); break; default: ASSERT_NOT_REACHED(); } return keyLocaleData; } void IntlCollator::initializeCollator(ExecState& state, JSValue locales, JSValue optionsValue) { // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402 2.0) // 1. If collator has an [[initializedIntlObject]] internal slot with value true, throw a TypeError exception. // 2. Set collator.[[initializedIntlObject]] to true. // 3. Let requestedLocales be CanonicalizeLocaleList(locales). auto requestedLocales = canonicalizeLocaleList(state, locales); // 4. ReturnIfAbrupt(requestedLocales). if (state.hadException()) return; // 5. If options is undefined, then JSObject* options; if (optionsValue.isUndefined()) { // a. Let options be ObjectCreate(%ObjectPrototype%). options = constructEmptyObject(&state); } else { // 6. Else // a. Let options be ToObject(options). options = optionsValue.toObject(&state); // b. ReturnIfAbrupt(options). if (state.hadException()) return; } // 7. Let u be GetOption(options, "usage", "string", «"sort", "search"», "sort"). String usageString = intlStringOption(state, options, state.vm().propertyNames->usage, { "sort", "search" }, "usage must be either \"sort\" or \"search\"", "sort"); // 8. ReturnIfAbrupt(u). if (state.hadException()) return; // 9. Set collator.[[usage]] to u. if (usageString == "sort") m_usage = Usage::Sort; else if (usageString == "search") m_usage = Usage::Search; else ASSERT_NOT_REACHED(); // 10. If u is "sort", then // a. Let localeData be the value of %Collator%.[[sortLocaleData]]; // 11. Else // a. Let localeData be the value of %Collator%.[[searchLocaleData]]. Vector (*localeData)(const String&, size_t); if (m_usage == Usage::Sort) localeData = sortLocaleData; else localeData = searchLocaleData; // 12. Let opt be a new Record. HashMap opt; // 13. Let matcher be GetOption(options, "localeMatcher", "string", «"lookup", "best fit"», "best fit"). String matcher = intlStringOption(state, options, state.vm().propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit"); // 14. ReturnIfAbrupt(matcher). if (state.hadException()) return; // 15. Set opt.[[localeMatcher]] to matcher. opt.add(ASCIILiteral("localeMatcher"), matcher); // 16. For each row in Table 1, except the header row, do: // a. Let key be the name given in the Key column of the row. // b. Let prop be the name given in the Property column of the row. // c. Let type be the string given in the Type column of the row. // d. Let list be a List containing the Strings given in the Values column of the row, or undefined if no strings are given. // e. Let value be GetOption(options, prop, type, list, undefined). // f. ReturnIfAbrupt(value). // g. If the string given in the Type column of the row is "boolean" and value is not undefined, then // i. Let value be ToString(value). // ii. ReturnIfAbrupt(value). // h. Set opt.[[]] to value. { String numericString; bool usesFallback; bool numeric = intlBooleanOption(state, options, state.vm().propertyNames->numeric, usesFallback); if (state.hadException()) return; if (!usesFallback) numericString = ASCIILiteral(numeric ? "true" : "false"); opt.add(ASCIILiteral("kn"), numericString); } { String caseFirst = intlStringOption(state, options, state.vm().propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr); if (state.hadException()) return; opt.add(ASCIILiteral("kf"), caseFirst); } // 17. Let relevantExtensionKeys be the value of %Collator%.[[relevantExtensionKeys]]. // 18. Let r be ResolveLocale(%Collator%.[[availableLocales]], requestedLocales, opt, relevantExtensionKeys, localeData). auto& availableLocales = state.callee()->globalObject()->intlCollatorAvailableLocales(); auto result = resolveLocale(state, availableLocales, requestedLocales, opt, relevantExtensionKeys, WTF_ARRAY_LENGTH(relevantExtensionKeys), localeData); // 19. Set collator.[[locale]] to the value of r.[[locale]]. m_locale = result.get(ASCIILiteral("locale")); // 20. Let k be 0. // 21. Let lenValue be Get(relevantExtensionKeys, "length"). // 22. Let len be ToLength(lenValue). // 23. Repeat while k < len: // a. Let Pk be ToString(k). // b. Let key be Get(relevantExtensionKeys, Pk). // c. ReturnIfAbrupt(key). // d. If key is "co", then // i. Let property be "collation". // ii. Let value be the value of r.[[co]]. // iii. If value is null, let value be "default". // e. Else use the row of Table 1 that contains the value of key in the Key column: // i. Let property be the name given in the Property column of the row. // ii. Let value be the value of r.[[]]. // iii. If the name given in the Type column of the row is "boolean", let value be the result of comparing value with "true". // f. Set collator.[[]] to value. // g. Increase k by 1. const String& collation = result.get(ASCIILiteral("co")); m_collation = collation.isNull() ? ASCIILiteral("default") : collation; m_numeric = (result.get(ASCIILiteral("kn")) == "true"); // 24. Let s be GetOption(options, "sensitivity", "string", «"base", "accent", "case", "variant"», undefined). String sensitivityString = intlStringOption(state, options, state.vm().propertyNames->sensitivity, { "base", "accent", "case", "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"", nullptr); // 25. ReturnIfAbrupt(s). if (state.hadException()) return; // 26. If s is undefined, then // a. If u is "sort", then let s be "variant". // b. Else // i. Let dataLocale be the value of r.[[dataLocale]]. // ii. Let dataLocaleData be Get(localeData, dataLocale). // iii. Let s be Get(dataLocaleData, "sensitivity"). // 10.2.3 "[[searchLocaleData]][locale] must have a sensitivity property with a String value equal to "base", "accent", "case", or "variant" for all locale values." // 27. Set collator.[[sensitivity]] to s. if (sensitivityString == "base") m_sensitivity = Sensitivity::Base; else if (sensitivityString == "accent") m_sensitivity = Sensitivity::Accent; else if (sensitivityString == "case") m_sensitivity = Sensitivity::Case; else m_sensitivity = Sensitivity::Variant; // 28. Let ip be GetOption(options, "ignorePunctuation", "boolean", undefined, false). bool usesFallback; bool ignorePunctuation = intlBooleanOption(state, options, state.vm().propertyNames->ignorePunctuation, usesFallback); if (usesFallback) ignorePunctuation = false; // 29. ReturnIfAbrupt(ip). if (state.hadException()) return; // 30. Set collator.[[ignorePunctuation]] to ip. m_ignorePunctuation = ignorePunctuation; // 31. Set collator.[[boundCompare]] to undefined. // 32. Set collator.[[initializedCollator]] to true. m_initializedCollator = true; // 33. Return collator. } void IntlCollator::createCollator(ExecState& state) { ASSERT(!m_collator); if (!m_initializedCollator) { initializeCollator(state, jsUndefined(), jsUndefined()); ASSERT(!state.hadException()); } UErrorCode status = U_ZERO_ERROR; auto collator = std::unique_ptr(ucol_open(m_locale.utf8().data(), &status)); if (U_FAILURE(status)) return; UColAttributeValue strength = UCOL_PRIMARY; UColAttributeValue caseLevel = UCOL_OFF; switch (m_sensitivity) { case Sensitivity::Base: break; case Sensitivity::Accent: strength = UCOL_SECONDARY; break; case Sensitivity::Case: caseLevel = UCOL_ON; break; case Sensitivity::Variant: strength = UCOL_TERTIARY; break; default: ASSERT_NOT_REACHED(); } ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status); ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status); ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status); // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be // ignored. There is currently no way to ignore only punctuation. ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status); // "The method is required to return 0 when comparing Strings that are considered canonically // equivalent by the Unicode standard." ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); if (U_FAILURE(status)) return; m_collator = WTFMove(collator); } JSValue IntlCollator::compareStrings(ExecState& state, StringView x, StringView y) { // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0) if (!m_collator) { createCollator(state); if (!m_collator) return state.vm().throwException(&state, createError(&state, ASCIILiteral("Failed to compare strings."))); } UErrorCode status = U_ZERO_ERROR; UCharIterator iteratorX = createIterator(x); UCharIterator iteratorY = createIterator(y); auto result = ucol_strcollIter(m_collator.get(), &iteratorX, &iteratorY, &status); if (U_FAILURE(status)) return state.vm().throwException(&state, createError(&state, ASCIILiteral("Failed to compare strings."))); return jsNumber(result); } const char* IntlCollator::usageString(Usage usage) { switch (usage) { case Usage::Sort: return "sort"; case Usage::Search: return "search"; } ASSERT_NOT_REACHED(); return nullptr; } const char* IntlCollator::sensitivityString(Sensitivity sensitivity) { switch (sensitivity) { case Sensitivity::Base: return "base"; case Sensitivity::Accent: return "accent"; case Sensitivity::Case: return "case"; case Sensitivity::Variant: return "variant"; } ASSERT_NOT_REACHED(); return nullptr; } JSObject* IntlCollator::resolvedOptions(ExecState& state) { // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0) // The function returns a new object whose properties and attributes are set as if // constructed by an object literal assigning to each of the following properties the // value of the corresponding internal slot of this Collator object (see 10.4): locale, // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]] // internal slot of the standard built-in object that is the initial value of // Intl.Collator. if (!m_initializedCollator) { initializeCollator(state, jsUndefined(), jsUndefined()); ASSERT(!state.hadException()); } VM& vm = state.vm(); JSObject* options = constructEmptyObject(&state); options->putDirect(vm, vm.propertyNames->locale, jsString(&state, m_locale)); options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(&state, ASCIILiteral(usageString(m_usage)))); options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(&state, ASCIILiteral(sensitivityString(m_sensitivity)))); options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation)); options->putDirect(vm, vm.propertyNames->collation, jsString(&state, m_collation)); options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric)); return options; } void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format) { m_boundCompare.set(vm, this, format); } } // namespace JSC #endif // ENABLE(INTL)