// © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING #include #include "unicode/simpleformatter.h" #include "unicode/ures.h" #include "ureslocs.h" #include "charstr.h" #include "uresimp.h" #include "measunit_impl.h" #include "number_longnames.h" #include "number_microprops.h" #include #include "cstring.h" #include "util.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; namespace { /** * Display Name (this format has no placeholder). * * Used as an index into the LongNameHandler::simpleFormats array. Units * resources cover the normal set of PluralRules keys, as well as `dnam` and * `per` forms. */ constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; /** * "per" form (e.g. "{0} per day" is day's "per" form). * * Used as an index into the LongNameHandler::simpleFormats array. Units * resources cover the normal set of PluralRules keys, as well as `dnam` and * `per` forms. */ constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; /** * Gender of the word, in languages with grammatical gender. */ constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; // Number of keys in the array populated by PluralTableSink. constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3; // TODO(icu-units#28): load this list from resources, after creating a "&set" // function for use in ldml2icu rules. const int32_t GENDER_COUNT = 7; const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate", "masculine", "neuter", "personal"}; // Converts a UnicodeString to a const char*, either pointing to a string in // gGenders, or pointing to an empty string if an appropriate string was not // found. 
const char *getGenderString(UnicodeString uGender, UErrorCode status) { if (uGender.length() == 0) { return ""; } CharString gender; gender.appendInvariantChars(uGender, status); if (U_FAILURE(status)) { return ""; } int32_t first = 0; int32_t last = GENDER_COUNT; while (first < last) { int32_t mid = (first + last) / 2; int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]); if (cmp == 0) { return gGenders[mid]; } else if (cmp > 0) { first = mid + 1; } else if (cmp < 0) { last = mid; } } // We don't return an error in case our gGenders list is incomplete in // production. // // TODO(icu-units#28): a unit test checking all locales' genders are covered // by gGenders? Else load a complete list of genders found in // grammaticalFeatures in an initOnce. return ""; } // Returns the array index that corresponds to the given pluralKeyword. static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { // pluralKeyword can also be "dnam", "per", or "gender" switch (*pluralKeyword) { case 'd': if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) { return DNAM_INDEX; } break; case 'g': if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) { return GENDER_INDEX; } break; case 'p': if (uprv_strcmp(pluralKeyword + 1, "er") == 0) { return PER_INDEX; } break; default: break; } StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); return plural; } // Selects a string out of the `strings` array which corresponds to the // specified plural form, with fallback to the OTHER form. // // The `strings` array must have ARRAY_LENGTH items: one corresponding to each // of the plural forms, plus a display name ("dnam") and a "per" form. static UnicodeString getWithPlural( const UnicodeString* strings, StandardPlural::Form plural, UErrorCode& status) { UnicodeString result = strings[plural]; if (result.isBogus()) { result = strings[StandardPlural::Form::OTHER]; } if (result.isBogus()) { // There should always be data in the "other" plural variant. 
status = U_INTERNAL_PROGRAM_ERROR; } return result; } enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END }; /** * Returns three outputs extracted from pattern. * * @param coreUnit is extracted as per Extract(...) in the spec: * https://unicode.org/reports/tr35/tr35-general.html#compound-units * @param PlaceholderPosition indicates where in the string the placeholder was * found. * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar * contains the space character (if any) that separated the placeholder from * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one * space character is considered. */ void extractCorePattern(const UnicodeString &pattern, UnicodeString &coreUnit, PlaceholderPosition &placeholderPosition, UChar &joinerChar) { joinerChar = 0; int32_t len = pattern.length(); if (pattern.startsWith(u"{0}", 3)) { placeholderPosition = PH_BEGINNING; if (u_isJavaSpaceChar(pattern[3])) { joinerChar = pattern[3]; coreUnit.setTo(pattern, 4, len - 4); } else { coreUnit.setTo(pattern, 3, len - 3); } } else if (pattern.endsWith(u"{0}", 3)) { placeholderPosition = PH_END; if (u_isJavaSpaceChar(pattern[len - 4])) { coreUnit.setTo(pattern, 0, len - 4); joinerChar = pattern[len - 4]; } else { coreUnit.setTo(pattern, 0, len - 3); } } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) { placeholderPosition = PH_NONE; coreUnit = pattern; } else { placeholderPosition = PH_MIDDLE; coreUnit = pattern; } } ////////////////////////// /// BEGIN DATA LOADING /// ////////////////////////// // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty // string both in case of unknown gender and in case of unknown unit. 
UnicodeString getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    if (U_FAILURE(status)) {
        return {};
    }

    // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
    // TODO(ICU-20400): Get duration-*-person data properly with aliases.
    constexpr int32_t kPersonSuffixLen = 7; // length of "-person"
    const char *subtype = builtinUnit.getSubtype();
    const int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(subtype));
    StringPiece subtypeForResource;
    if (subtypeLen > kPersonSuffixLen &&
        uprv_strcmp(subtype + subtypeLen - kPersonSuffixLen, "-person") == 0) {
        subtypeForResource = StringPiece(subtype, subtypeLen - kPersonSuffixLen);
    } else {
        subtypeForResource = subtype;
    }

    // Resource path: units/<type>/<subtype>/gender
    CharString key;
    key.append("units/", status);
    key.append(builtinUnit.getType(), status);
    key.append("/", status);
    key.append(subtypeForResource, status);
    key.append("/gender", status);

    // The lookup runs on a copy of status, so that a missing gender entry is
    // reported as an empty string rather than as an error.
    UErrorCode localStatus = status;
    int32_t resultLen = 0;
    const UChar *result =
        ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
    if (U_SUCCESS(localStatus)) {
        status = localStatus;
        return UnicodeString(true, result, resultLen);
    } else {
        // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
        // check whether the parent "$unitRes" exists? Then we could return
        // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
        // being a builtin).
        return {};
    }
}

// Loads data from a resource tree with paths matching
// $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
// and genders.
//
// An InflectedPluralSink is configured to load data for a specific gender and
// case. It loads all plural forms, because selection between plural forms is
// dependent upon the value being formatted.
//
// See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
// units/compound/power2: German has case, French has differences for gender,
// but no case.
// // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the // tree structures are different. After homogenizing the structures, we may be // able to unify the two classes. // // TODO: Spec violation: expects presence of "count" - does not fallback to an // absent "count"! If this fallback were added, getCompoundValue could be // superseded? class InflectedPluralSink : public ResourceSink { public: // Accepts `char*` rather than StringPiece because // ResourceTable::findValue(...) requires a null-terminated `char*`. // // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds // checking is performed. explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray) : gender(gender), caseVariant(caseVariant), outArray(outArray) { // Initialize the array to bogus strings. for (int32_t i = 0; i < ARRAY_LENGTH; i++) { outArray[i].setToBogus(); } } // See ResourceSink::put(). void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { ResourceTable pluralsTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { int32_t pluralIndex = getIndex(key, status); if (U_FAILURE(status)) { return; } if (!outArray[pluralIndex].isBogus()) { // We already have a pattern continue; } ResourceTable genderTable = value.getTable(status); ResourceTable caseTable; // This instance has to outlive `value` if (loadForPluralForm(genderTable, caseTable, value, status)) { outArray[pluralIndex] = value.getUnicodeString(status); } } } private: // Tries to load data for the configured gender from `genderTable`. Returns // true if found, returning the data in `value`. The returned data will be // for the configured gender if found, falling back to "neuter" and // no-gender if not. The caseTable parameter holds the intermediate // ResourceTable for the sake of lifetime management. 
bool loadForPluralForm(const ResourceTable &genderTable, ResourceTable &caseTable, ResourceValue &value, UErrorCode &status) { if (uprv_strcmp(gender, "") != 0) { if (loadForGender(genderTable, gender, caseTable, value, status)) { return true; } if (uprv_strcmp(gender, "neuter") != 0 && loadForGender(genderTable, "neuter", caseTable, value, status)) { return true; } } if (loadForGender(genderTable, "_", caseTable, value, status)) { return true; } return false; } // Tries to load data for the given gender from `genderTable`. Returns true // if found, returning the data in `value`. The returned data will be for // the configured case if found, falling back to "nominative" and no-case if // not. bool loadForGender(const ResourceTable &genderTable, const char *genderVal, ResourceTable &caseTable, ResourceValue &value, UErrorCode &status) { if (!genderTable.findValue(genderVal, value)) { return false; } caseTable = value.getTable(status); if (uprv_strcmp(caseVariant, "") != 0) { if (loadForCase(caseTable, caseVariant, value)) { return true; } if (uprv_strcmp(caseVariant, "nominative") != 0 && loadForCase(caseTable, "nominative", value)) { return true; } } if (loadForCase(caseTable, "_", value)) { return true; } return false; } // Tries to load data for the given case from `caseTable`. Returns true if // found, returning the data in `value`. bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) { if (!caseTable.findValue(caseValue, value)) { return false; } return true; } const char *gender; const char *caseVariant; UnicodeString *outArray; }; // Fetches localised formatting patterns for the given subKey. See documentation // for InflectedPluralSink for details. // // Data is loaded for the appropriate unit width, with missing data filled in // from unitsShort. 
void getInflectedMeasureData(StringPiece subKey, const Locale &locale, const UNumberUnitWidth &width, const char *gender, const char *caseVariant, UnicodeString *outArray, UErrorCode &status) { InflectedPluralSink sink(gender, caseVariant, outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return; } CharString key; key.append("units", status); if (width == UNUM_UNIT_WIDTH_NARROW) { key.append("Narrow", status); } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } key.append("/", status); key.append(subKey, status); UErrorCode localStatus = status; ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); if (width == UNUM_UNIT_WIDTH_SHORT) { status = localStatus; return; } // TODO(ICU-13353): The fallback to short does not work in ICU4C. // Manually fall back to short (this is done automatically in Java). key.clear(); key.append("unitsShort/", status); key.append(subKey, status); ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); } class PluralTableSink : public ResourceSink { public: // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds // checking is performed. explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { // Initialize the array to bogus strings. 
for (int32_t i = 0; i < ARRAY_LENGTH; i++) { outArray[i].setToBogus(); } } void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { ResourceTable pluralsTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { if (uprv_strcmp(key, "case") == 0) { continue; } int32_t index = getIndex(key, status); if (U_FAILURE(status)) { return; } if (!outArray[index].isBogus()) { continue; } outArray[index] = value.getUnicodeString(status); if (U_FAILURE(status)) { return; } } } private: UnicodeString *outArray; }; /** * Populates outArray with `locale`-specific values for `unit` through use of * PluralTableSink. Only the set of basic units are supported! * * Reading from resources *unitsNarrow* and *unitsShort* (for width * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". * * @param unit must be a built-in unit, i.e. must have a type and subtype, * listed in gTypes and gSubTypes in measunit.cpp. * @param unitDisplayCase the empty string and "nominative" are treated the * same. For other cases, strings for the requested case are used if found. * (For any missing case-specific data, we fall back to nominative.) * @param outArray must be of fixed length ARRAY_LENGTH. */ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, const char *unitDisplayCase, UnicodeString *outArray, UErrorCode &status) { PluralTableSink sink(outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return; } CharString subKey; subKey.append("/", status); subKey.append(unit.getType(), status); subKey.append("/", status); // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... // TODO(ICU-20400): Get duration-*-person data properly with aliases. 
int32_t subtypeLen = static_cast(uprv_strlen(unit.getSubtype())); if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) { subKey.append({unit.getSubtype(), subtypeLen - 7}, status); } else { subKey.append({unit.getSubtype(), subtypeLen}, status); } if (width != UNUM_UNIT_WIDTH_FULL_NAME) { UErrorCode localStatus = status; CharString genderKey; genderKey.append("units", localStatus); genderKey.append(subKey, localStatus); genderKey.append("/gender", localStatus); StackUResourceBundle fillIn; ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(), &localStatus); outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus); } CharString key; key.append("units", status); if (width == UNUM_UNIT_WIDTH_NARROW) { key.append("Narrow", status); } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } key.append(subKey, status); // Grab desired case first, if available. Then grab no-case data to fill in // the gaps. if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) { CharString caseKey; caseKey.append(key, status); caseKey.append("/case/", status); caseKey.append(unitDisplayCase, status); UErrorCode localStatus = U_ZERO_ERROR; // TODO(icu-units#138): our fallback logic is not spec-compliant: // lateral fallback should happen before locale fallback. Switch to // getInflectedMeasureData after homogenizing data format? Find a unit // test case that demonstrates the incorrect fallback logic (via // regional variant of an inflected language?) ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus); } // TODO(icu-units#138): our fallback logic is not spec-compliant: we // check the given case, then go straight to the no-case data. The spec // states we should first look for case="nominative". As part of #138, // either get the spec changed, or add unit tests that warn us if // case="nominative" data differs from no-case data? 
UErrorCode localStatus = U_ZERO_ERROR; ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); if (width == UNUM_UNIT_WIDTH_SHORT) { if (U_FAILURE(localStatus)) { status = localStatus; } return; } // TODO(ICU-13353): The fallback to short does not work in ICU4C. // Manually fall back to short (this is done automatically in Java). key.clear(); key.append("unitsShort", status); key.append(subKey, status); ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); } // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, UErrorCode &status) { // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C? PluralTableSink sink(outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status)); if (U_FAILURE(status)) { return; } ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { UnicodeString &pattern = outArray[i]; if (pattern.isBogus()) { continue; } int32_t longNameLen = 0; const char16_t *longName = ucurr_getPluralName( currency.getISOCurrency(), locale.getName(), nullptr /* isChoiceFormat */, StandardPlural::getKeyword(static_cast(i)), &longNameLen, &status); // Example pattern from data: "{0} {1}" // Example output after find-and-replace: "{0} US dollars" pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen)); } } UnicodeString getCompoundValue(StringPiece compoundKey, const Locale &locale, const UNumberUnitWidth &width, UErrorCode &status) { LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return {}; } CharString key; key.append("units", status); if 
(width == UNUM_UNIT_WIDTH_NARROW) { key.append("Narrow", status); } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } key.append("/compound/", status); key.append(compoundKey, status); UErrorCode localStatus = status; int32_t len = 0; const UChar *ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus); if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) { // Fall back to short, which contains more compound data key.clear(); key.append("unitsShort/compound/", status); key.append(compoundKey, status); ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); } else { status = localStatus; } if (U_FAILURE(status)) { return {}; } return UnicodeString(ptr, len); } /** * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. * * Consider a deriveComponent rule that looks like this: * * * * Instantiating an instance as follows: * * DerivedComponents d(loc, "case", "per"); * * Applying the rule in the XML element above, `d.value0("foo")` will be "foo", * and `d.value1("foo")` will be "nominative". * * The values returned by value0(...) and value1(...) are valid only while the * instance exists. In case of any kind of failure, value0(...) and value1(...) * will return "". */ class DerivedComponents { public: /** * Constructor. * * The feature and structure parameters must be null-terminated. The string * referenced by compoundValue must exist for longer than the * DerivedComponents instance. 
*/ DerivedComponents(const Locale &locale, const char *feature, const char *structure) { StackUResourceBundle derivationsBundle, stackBundle; ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), &status); ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status); if (U_FAILURE(status)) { return; } UErrorCode localStatus = U_ZERO_ERROR; // TODO(icu-units#28): use standard normal locale resolution algorithms // rather than just grabbing language: ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &localStatus); // TODO(icu-units#28): // - code currently assumes if the locale exists, the rules are there - // instead of falling back to root when the requested rule is missing. // - investigate ures.h functions, see if one that uses res_findResource() // might be better (or use res_findResource directly), or maybe help // improve ures documentation to guide function selection? 
if (localStatus == U_MISSING_RESOURCE_ERROR) { ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); } else { status = localStatus; } ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status); ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status); UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status); UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status); if (U_SUCCESS(status)) { if (val0.compare(UnicodeString(u"compound")) == 0) { compound0_ = true; } else { compound0_ = false; value0_.appendInvariantChars(val0, status); } if (val1.compare(UnicodeString(u"compound")) == 0) { compound1_ = true; } else { compound1_ = false; value1_.appendInvariantChars(val1, status); } } } // Returns a StringPiece that is only valid as long as the instance exists. StringPiece value0(const StringPiece compoundValue) const { return compound0_ ? compoundValue : value0_.toStringPiece(); } // Returns a StringPiece that is only valid as long as the instance exists. StringPiece value1(const StringPiece compoundValue) const { return compound1_ ? compoundValue : value1_.toStringPiece(); } // Returns a char* that is only valid as long as the instance exists. const char *value0(const char *compoundValue) const { return compound0_ ? compoundValue : value0_.data(); } // Returns a char* that is only valid as long as the instance exists. const char *value1(const char *compoundValue) const { return compound1_ ? compoundValue : value1_.data(); } private: UErrorCode status = U_ZERO_ERROR; // Holds strings referred to by value0 and value1; bool compound0_ = false, compound1_ = false; CharString value0_, value1_; }; // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding // testsuite support for testing with synthetic data? 
/**
 * Loads and returns the value of a deriveCompound rule, read from the
 * "grammaticalFeatures" bundle at
 *
 *     grammaticalData/derivations/<language>/compound/<feature>/<structure>
 *
 * where <language> is locale.getLanguage(), or "root" when there is no entry
 * for that language.
 *
 * Callers in this file treat a one-character value ("0" or "1") as an index
 * selecting one of the rule's operands, and any other value as a fixed result.
 *
 * @param locale only its language is used for the lookup.
 * @param feature null-terminated feature name, e.g. "gender".
 * @param structure null-terminated structure name, e.g. "per", "times",
 *     "power" or "prefix".
 * @param status standard ICU error code. If it is already a failure on entry,
 *     nothing is loaded and it is left unchanged.
 *
 * NOTE: If U_FAILURE(status), returns an empty string.
 */
UnicodeString
getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
    // Never act on (or reset) an error that was raised before this call.
    if (U_FAILURE(status)) {
        return {};
    }
    StackUResourceBundle derivationsBundle, stackBundle;
    ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
    ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
                  &status);
    ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
    if (U_FAILURE(status)) {
        // The derivations table itself is unavailable. This must not be
        // mistaken for "no entry for this language" below.
        return {};
    }

    // Only the language lookup may fall back to "root", so it runs on a local
    // status to keep its U_MISSING_RESOURCE_ERROR apart from other failures.
    //
    // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
    UErrorCode localStatus = status;
    ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
                  &localStatus);
    // TODO:
    // - code currently assumes if the locale exists, the rules are there -
    //   instead of falling back to root when the requested rule is missing.
    // - investigate ures.h functions, see if one that uses res_findResource()
    //   might be better (or use res_findResource directly), or maybe help
    //   improve ures documentation to guide function selection?
    if (localStatus == U_MISSING_RESOURCE_ERROR) {
        ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
    } else {
        status = localStatus;
    }
    ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
    ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
    UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
    if (U_FAILURE(status)) {
        return {};
    }
    U_ASSERT(!uVal.isBogus());
    return uVal;
}

// Returns the gender string for structures following these rules:
//
//
//
//
// Fake example:
//
//
// data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
// correspond to value="0" and value="1".
//
// Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
// "prefix" doesn't). UnicodeString getDerivedGender(Locale locale, const char *structure, UnicodeString *data0, UnicodeString *data1, UErrorCode &status) { UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status); if (val.length() == 1) { switch (val[0]) { case u'0': return data0[GENDER_INDEX]; case u'1': if (data1 == nullptr) { return {}; } return data1[GENDER_INDEX]; } } return val; } //////////////////////// /// END DATA LOADING /// //////////////////////// // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace const UChar *trimSpaceChars(const UChar *s, int32_t &length) { if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) { return s; } int32_t start = 0; int32_t limit = length; while (start < limit && u_isJavaSpaceChar(s[start])) { ++start; } if (start < limit) { // There is non-white space at start; we will not move limit below that, // so we need not test start 0); // Else it would not be COMPOUND if (mui.singleUnits[endSlice]->dimensionality < 0) { // We have a -per- construct UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status); if (perRule.length() != 1) { // Fixed gender for -per- units return perRule; } if (perRule[0] == u'1') { // Find the start of the denominator. We already know there is one. while (mui.singleUnits[startSlice]->dimensionality >= 0) { startSlice++; } } else { // Find the end of the numerator while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) { endSlice--; } if (endSlice < 0) { // We have only a denominator, e.g. "per-second". // TODO(icu-units#28): find out what gender to use in the // absence of a first value - mentioned in CLDR-14253. 
return {}; } } } if (endSlice > startSlice) { // We have a -times- construct UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status); if (timesRule.length() != 1) { // Fixed gender for -times- units return timesRule; } if (timesRule[0] == u'0') { endSlice = startSlice; } else { // We assume timesRule[0] == u'1' startSlice = endSlice; } } U_ASSERT(startSlice == endSlice); singleUnitIndex = startSlice; } else if (mui.complexity == UMEASURE_UNIT_MIXED) { status = U_INTERNAL_PROGRAM_ERROR; return {}; } else { U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE); U_ASSERT(mui.singleUnits.length() == 1); } // Now we know which singleUnit's gender we want const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex]; // Check for any power-prefix gender override: if (std::abs(singleUnit->dimensionality) != 1) { UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status); if (powerRule.length() != 1) { // Fixed gender for -powN- units return powerRule; } // powerRule[0] == u'0'; u'1' not currently in spec. } // Check for any SI and binary prefix gender override: if (std::abs(singleUnit->dimensionality) != 1) { UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status); if (prefixRule.length() != 1) { // Fixed gender for -powN- units return prefixRule; } // prefixRule[0] == u'0'; u'1' not currently in spec. } // Now we've boiled it down to the gender of one simple unit identifier: return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status), status); } void maybeCalculateGender(const Locale &locale, const MeasureUnit &unitRef, UnicodeString *outArray, UErrorCode &status) { if (outArray[GENDER_INDEX].isBogus()) { UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status); if (meterGender.isEmpty()) { // No gender for meter: assume ungendered language return; } // We have a gendered language, but are lacking gender for unitRef. 
outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status); } } } // namespace void LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const UNumberUnitWidth &width, const char *unitDisplayCase, const PluralRules *rules, const MicroPropsGenerator *parent, LongNameHandler *fillIn, UErrorCode &status) { // From https://unicode.org/reports/tr35/tr35-general.html#compound-units - // Points 1 and 2 are mostly handled by MeasureUnit: // // 1. If the unitId is empty or invalid, fail // 2. Put the unitId into normalized order U_ASSERT(fillIn != nullptr); if (uprv_strcmp(unitRef.getType(), "") != 0) { // Handling built-in units: // // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant) // - If result is not empty, return it UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status); maybeCalculateGender(loc, unitRef, simpleFormats, status); if (U_FAILURE(status)) { return; } fillIn->rules = rules; fillIn->parent = parent; fillIn->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); if (!simpleFormats[GENDER_INDEX].isBogus()) { fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status); } return; // TODO(icu-units#145): figure out why this causes a failure in // format/MeasureFormatTest/TestIndividualPluralFallback and other // tests, when it should have been an alternative for the lines above: // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); // fillIn->rules = rules; // fillIn->parent = parent; // return; } else { // Check if it is a MeasureUnit this constructor handles: this // constructor does not handle mixed units U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED); forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); fillIn->rules = rules; fillIn->parent = parent; return; } } void LongNameHandler::forArbitraryUnit(const Locale 
&loc, const MeasureUnit &unitRef, const UNumberUnitWidth &width, const char *unitDisplayCase, LongNameHandler *fillIn, UErrorCode &status) { if (U_FAILURE(status)) { return; } if (fillIn == nullptr) { status = U_INTERNAL_PROGRAM_ERROR; return; } // Numbered list items are from the algorithms at // https://unicode.org/reports/tr35/tr35-general.html#compound-units: // // 4. Divide the unitId into numerator (the part before the "-per-") and // denominator (the part after the "-per-). If both are empty, fail MeasureUnitImpl unit; MeasureUnitImpl perUnit; { MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) { SingleUnitImpl *subUnit = fullUnit.singleUnits[i]; if (subUnit->dimensionality > 0) { unit.appendSingleUnit(*subUnit, status); } else { subUnit->dimensionality *= -1; perUnit.appendSingleUnit(*subUnit, status); } } } // TODO(icu-units#28): check placeholder logic, see if it needs to be // present here instead of only in processPatternTimes: // // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty DerivedComponents derivedPerCases(loc, "case", "per"); // 6. numeratorUnitString UnicodeString numeratorUnitData[ARRAY_LENGTH]; processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase), numeratorUnitData, status); // 7. denominatorUnitString UnicodeString denominatorUnitData[ARRAY_LENGTH]; processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase), denominatorUnitData, status); // TODO(icu-units#139): // - implement DerivedComponents for "plural/times" and "plural/power": // French has different rules, we'll be producing the wrong results // currently. (Prove via tests!) // - implement DerivedComponents for "plural/per", "plural/prefix", // "case/times", "case/power", and "case/prefix" - although they're // currently hardcoded. 
Languages with different rules are surely on the // way. // // Currently we only use "case/per", "plural/times", "case/times", and // "case/power". // // This may have impact on multiSimpleFormatsToModifiers(...) below too? // These rules are currently (ICU 69) all the same and hard-coded below. UnicodeString perUnitPattern; if (!denominatorUnitData[PER_INDEX].isBogus()) { // If we have no denominator, we obtain the empty string: perUnitPattern = denominatorUnitData[PER_INDEX]; } else { // 8. Set perPattern to be getValue([per], locale, length) UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status); // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit. SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status); if (U_FAILURE(status)) { return; } // Plural and placeholder handling for 7. denominatorUnitString: // TODO(icu-units#139): hardcoded: // UnicodeString denominatorFormat = getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status); // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status); if (U_FAILURE(status)) { return; } UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments(); int32_t trimmedLen = denominatorPattern.length(); const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen); UnicodeString denominatorString(false, trimmed, trimmedLen); // 9. If the denominatorString is empty, set result to // [numeratorString], otherwise set result to format(perPattern, // numeratorString, denominatorString) // // TODO(icu-units#28): Why does UnicodeString need to be explicit in the // following line? 
perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status); if (U_FAILURE(status)) { return; } } if (perUnitPattern.length() == 0) { fillIn->simpleFormatsToModifiers(numeratorUnitData, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); } else { fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); } // Gender // // TODO(icu-units#28): find out what gender to use in the absence of a first // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253. // // gender/per deriveCompound rules don't say: // fillIn->gender = getGenderString( getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status); } void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, Locale loc, const UNumberUnitWidth &width, const char *caseVariant, UnicodeString *outArray, UErrorCode &status) { if (U_FAILURE(status)) { return; } if (productUnit.complexity == UMEASURE_UNIT_MIXED) { // These are handled by MixedUnitLongNameHandler status = U_UNSUPPORTED_ERROR; return; } #if U_DEBUG for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { U_ASSERT(outArray[pluralIndex].length() == 0); U_ASSERT(!outArray[pluralIndex].isBogus()); } #endif if (productUnit.identifier.isEmpty()) { // TODO(icu-units#28): consider when serialize should be called. // identifier might also be empty for MeasureUnit(). productUnit.serialize(status); } if (U_FAILURE(status)) { return; } if (productUnit.identifier.length() == 0) { // MeasureUnit(): no units: return empty strings. return; } MeasureUnit builtinUnit; if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) { // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it // breaks them all down. Do we want to drop this? 
// - findBySubType isn't super efficient, if we skip it and go to basic // singles, we don't have to construct MeasureUnit's anymore. // - Check all the existing unit tests that fail without this: is it due // to incorrect fallback via getMeasureData? // - Do those unit tests cover this code path representatively? if (builtinUnit != MeasureUnit()) { getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status); maybeCalculateGender(loc, builtinUnit, outArray, status); } return; } // 2. Set timesPattern to be getValue(times, locale, length) UnicodeString timesPattern = getCompoundValue("times", loc, width, status); SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status); if (U_FAILURE(status)) { return; } PlaceholderPosition globalPlaceholder[ARRAY_LENGTH]; UChar globalJoinerChar = 0; // Numbered list items are from the algorithms at // https://unicode.org/reports/tr35/tr35-general.html#compound-units: // // pattern(...) point 5: // - Set both globalPlaceholder and globalPlaceholderPosition to be empty // // 3. Set result to be empty for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { // Initial state: empty string pattern, via all falling back to OTHER: if (pluralIndex == StandardPlural::Form::OTHER) { outArray[pluralIndex].remove(); } else { outArray[pluralIndex].setToBogus(); } globalPlaceholder[pluralIndex] = PH_EMPTY; } // Empty string represents "compound" (propagate the plural form). const char *pluralCategory = ""; DerivedComponents derivedTimesPlurals(loc, "plural", "times"); DerivedComponents derivedTimesCases(loc, "case", "times"); DerivedComponents derivedPowerCases(loc, "case", "power"); // 4. 
For each single_unit in product_unit for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length(); singleUnitIndex++) { SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex]; const char *singlePluralCategory; const char *singleCaseVariant; // TODO(icu-units#28): ensure we have unit tests that change/fail if we // assign incorrect case variants here: if (singleUnitIndex < productUnit.singleUnits.length() - 1) { // 4.1. If hasMultiple singlePluralCategory = derivedTimesPlurals.value0(pluralCategory); singleCaseVariant = derivedTimesCases.value0(caseVariant); pluralCategory = derivedTimesPlurals.value1(pluralCategory); caseVariant = derivedTimesCases.value1(caseVariant); } else { singlePluralCategory = derivedTimesPlurals.value1(pluralCategory); singleCaseVariant = derivedTimesCases.value1(caseVariant); } // 4.2. Get the gender of that single_unit MeasureUnit simpleUnit; if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) { // Ideally all simple units should be known, but they're not: // 100-kilometer is internally treated as a simple unit, but it is // not a built-in unit and does not have formatting data in CLDR 39. // // TODO(icu-units#28): test (desirable) invariants in unit tests. status = U_UNSUPPORTED_ERROR; return; } const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status); // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-' U_ASSERT(singleUnit->dimensionality > 0); int32_t dimensionality = singleUnit->dimensionality; UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH]; if (dimensionality != 1) { // 4.3.1. 
set dimensionalityPrefixPattern to be // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender), // such as "{0} kwadratowym" CharString dimensionalityKey("compound/power", status); dimensionalityKey.appendNumber(dimensionality, status); getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender, singleCaseVariant, dimensionalityPrefixPatterns, status); if (U_FAILURE(status)) { // At the time of writing, only pow2 and pow3 are supported. // Attempting to format other powers results in a // U_RESOURCE_TYPE_MISMATCH. We convert the error if we // understand it: if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) { status = U_UNSUPPORTED_ERROR; } return; } // TODO(icu-units#139): // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory) // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant) singleCaseVariant = derivedPowerCases.value0(singleCaseVariant); // 4.3.4. remove the dimensionality_prefix from singleUnit singleUnit->dimensionality = 1; } // 4.4. if singleUnit starts with an si_prefix, such as 'centi' UMeasurePrefix prefix = singleUnit->unitPrefix; UnicodeString prefixPattern; if (prefix != UMEASURE_PREFIX_ONE) { // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale, // length), such as "centy{0}" CharString prefixKey; // prefixKey looks like "1024p3" or "10p-2": prefixKey.appendNumber(umeas_getPrefixBase(prefix), status); prefixKey.append('p', status); prefixKey.appendNumber(umeas_getPrefixPower(prefix), status); // Contains a pattern like "centy{0}". prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status); // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory) // // TODO(icu-units#139): that refers to these rules: // // though I'm not sure what other value they might end up having. // // 4.4.3. 
set singleCaseVariant to be prefix0(singleCaseVariant) // // TODO(icu-units#139): that refers to: // but the prefix (value0) doesn't have case, the rest simply // propagates. // 4.4.4. remove the si_prefix from singleUnit singleUnit->unitPrefix = UMEASURE_PREFIX_ONE; } // 4.5. Set corePattern to be the getValue(singleUnit, locale, length, // singlePluralCategory, singleCaseVariant), such as "{0} metrem" UnicodeString singleUnitArray[ARRAY_LENGTH]; // At this point we are left with a Simple Unit: U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) == 0); getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray, status); if (U_FAILURE(status)) { // Shouldn't happen if we have data for all single units return; } // Calculate output gender if (!singleUnitArray[GENDER_INDEX].isBogus()) { U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty()); UnicodeString uVal; if (prefix != UMEASURE_PREFIX_ONE) { singleUnitArray[GENDER_INDEX] = getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status); } if (dimensionality != 1) { singleUnitArray[GENDER_INDEX] = getDerivedGender(loc, "power", singleUnitArray, nullptr, status); } UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status); if (timesGenderRule.length() == 1) { switch (timesGenderRule[0]) { case u'0': if (singleUnitIndex == 0) { U_ASSERT(outArray[GENDER_INDEX].isBogus()); outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; } break; case u'1': if (singleUnitIndex == productUnit.singleUnits.length() - 1) { U_ASSERT(outArray[GENDER_INDEX].isBogus()); outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; } } } else { if (outArray[GENDER_INDEX].isBogus()) { outArray[GENDER_INDEX] = timesGenderRule; } } } // Calculate resulting patterns for each plural form for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { StandardPlural::Form plural = static_cast(pluralIndex); // 
singleUnitArray[pluralIndex] looks something like "{0} Meter" if (outArray[pluralIndex].isBogus()) { if (singleUnitArray[pluralIndex].isBogus()) { // Let the usual plural fallback mechanism take care of this // plural form continue; } else { // Since our singleUnit can have a plural form that outArray // doesn't yet have (relying on fallback to OTHER), we start // by grabbing it with the normal plural fallback mechanism outArray[pluralIndex] = getWithPlural(outArray, plural, status); if (U_FAILURE(status)) { return; } } } if (uprv_strcmp(singlePluralCategory, "") != 0) { plural = static_cast(getIndex(singlePluralCategory, status)); } // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern. UnicodeString coreUnit; PlaceholderPosition placeholderPosition; UChar joinerChar; extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit, placeholderPosition, joinerChar); // 4.7 If the position is middle, then fail if (placeholderPosition == PH_MIDDLE) { status = U_UNSUPPORTED_ERROR; return; } // 4.8. If globalPlaceholder is empty if (globalPlaceholder[pluralIndex] == PH_EMPTY) { globalPlaceholder[pluralIndex] = placeholderPosition; globalJoinerChar = joinerChar; } else { // Expect all units involved to have the same placeholder position U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition); // TODO(icu-units#28): Do we want to add a unit test that checks // for consistent joiner chars? Probably not, given how // inconsistent they are. File a CLDR ticket with examples? } // Now coreUnit would be just "Meter" // 4.9. If siPrefixPattern is not empty if (prefix != UMEASURE_PREFIX_ONE) { SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status); if (U_FAILURE(status)) { return; } // 4.9.1. 
Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern, // coreUnit) UnicodeString tmp; // combineLowercasing(locale, length, prefixPattern, coreUnit) // // TODO(icu-units#28): run this only if prefixPattern does not // contain space characters - do languages "as", "bn", "hi", // "kk", etc have concepts of upper and lower case?: if (width == UNUM_UNIT_WIDTH_FULL_NAME) { coreUnit.toLower(loc); } prefixCompiled.format(coreUnit, tmp, status); if (U_FAILURE(status)) { return; } coreUnit = tmp; } // 4.10. If dimensionalityPrefixPattern is not empty if (dimensionality != 1) { SimpleFormatter dimensionalityCompiled( getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status); if (U_FAILURE(status)) { return; } // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length, // dimensionalityPrefixPattern, coreUnit) UnicodeString tmp; // combineLowercasing(locale, length, prefixPattern, coreUnit) // // TODO(icu-units#28): run this only if prefixPattern does not // contain space characters - do languages "as", "bn", "hi", // "kk", etc have concepts of upper and lower case?: if (width == UNUM_UNIT_WIDTH_FULL_NAME) { coreUnit.toLower(loc); } dimensionalityCompiled.format(coreUnit, tmp, status); if (U_FAILURE(status)) { return; } coreUnit = tmp; } if (outArray[pluralIndex].length() == 0) { // 4.11. If the result is empty, set result to be coreUnit outArray[pluralIndex] = coreUnit; } else { // 4.12. 
Otherwise set result to be format(timesPattern, result, coreUnit) UnicodeString tmp; timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status); outArray[pluralIndex] = tmp; } } } for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { if (globalPlaceholder[pluralIndex] == PH_BEGINNING) { UnicodeString tmp; tmp.append(u"{0}", 3); if (globalJoinerChar != 0) { tmp.append(globalJoinerChar); } tmp.append(outArray[pluralIndex]); outArray[pluralIndex] = tmp; } else if (globalPlaceholder[pluralIndex] == PH_END) { if (globalJoinerChar != 0) { outArray[pluralIndex].append(globalJoinerChar); } outArray[pluralIndex].append(u"{0}", 3); } } } UnicodeString LongNameHandler::getUnitDisplayName( const Locale& loc, const MeasureUnit& unit, UNumberUnitWidth width, UErrorCode& status) { if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unit, width, "", simpleFormats, status); return simpleFormats[DNAM_INDEX]; } UnicodeString LongNameHandler::getUnitPattern( const Locale& loc, const MeasureUnit& unit, UNumberUnitWidth width, StandardPlural::Form pluralForm, UErrorCode& status) { if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unit, width, "", simpleFormats, status); // The above already handles fallback from other widths to short if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); } // Now handle fallback from other plural forms to OTHER return (!(simpleFormats[pluralForm]).isBogus())? 
simpleFormats[pluralForm]: simpleFormats[StandardPlural::Form::OTHER]; } LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { auto* result = new LongNameHandler(rules, parent); if (result == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; } UnicodeString simpleFormats[ARRAY_LENGTH]; getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return nullptr; } result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); // TODO(icu-units#28): currency gender? return result; } void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, UErrorCode &status) { for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { StandardPlural::Form plural = static_cast(i); UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status); if (U_FAILURE(status)) { return; } SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); if (U_FAILURE(status)) { return; } fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural}); } } void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, Field field, UErrorCode &status) { SimpleFormatter trailCompiled(trailFormat, 1, 1, status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { StandardPlural::Form plural = static_cast(i); UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); if (U_FAILURE(status)) { return; } UnicodeString compoundFormat; if (leadFormat.length() == 0) { compoundFormat = trailFormat; } else { trailCompiled.format(leadFormat, compoundFormat, status); if (U_FAILURE(status)) { return; } } SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); if (U_FAILURE(status)) { return; } fModifiers[i] = 
SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); } } void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { if (parent != NULL) { parent->processQuantity(quantity, micros, status); } StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); micros.modOuter = &fModifiers[pluralForm]; micros.gender = gender; } const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { return &fModifiers[plural]; } void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit, const UNumberUnitWidth &width, const char *unitDisplayCase, const PluralRules *rules, const MicroPropsGenerator *parent, MixedUnitLongNameHandler *fillIn, UErrorCode &status) { U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED); U_ASSERT(fillIn != nullptr); if (U_FAILURE(status)) { return; } MeasureUnitImpl temp; const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); // Defensive, for production code: if (impl.complexity != UMEASURE_UNIT_MIXED) { // Should be using the normal LongNameHandler status = U_UNSUPPORTED_ERROR; return; } fillIn->fMixedUnitCount = impl.singleUnits.length(); fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]); for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) { // Grab data for each of the components. UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH]; // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this // propagation of unitDisplayCase is correct: getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData, status); // TODO(ICU-21494): if we add support for gender for mixed units, we may // need maybeCalculateGender() here. 
} // TODO(icu-units#120): Make sure ICU doesn't output zero-valued // high-magnitude fields // * for mixed units count N, produce N listFormatters, one for each subset // that might be formatted. UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT; if (width == UNUM_UNIT_WIDTH_NARROW) { listWidth = ULISTFMT_WIDTH_NARROW; } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) { // This might be the same as SHORT in most languages: listWidth = ULISTFMT_WIDTH_WIDE; } fillIn->fListFormatter.adoptInsteadAndCheckErrorCode( ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status); // TODO(ICU-21494): grab gender of each unit, calculate the gender // associated with this list formatter, save it for later. fillIn->rules = rules; fillIn->parent = parent; // We need a localised NumberFormatter for the numbers of the bigger units // (providing Arabic numerals, for example). fillIn->fNumberFormatter = NumberFormatter::withLocale(loc); } void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { U_ASSERT(fMixedUnitCount > 1); if (parent != nullptr) { parent->processQuantity(quantity, micros, status); } micros.modOuter = getMixedUnitModifier(quantity, micros, status); } const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { if (micros.mixedMeasuresCount == 0) { U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value status = U_UNSUPPORTED_ERROR; return µs.helpers.emptyWeakModifier; } // Algorithm: // // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should // find "3 yard" and "1 foot" in micros.mixedMeasures. 
// // Obtain long-names with plural forms corresponding to measure values: // * {0} yards, {0} foot, {0} inches // // Format the integer values appropriately and modify with the format // strings: // - 3 yards, 1 foot // // Use ListFormatter to combine, with one placeholder: // - 3 yards, 1 foot and {0} inches // // Return a SimpleModifier for this pattern, letting the rest of the // pipeline take care of the remaining inches. LocalArray outputMeasuresList(new UnicodeString[fMixedUnitCount], status); if (U_FAILURE(status)) { return µs.helpers.emptyWeakModifier; } StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER; for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) { DecimalQuantity fdec; // If numbers are negative, only the first number needs to have its // negative sign formatted. int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i]; if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity` // If quantity is not the first value and quantity is negative if (micros.indexOfQuantity > 0 && quantity.isNegative()) { quantity.negate(); } StandardPlural::Form quantityPlural = utils::getPluralSafe(micros.rounder, rules, quantity, status); UnicodeString quantityFormatWithPlural = getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status); SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status); quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status); } else { fdec.setToLong(number); StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec); UnicodeString simpleFormat = getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status); SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); UnicodeString num; auto appendable = UnicodeStringAppendable(num); fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status); compiledFormatter.format(num, outputMeasuresList[i], status); } } // TODO(ICU-21494): implement 
gender for lists of mixed units. Presumably we // can set micros.gender to the gender associated with the list formatter in // use below (once we have correct support for that). And then document this // appropriately? "getMixedUnitModifier" doesn't sound like it would do // something like this. // Combine list into a "premixed" pattern UnicodeString premixedFormatPattern; fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern, status); SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status); if (U_FAILURE(status)) { return µs.helpers.emptyWeakModifier; } micros.helpers.mixedUnitModifier = SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural}); return µs.helpers.mixedUnitModifier; } const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form /*plural*/) const { // TODO(icu-units#28): investigate this method when investigating where // ModifierStore::getModifier() gets used. 
To be sure it remains // unreachable: UPRV_UNREACHABLE; return nullptr; } LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVector &units, const UNumberUnitWidth &width, const char *unitDisplayCase, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { LocalPointer result(new LongNameMultiplexer(parent), status); if (U_FAILURE(status)) { return nullptr; } U_ASSERT(units.length() > 0); if (result->fHandlers.resize(units.length()) == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; } result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]); for (int32_t i = 0, length = units.length(); i < length; i++) { const MeasureUnit &unit = *units[i]; result->fMeasureUnits[i] = unit; if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) { MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status); MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, mlnh, status); result->fHandlers[i] = mlnh; } else { LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status); LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status); result->fHandlers[i] = lnh; } if (U_FAILURE(status)) { return nullptr; } } return result.orphan(); } void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { // We call parent->processQuantity() from the Multiplexer, instead of // letting LongNameHandler handle it: we don't know which LongNameHandler to // call until we've called the parent! 
fParent->processQuantity(quantity, micros, status); // Call the correct LongNameHandler based on outputUnit for (int i = 0; i < fHandlers.getCapacity(); i++) { if (fMeasureUnits[i] == micros.outputUnit) { fHandlers[i]->processQuantity(quantity, micros, status); return; } } if (U_FAILURE(status)) { return; } // We shouldn't receive any outputUnit for which we haven't already got a // LongNameHandler: status = U_INTERNAL_PROGRAM_ERROR; } #endif /* #if !UCONFIG_NO_FORMATTING */