/* Copyright (c) 2004, 2024, Oracle and/or its affiliates. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License, version 2.0, as published by the Free Software Foundation. This library is designed to work with certain software (including but not limited to OpenSSL) that is licensed under separate terms, as designated in a particular file or component or in included license documentation. The authors of MySQL hereby grant you an additional permission to link the library and your derivative works with the separately licensed software that they have either included with the library or referenced in the documentation. Without limiting anything contained in the foregoing, this file, which is part of C Driver for MySQL (Connector/C), is also subject to the Universal FOSS Exception, version 1.0, a copy of which can be found at http://oss.oracle.com/licenses/universal-foss-exception. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License, version 2.0, for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* UCA (Unicode Collation Algorithm) support. 
Features that are not implemented yet: - No Normalization From D is done + No decomposition is done + No Thai/Lao orderding is done - No combining marks processing is done */ #include #include #include #include #include #include #include #include #include #include "m_string.h" #include "my_byteorder.h" #include "my_compiler.h" #include "mysql/my_loglevel.h" #include "mysql/strings/m_ctype.h" #include "mysys_err.h" #include "string_with_len.h" #include "strings/m_ctype_internals.h" #include "strings/mb_wc.h" #include "strings/str_uca_type.h" #include "strings/uca900_data.h" #include "strings/uca900_ja_data.h" #include "strings/uca900_zh_data.h" #include "strings/uca_data.h" #include "template_utils.h" MY_UCA_INFO my_uca_v400 = { UCA_V400, /* version */ nullptr, /* in case we clone an instance */ 0xFFFF, /* maxchar */ uca_length, /* lengths */ nullptr, /* m_allocated_weights */ uca_weight, /* weights */ false, /* have_contractions */ nullptr, /* contraction_nodes */ nullptr, /* contraction_flags */ /* Logical positions */ 0x0009, /* first_non_ignorable p != ignore */ 0xA48C, /* last_non_ignorable Not a CJK and not UNASSIGNED */ 0x0332, /* first_primary_ignorable p == 0 */ 0x20EA, /* last_primary_ignorable */ 0x0000, /* first_secondary_ignorable p,s == 0 */ 0xFE73, /* last_secondary_ignorable p,s == 0 */ 0x0000, /* first_tertiary_ignorable p,s,t == 0 */ 0xFE73, /* last_tertiary_ignorable p,s,t == 0 */ 0x0000, /* first_trailing */ 0x0000, /* last_trailing */ 0x0009, /* first_variable */ 0x2183, /* last_variable */ 0, /* extra_ce_pri_base, not used */ 0, /* extra_ce_sec_base, not used */ 0 /* extra_ce_ter_base, not used */ }; /******************************************************/ MY_UCA_INFO my_uca_v520 = { UCA_V520, /* version */ nullptr, /* in case we clone an instance */ 0x10FFFF, /* maxchar */ uca520_length, nullptr, /* m_allocated_weights */ uca520_weight, false, nullptr, /* contractions */ nullptr, 0x0009, /* first_non_ignorable p != ignore */ 0x1342E, /* 
last_non_ignorable Not a CJK and not UASSIGNED */ 0x0332, /* first_primary_ignorable p == ignore */ 0x101FD, /* last_primary_ignorable */ 0x0000, /* first_secondary_ignorable p,s= ignore */ 0xFE73, /* last_secondary_ignorable */ 0x0000, /* first_tertiary_ignorable p,s,t == ignore */ 0xFE73, /* last_tertiary_ignorable */ 0x0000, /* first_trailing */ 0x0000, /* last_trailing */ 0x0009, /* first_variable if alt=non-ignorable: p != ignore */ 0x1D371, /* last_variable if alt=shifter: p,s,t == ignore */ 0, /* extra_ce_pri_base, not used */ 0, /* extra_ce_sec_base, not used */ 0 /* extra_ce_ter_base, not used */ }; /******************************************************/ /* German Phonebook */ static const char german2[] = "&AE << \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " "&OE << \\u0153 <<< \\u0152 << \\u00F6 <<< \\u00D6 " "&UE << \\u00FC <<< \\u00DC "; /* Some sources treat LETTER A WITH DIAERESIS (00E4,00C4) secondary greater than LETTER AE (00E6,00C6). http://www.evertype.com/alphabets/icelandic.pdf http://developer.mimer.com/collations/charts/icelandic.htm Other sources do not provide any special rules for LETTER A WITH DIAERESIS: http://www.omniglot.com/writing/icelandic.htm http://en.wikipedia.org/wiki/Icelandic_alphabet http://oss.software.ibm.com/icu/charts/collation/is.html Let's go the first way. */ static const char icelandic[] = "& A < \\u00E1 <<< \\u00C1 " "& D < \\u00F0 <<< \\u00D0 " "& E < \\u00E9 <<< \\u00C9 " "& I < \\u00ED <<< \\u00CD " "& O < \\u00F3 <<< \\u00D3 " "& U < \\u00FA <<< \\u00DA " "& Y < \\u00FD <<< \\u00DD " "& Z < \\u00FE <<< \\u00DE " "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " "< \\u00E5 <<< \\u00C5 "; /* Some sources treat I and Y primary different. Other sources treat I and Y the same on primary level. We'll go the first way. 
*/ static const char latvian[] = "& C < \\u010D <<< \\u010C " "& G < \\u0123 <<< \\u0122 " "& I < \\u0079 <<< \\u0059 " "& K < \\u0137 <<< \\u0136 " "& L < \\u013C <<< \\u013B " "& N < \\u0146 <<< \\u0145 " "& R < \\u0157 <<< \\u0156 " "& S < \\u0161 <<< \\u0160 " "& Z < \\u017E <<< \\u017D "; static const char romanian[] = "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " "& I < \\u00EE <<< \\u00CE " "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E " "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 "; static const char slovenian[] = "& C < \\u010D <<< \\u010C " "& S < \\u0161 <<< \\u0160 " "& Z < \\u017E <<< \\u017D "; static const char polish[] = "& A < \\u0105 <<< \\u0104 " "& C < \\u0107 <<< \\u0106 " "& E < \\u0119 <<< \\u0118 " "& L < \\u0142 <<< \\u0141 " "& N < \\u0144 <<< \\u0143 " "& O < \\u00F3 <<< \\u00D3 " "& S < \\u015B <<< \\u015A " "& Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B"; static const char estonian[] = "& S < \\u0161 <<< \\u0160 " " < \\u007A <<< \\u005A " " < \\u017E <<< \\u017D " "& W < \\u00F5 <<< \\u00D5 " "< \\u00E4 <<< \\u00C4 " "< \\u00F6 <<< \\u00D6 " "< \\u00FC <<< \\u00DC "; // Standard Spanish, also for Galician. static const char spanish[] = "& N < \\u00F1 <<< \\u00D1 "; /* Some sources treat V and W as similar on primary level. We'll treat V and W as different on primary level. 
*/ static const char swedish[] = "& Y <<\\u00FC <<< \\u00DC " "& Z < \\u00E5 <<< \\u00C5 " "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 " "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "; static const char turkish[] = "& C < \\u00E7 <<< \\u00C7 " "& G < \\u011F <<< \\u011E " "& H < \\u0131 <<< \\u0049 " "& O < \\u00F6 <<< \\u00D6 " "& S < \\u015F <<< \\u015E " "& U < \\u00FC <<< \\u00DC "; static const char czech[] = "& C < \\u010D <<< \\u010C " "& H < ch <<< Ch <<< CH" "& R < \\u0159 <<< \\u0158" "& S < \\u0161 <<< \\u0160" "& Z < \\u017E <<< \\u017D"; static const char danish[] = /* Also good for Norwegian */ "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170" "& Z < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4" " < \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150" " < \\u00E5 <<< \\u00C5 << aa <<< Aa <<< AA"; static const char lithuanian[] = "& C << ch <<< Ch <<< CH< \\u010D <<< \\u010C" "& E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116" "& I << y <<< Y" "& S < \\u0161 <<< \\u0160" "& Z < \\u017E <<< \\u017D"; static const char slovak[] = "& A < \\u00E4 <<< \\u00C4" "& C < \\u010D <<< \\u010C" "& H < ch <<< Ch <<< CH" "& O < \\u00F4 <<< \\u00D4" "& S < \\u0161 <<< \\u0160" "& Z < \\u017E <<< \\u017D"; static const char spanish2[] = /* Also good for Asturian and Galician */ "&C < ch <<< Ch <<< CH" "&L < ll <<< Ll <<< LL" "&N < \\u00F1 <<< \\u00D1"; static const char roman[] = /* i.e. Classical Latin */ "& I << j <<< J " "& V << u <<< U "; /* Persian collation support was provided by Jody McIntyre To: internals@lists.mysql.com Subject: Persian UTF8 collation support Date: 17.08.2004 Contraction is not implemented. Some implementations do perform contraction but others do not, and it is able to sort all my test strings correctly. Jody. 
*/ static const char persian[] = "& \\u066D < \\u064E < \\uFE76 < \\uFE77 < \\u0650 < \\uFE7A < \\uFE7B" " < \\u064F < \\uFE78 < \\uFE79 < \\u064B < \\uFE70 < \\uFE71" " < \\u064D < \\uFE74 < \\u064C < \\uFE72" "& \\uFE7F < \\u0653 < \\u0654 < \\u0655 < \\u0670" "& \\u0669 < \\u0622 < \\u0627 < \\u0671 < \\u0621 < \\u0623 < \\u0625" " < \\u0624 < \\u0626" "& \\u0642 < \\u06A9 < \\u0643" "& \\u0648 < \\u0647 < \\u0629 < \\u06C0 < \\u06CC < \\u0649 < \\u064A" "& \\uFE80 < \\uFE81 < \\uFE82 < \\uFE8D < \\uFE8E < \\uFB50 < \\uFB51" " < \\uFE80 " /* FE80 appears both in reset and shift. We need to break the rule here and reset to *new* FE80 again, so weight for FE83 is calculated as P[FE80]+1, not as P[FE80]+8. */ " & \\uFE80 < \\uFE83 < \\uFE84 < \\uFE87 < \\uFE88 < \\uFE85" " < \\uFE86 < \\u0689 < \\u068A" "& \\uFEAE < \\uFDFC" "& \\uFED8 < \\uFB8E < \\uFB8F < \\uFB90 < \\uFB91 < \\uFED9 < \\uFEDA" " < \\uFEDB < \\uFEDC" "& \\uFEEE < \\uFEE9 < \\uFEEA < \\uFEEB < \\uFEEC < \\uFE93 < \\uFE94" " < \\uFBA4 < \\uFBA5 < \\uFBFC < \\uFBFD < \\uFBFE < \\uFBFF" " < \\uFEEF < \\uFEF0 < \\uFEF1 < \\uFEF2 < \\uFEF3 < \\uFEF4" " < \\uFEF5 < \\uFEF6 < \\uFEF7 < \\uFEF8 < \\uFEF9 < \\uFEFA" " < \\uFEFB < \\uFEFC"; /* Esperanto tailoring. Contributed by Bertilo Wennergren September 1, 2005 */ static const char esperanto[] = "& C < \\u0109 <<< \\u0108" "& G < \\u011D <<< \\u011C" "& H < \\u0125 <<< \\u0124" "& J < \\u0135 <<< \\u0134" "& S < \\u015d <<< \\u015c" "& U < \\u016d <<< \\u016c"; /* A simplified version of Hungarian, without consonant contractions. 
*/ static const char hungarian[] = "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150" "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170"; static const char croatian[] = "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106" "&D < d\\u017E = \\u01C6 <<< d\\u017D <<< D\\u017E = \\u01C5 <<< D\\u017D " "= \\u01C4" " < \\u0111 <<< \\u0110" "&L < lj = \\u01C9 <<< lJ <<< Lj = \\u01C8 <<< LJ = \\u01C7" "&N < nj = \\u01CC <<< nJ <<< Nj = \\u01CB <<< NJ = \\u01CA" "&S < \\u0161 <<< \\u0160" "&Z < \\u017E <<< \\u017D"; /* SCCII Part 1 : Collation Sequence (SLS1134) 2006/11/24 Harshula Jayasuriya Language Technology Research Lab, University of Colombo / ICTA */ #if 0 static const char sinhala[]= "& \\u0D96 < \\u0D82 < \\u0D83" "& \\u0DA5 < \\u0DA4" "& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3" "& \\u0DDE < \\u0DCA"; #else static const char sinhala[] = "& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C < \\u0D9D" "< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 < \\u0DA3" "< \\u0DA5 < \\u0DA4 < \\u0DA6" "< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB < \\u0DAC" "< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1" "< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 < \\u0DB8" "< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 < \\u0DC1" "< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6" "< \\u0DCF" "< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 < \\u0DD6" "< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 < \\u0DDA" "< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA"; #endif static const char vietnamese[] = " &A << \\u00E0 <<< \\u00C0" /* A */ " << \\u1EA3 <<< \\u1EA2" " << \\u00E3 <<< \\u00C3" " << \\u00E1 <<< \\u00C1" " << \\u1EA1 <<< \\u1EA0" " < \\u0103 <<< \\u0102" /* A WITH BREVE */ " << \\u1EB1 <<< \\u1EB0" " << \\u1EB3 <<< \\u1EB2" " << \\u1EB5 <<< \\u1EB4" " << \\u1EAF <<< \\u1EAE" " << \\u1EB7 <<< \\u1EB6" " < \\u00E2 <<< \\u00C2" /* A WITH CIRCUMFLEX */ " << \\u1EA7 <<< \\u1EA6" " << \\u1EA9 <<< \\u1EA8" " << \\u1EAB <<< \\u1EAA" " << \\u1EA5 <<< \\u1EA4" " << 
\\u1EAD <<< \\u1EAC" " &D < \\u0111 <<< \\u0110" /* D WITH STROKE */ " &E << \\u00E8 <<< \\u00C8" /* E */ " << \\u1EBB <<< \\u1EBA" " << \\u1EBD <<< \\u1EBC" " << \\u00E9 <<< \\u00C9" " << \\u1EB9 <<< \\u1EB8" " < \\u00EA <<< \\u00CA" /* E WITH CIRCUMFLEX */ " << \\u1EC1 <<< \\u1EC0" " << \\u1EC3 <<< \\u1EC2" " << \\u1EC5 <<< \\u1EC4" " << \\u1EBF <<< \\u1EBE" " << \\u1EC7 <<< \\u1EC6" " &I << \\u00EC <<< \\u00CC" /* I */ " << \\u1EC9 <<< \\u1EC8" " << \\u0129 <<< \\u0128" " << \\u00ED <<< \\u00CD" " << \\u1ECB <<< \\u1ECA" " &O << \\u00F2 <<< \\u00D2" /* O */ " << \\u1ECF <<< \\u1ECE" " << \\u00F5 <<< \\u00D5" " << \\u00F3 <<< \\u00D3" " << \\u1ECD <<< \\u1ECC" " < \\u00F4 <<< \\u00D4" /* O WITH CIRCUMFLEX */ " << \\u1ED3 <<< \\u1ED2" " << \\u1ED5 <<< \\u1ED4" " << \\u1ED7 <<< \\u1ED6" " << \\u1ED1 <<< \\u1ED0" " << \\u1ED9 <<< \\u1ED8" " < \\u01A1 <<< \\u01A0" /* O WITH HORN */ " << \\u1EDD <<< \\u1EDC" " << \\u1EDF <<< \\u1EDE" " << \\u1EE1 <<< \\u1EE0" " << \\u1EDB <<< \\u1EDA" " << \\u1EE3 <<< \\u1EE2" " &U << \\u00F9 <<< \\u00D9" /* U */ " << \\u1EE7 <<< \\u1EE6" " << \\u0169 <<< \\u0168" " << \\u00FA <<< \\u00DA" " << \\u1EE5 <<< \\u1EE4" " < \\u01B0 <<< \\u01AF" /* U WITH HORN */ " << \\u1EEB <<< \\u1EEA" " << \\u1EED <<< \\u1EEC" " << \\u1EEF <<< \\u1EEE" " << \\u1EE9 <<< \\u1EE8" " << \\u1EF1 <<< \\u1EF0" " &Y << \\u1EF3 <<< \\u1EF2" /* Y */ " << \\u1EF7 <<< \\u1EF6" " << \\u1EF9 <<< \\u1EF8" " << \\u00FD <<< \\u00DD" " << \\u1EF5 <<< \\u1EF4"; /* German Phonebook */ static const char de_pb_cldr_30[] = "&AE << \\u00E4 <<< \\u00C4 " "&OE << \\u00F6 <<< \\u00D6 " "&UE << \\u00FC <<< \\u00DC "; /* Icelandic */ static const char is_cldr_30[] = "&[before 1]b < \\u00E1 <<< \\u00C1 " "& d << \\u0111 <<< \\u0110 < \\u00F0 <<< \\u00D0 " "&[before 1]f < \\u00E9 <<< \\u00C9 " "&[before 1]j < \\u00ED <<< \\u00CD " "&[before 1]p < \\u00F3 <<< \\u00D3 " "&[before 1]v < \\u00FA <<< \\u00DA " "&[before 1]z < \\u00FD <<< \\u00DD " "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 
<< \\u00E4 <<< \\u00C4 " "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " "< \\u00E5 <<< \\u00C5"; /* Latvian */ static const char lv_cldr_30[] = "&[before 1]D < \\u010D <<< \\u010C " "&[before 1]H < \\u0123 <<< \\u0122 " "& I << y <<< Y " "&[before 1]L < \\u0137 <<< \\u0136 " "&[before 1]M < \\u013C <<< \\u013B " "&[before 1]O < \\u0146 <<< \\u0145 " "&[before 1]S < \\u0157 <<< \\u0156 " "&[before 1]T < \\u0161 <<< \\u0160 " "&[before 1]\\u01B7 < \\u017E <<< \\u017D"; /* Romanian */ static const char ro_cldr_30[] = "&A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " "&I < \\u00EE <<< \\u00CE " "&S < \\u015F = \\u0219 <<< \\u015E = \\u0218 " "&T < \\u0163 = \\u021B <<< \\u0162 = \\u021A"; /* Slovenian */ static const char sl_cldr_30[] = "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 " "&D < \\u0111 <<< \\u0110 " "&S < \\u0161 <<< \\u0160 " "&Z < \\u017E <<< \\u017D"; /* Polish */ static const char pl_cldr_30[] = "&A < \\u0105 <<< \\u0104 " "&C < \\u0107 <<< \\u0106 " "&E < \\u0119 <<< \\u0118 " "&L < \\u0142 <<< \\u0141 " "&N < \\u0144 <<< \\u0143 " "&O < \\u00F3 <<< \\u00D3 " "&S < \\u015B <<< \\u015A " "&Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B"; /* Estonian */ static const char et_cldr_30[] = "&[before 1]T < \\u0161 <<< \\u0160 < z <<< Z " "< \\u017E <<< \\u017D " "&[before 1]X < \\u00F5 <<< \\u00D5 < \\u00E4 <<< \\u00C4 " "< \\u00F6 <<< \\u00D6 < \\u00FC <<< \\u00DC"; /* Swedish */ static const char sv_cldr_30[] = "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 " "& t <<< \\u00FE/h " "& T <<< \\u00DE/H " "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 " "&[before 1]\\u01C0 < \\u00E5 <<< \\u00C5 < \\u00E4 <<< \\u00C4 " "<< \\u00E6 <<< \\u00C6 << \\u0119 <<< \\u0118 " "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 " "<< \\u00F4 <<< \\u00D4"; /* Turkish */ static const char tr_cldr_30[] = "& C < \\u00E7 <<< \\u00C7 " "& G < \\u011F <<< \\u011E " "&[before 1]i < \\u0131 <<< I " "& i <<< \\u0130 " "& O < 
\\u00F6 <<< \\u00D6 " "& S < \\u015F <<< \\u015E " "& U < \\u00FC <<< \\u00DC "; /* Czech */ static const char cs_cldr_30[] = "&C < \\u010D <<< \\u010C " "&H < ch <<< cH <<< Ch <<< CH " "&R < \\u0159 <<< \\u0158" "&S < \\u0161 <<< \\u0160" "&Z < \\u017E <<< \\u017D"; /* Danish, same for Norwegian */ static const char da_cldr_30[] = "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 " "& t <<< \\u00FE/h " "& T <<< \\u00DE/H " "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 " "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " "< \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 " "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 " "< \\u00E5 <<< \\u00C5 <<< aa <<< Aa " "<<< AA"; static Coll_param da_coll_param = {nullptr, false, CASE_FIRST_UPPER}; /* CASE FIRST OFF for Norwegian */ static Coll_param no_coll_param = {nullptr, false, CASE_FIRST_OFF}; /* Lithuanian */ static const char lt_cldr_30[] = "&\\u0300 = \\u0307\\u0300 " "&\\u0301 = \\u0307\\u0301 " "&\\u0303 = \\u0307\\u0303 " "&A << \\u0105 <<< \\u0104 " "&C < \\u010D <<< \\u010C " "&E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116" "&I << \\u012F <<< \\u012E << y <<< Y " "&S < \\u0161 <<< \\u0160 " "&U << \\u0173 <<< \\u0172 << \\u016B <<< \\u016A " "&Z < \\u017E <<< \\u017D"; /* Slovak */ static const char sk_cldr_30[] = "&A < \\u00E4 <<< \\u00C4 " "&C < \\u010D <<< \\u010C " "&H < ch <<< cH <<< Ch <<< CH " "&O < \\u00F4 <<< \\u00D4 " "&R < \\u0159 <<< \\u0158 " "&S < \\u0161 <<< \\u0160 " "&Z < \\u017E <<< \\u017D"; /* Spanish (Traditional) */ static const char es_trad_cldr_30[] = "&N < \\u00F1 <<< \\u00D1 " "&C < ch <<< Ch <<< CH " "&l < ll <<< Ll <<< LL"; /* Persian */ #if 0 static const char fa_cldr_30[]= "& \\u064E << \\u0650 << \\u064F << \\u064B << \\u064D " "<< \\u064C " "&[before 1]\\u0627 < \\u0622 " "& \\u0627 << \\u0671 < \\u0621 << \\u0623 << \\u0672 " "<< \\u0625 << \\u0673 << \\u0624 << \\u06CC\\u0654 " "<<< \\u0649\\u0654 <<< \\u0626 " "& \\u06A9 << \\u06AA << \\u06AB << \\u0643 << 
\\u06AC " "<< \\u06AD << \\u06AE " "& \\u06CF < \\u0647 << \\u06D5 << \\u06C1 << \\u0629 " "<< \\u06C3 << \\u06C0 << \\u06BE " "& \\u06CC << \\u0649 << \\u06D2 << \\u064A << \\u06D0 " "<< \\u06D1 << \\u06CD << \\u06CE"; static Reorder_param fa_reorder_param= { {CHARGRP_ARAB, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0 }; static Coll_param fa_coll_param= { &fa_reorder_param, true }; #endif /* Hungarian */ static const char hu_cldr_30[] = "&C < cs <<< Cs <<< CS " "&D < dz <<< Dz <<< DZ " "&DZ < dzs <<< Dzs <<< DZS " "&G < gy <<< Gy <<< GY " "&L < ly <<< Ly <<< LY " "&N < ny <<< Ny <<< NY " "&S < sz <<< Sz <<< SZ " "&T < ty <<< Ty <<< TY " "&Z < zs <<< Zs <<< ZS " "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150 " "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 " "&cs <<< ccs/cs " "&Cs <<< Ccs/cs " "&CS <<< CCS/CS " "&dz <<< ddz/dz " "&Dz <<< Ddz/dz " "&DZ <<< DDZ/DZ " "&dzs<<< ddzs/dzs " "&Dzs<<< Ddzs/dzs " "&DZS<<< DDZS/DZS " "&gy <<< ggy/gy " "&Gy <<< Ggy/gy " "&GY <<< GGY/GY " "&ly <<< lly/ly " "&Ly <<< Lly/ly " "&LY <<< LLY/LY " "&ny <<< nny/ny " "&Ny <<< Nny/ny " "&NY <<< NNY/NY " "&sz <<< ssz/sz " "&Sz <<< Ssz/sz " "&SZ <<< SSZ/SZ " "&ty <<< tty/ty " "&Ty <<< Tty/ty " "&TY <<< TTY/TY " "&zs <<< zzs/zs " "&Zs <<< Zzs/zs " "&ZS <<< ZZS/ZS"; /* Croatian, same for Serbian with Latin and Bosnian. 
*/ static const char hr_cldr_30[] = "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 " "&D < d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D " "<<< \\u01C4 < \\u0111 <<< \\u0110 " "&L < lj <<< \\u01C9 <<< Lj <<< \\u01C8 <<< LJ " "<<< \\u01C7 " "&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ " "<<< \\u01CA " "&S < \\u0161 <<< \\u0160 " "&Z < \\u017E <<< \\u017D "; static Reorder_param hr_reorder_param = { {CHARGRP_LATIN, CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0}; static Coll_param hr_coll_param = {&hr_reorder_param, false, CASE_FIRST_OFF}; /* Sinhala */ #if 0 static const char si_cldr_30[]= "&\\u0D96 < \\u0D82 < \\u0D83 " "&\\u0DA5 < \\u0DA4"; #endif /* Vietnamese */ static const char vi_cldr_30[] = "&\\u0300 << \\u0309 << \\u0303 << \\u0301 << \\u0323 " "&a < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " "&d < \\u0111 <<< \\u0110 " "&e < \\u00EA <<< \\u00CA " "&o < \\u00F4 <<< \\u00D4 < \\u01A1 <<< \\u01A0 " "&u < \\u01B0 <<< \\u01AF"; static Coll_param vi_coll_param = {nullptr, true, CASE_FIRST_OFF}; static Reorder_param ja_reorder_param = { /* Per CLDR 30, Japanese reorder rule is defined as [Latn Kana Hani], but for Hani characters, their weight is implicit according to UCA, which is different from other character groups. We don't add "Hani" below and will have special handling for them in adjust_japanese_weight() and apply_reorder_param(). Implicit weight has two collation elements. To make strnxfrm() run faster, we give Japanese Han characters tailored weight which has only one collation element. These characters' weight is defined in ja_han_pages. */ {CHARGRP_LATIN, CHARGRP_KANA, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0}; static Coll_param ja_coll_param = {&ja_reorder_param, false /*norm_enabled*/, CASE_FIRST_OFF}; /* The Chinese reorder rule is defined as [Hani]. This means all Han characters' weight should be greater than the core group and smaller than any other character groups. The Han characters are separated into two parts. 
The CLDR collation definition file, zh.xml, defines 41336 Han characters'
  order, and all other Han characters have implicit weight. Since the core
  group characters occupy the weight values from 0x0209 to 0x1C46 in DUCET,
  we set the weights of all Han characters defined in zh.xml to values from
  0x1C47 to 0xBDBE. The smallest weight value of these Han characters,
  0x1C47, being the largest weight value of the core group plus one
  (0x1C46 + 1), ensures these Han characters sort greater than the core
  group characters. Also, we set the implicit weight of the Han characters
  like [BDBF - BDC3, 0020, 0002][XXXX, 0000, 0000]. To tailor the weight of
  characters of Latin, Cyrillic and so on to be bigger than all Han
  characters, we give these characters weights from 0xBDC4 to 0xF620.

  There are many character groups between the core group and the Han group,
  so it would be a long list if we put them in the following reorder_grp
  structure. But since it is a very simple weight shift, we put their
  calculated weight here and do not calculate it in my_prepare_reorder().

  NOTE: We use the zh.xml file from CLDR v33.1 to implement this Chinese
  collation, because we found that the file of CLDR v30 is missing some very
  common Han characters (the Han character 'small', etc).

  NOTE(review): the two weight pairs in the initializer below look like
  "original range" -> "shifted range" for the groups that must sort after
  Han; the field names live in Reorder_param, which is not visible here --
  confirm against its declaration.
*/
static Reorder_param zh_reorder_param = {
    {CHARGRP_NONE}, {{{0x1C47, 0x54A3}, {0xBDC4, 0xF620}}}, 1, 0x54A3};

static Coll_param zh_coll_param = {&zh_reorder_param, false, CASE_FIRST_OFF};

/* Russian, same for Bulgarian and Mongolian with Cyrillic letters */
static Reorder_param ru_reorder_param = {
    {CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0};
static Coll_param ru_coll_param = {&ru_reorder_param, false /*norm_enabled*/,
                                   CASE_FIRST_OFF};

/*
  Zero-filled weight string. The my_uca_scanner constructor points wbeg at
  it, so a freshly constructed scanner has no pending weights.
*/
static constexpr uint16_t nochar[] = {0, 0};

/**
  Unicode Collation Algorithm:
  Collation element (weight) scanner,
  for sequential scanning of collation weights from a string.
Only meant as a base class; instantiate uca_scanner_any or uca_scanner_900 instead of this. */ class my_uca_scanner { protected: my_uca_scanner(const CHARSET_INFO *cs_arg, const uint8_t *str, size_t length) : wbeg(nochar), sbeg(str), send(str + length), uca(cs_arg->uca), cs(cs_arg), sbeg_dup(str) {} public: /** Get the level the scanner is currently working on. The string can be scanned multiple times (if the collation requires multi-level comparisons, e.g. for accent or case sensitivity); first to get primary weights, then from the start again for secondary, etc. */ unsigned get_weight_level() const { return weight_lv; } protected: unsigned weight_lv{0}; /* 0 = Primary, 1 = Secondary, 2 = Tertiary */ const uint16_t *wbeg; /* Beginning of the current weight string */ unsigned wbeg_stride{0}; /* Number of bytes between weights in string */ const uint8_t *sbeg; /* Beginning of the input string */ const uint8_t *send; /* End of the input string */ const MY_UCA_INFO *uca{nullptr}; uint16_t implicit[10]{}; my_wc_t prev_char{0}; // Previous code point we scanned, if any. const CHARSET_INFO *cs; unsigned num_of_ce_left{0}; const uint8_t *sbeg_dup; /* Backup of beginning of input string */ protected: const uint16_t *contraction_find(my_wc_t wc0, size_t *chars_skipped); inline const uint16_t *previous_context_find(my_wc_t wc0, my_wc_t wc1); }; /* Charset dependent scanner part, to optimize some character sets. */ template struct uca_scanner_any : public my_uca_scanner { uca_scanner_any(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg, const uint8_t *str, size_t length) : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) { // UCA 9.0.0 uses a different table format from what this scanner expects. assert(cs_arg->uca == nullptr || cs_arg->uca->version != UCA_V900); } unsigned get_char_index() const { return char_index; } inline int next(); private: /** How many code points (possibly multibyte) we have scanned so far. This includes code points with zero weight. 
Note that this is reset once we get to the end of the string and restart the scanning for the next weight level, but it is _not_ reset when we reach the end of the last level. */ unsigned char_index{0}; const Mb_wc mb_wc; inline int next_implicit(my_wc_t ch); }; template class uca_scanner_900 : public my_uca_scanner { public: uca_scanner_900(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg, const uint8_t *str, size_t length) : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) {} inline int next(); /** For each weight in sequence, call "func", which should have a function signature of "bool func(int weight, bool is_level_separator)". Stops the iteration early if "func" returns false. This is morally equivalent to int weight; while ((weight= next()) >= 0) { if (!func(weight, weight == 0)) break; } except that it might employ optimizations internally to speed up the process. These optimizations will not modify the number of calls to func() (or their order), but might affect the internal scanner state during the calls, so func() should not try to read from the scanner except by calling public member functions. As a special optimization, if "bool preaccept_data(int num_weights)" returns true, the next "num_weights" calls to func() _must_ return true. This is so that bounds checking costs can be amortized over fewer calls. */ template inline void for_each_weight(T func, U preaccept_data); private: const Mb_wc mb_wc; inline int next_raw(); inline int more_weight(); uint16_t apply_case_first(uint16_t weight); uint16_t apply_reorder_param(uint16_t weight); inline int next_implicit(my_wc_t ch); void my_put_jamo_weights(my_wc_t *hangul_jamo, int jamo_cnt); /* apply_reorder_param() needs to return two weights for each origin weight. This boolean signals whether we have already returned the FB86 weight, and are ready to return the origin weight. 
*/ bool return_origin_weight{true}; /* For Japanese kana-sensitive collation, we only add quaternary weight for katakana and hiragana, but not for others like latin and kanji, because characters like latin and kanji can be already distinguished from kana by three levels of weight. has_quaternary_weight is to indicate whether quaternary weight is needed for characters in string. */ bool has_quaternary_weight{false}; int handle_ja_contraction_quat_wt(); int handle_ja_common_quat_wt(my_wc_t wc); }; /********** Helper functions to handle contraction ************/ /** Mark a code point as a contraction part @param flags Pointer to UCA contraction flag data @param wc Unicode code point @param flag flag: "is contraction head", "is contraction tail" */ static inline void my_uca_add_contraction_flag(MY_UCA_INFO::flags_type *flags, my_wc_t wc, int flag) { (*flags)[wc & MY_UCA_CNT_FLAG_MASK] |= flag; } /** Check if UCA level data has contractions. @param uca Pointer to UCA data @return Flags indicating if UCA with contractions @retval 0 - no contractions @retval 1 - there are some contractions */ static inline bool my_uca_have_contractions(const MY_UCA_INFO *uca) { return uca->have_contractions; } struct trie_node_cmp { bool operator()(const MY_CONTRACTION &a, const my_wc_t b) { return a.ch < b; } bool operator()(const MY_CONTRACTION &a, const MY_CONTRACTION &b) { return a.ch < b.ch; } }; static std::vector::const_iterator find_contraction_part_in_trie(const std::vector &cont_nodes, my_wc_t ch) { if (cont_nodes.empty()) return cont_nodes.end(); return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch, trie_node_cmp()); } static std::vector::iterator find_contraction_part_in_trie( std::vector &cont_nodes, my_wc_t ch) { if (cont_nodes.empty()) return cont_nodes.end(); return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch, trie_node_cmp()); } /** Find a contraction consisting of two code points and return its weight array @param cont_nodes Vector that contains 
contraction nodes @param wc1 First code point @param wc2 Second code point @return Weight array @retval NULL - no contraction found @retval ptr - contraction weight array */ const uint16_t *my_uca_contraction2_weight( const std::vector *cont_nodes, my_wc_t wc1, my_wc_t wc2) { if (!cont_nodes) return nullptr; if (!cont_nodes->empty()) { std::vector::const_iterator node_it1 = find_contraction_part_in_trie(*cont_nodes, wc1); if (node_it1 == cont_nodes->end() || node_it1->ch != wc1) return nullptr; std::vector::const_iterator node_it2 = find_contraction_part_in_trie(node_it1->child_nodes, wc2); if (node_it2 != node_it1->child_nodes.end() && node_it2->ch == wc2 && node_it2->is_contraction_tail) return node_it2->weight; } return nullptr; } /** Check if a code point can be previous context head @param flags Pointer to UCA contraction flag data @param wc Code point @retval false - cannot be previous context head @retval true - can be previous context head */ static inline bool my_uca_can_be_previous_context_head( const MY_UCA_INFO::flags_type *flags, my_wc_t wc) { return (*flags)[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD; } /** Check if a code point can be previous context tail @param flags Pointer to UCA contraction flag data @param wc Code point @retval false - cannot be contraction tail @retval true - can be contraction tail */ static inline bool my_uca_can_be_previous_context_tail( const MY_UCA_INFO::flags_type *flags, my_wc_t wc) { return (*flags)[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL; } /** Check if a string is a contraction of exactly the given length, and return its weight array on success. 
@param cont_nodes Vector that contains contraction nodes @param wc Pointer to wide string @param len String length @return Weight array @retval NULL - Input string is not a known contraction @retval ptr - contraction weight array */ static inline const uint16_t *my_uca_contraction_weight( const std::vector *cont_nodes, const my_wc_t *wc, size_t len) { if (!cont_nodes) return nullptr; std::vector::const_iterator node_it; for (size_t ch_ind = 0; ch_ind < len; ++ch_ind) { node_it = find_contraction_part_in_trie(*cont_nodes, wc[ch_ind]); if (node_it == cont_nodes->end() || node_it->ch != wc[ch_ind]) return nullptr; cont_nodes = &node_it->child_nodes; } if (node_it->is_contraction_tail) return node_it->weight; return nullptr; } /** Return length of a 0-terminated wide string, analogous to strnlen(). @param s Pointer to wide string @param maxlen Mamixum string length @return string length, or maxlen if no '\0' is met. */ static size_t my_wstrnlen(my_wc_t *s, size_t maxlen) { for (size_t i = 0; i < maxlen; i++) { if (s[i] == 0) return i; } return maxlen; } /** Find a contraction in the input stream and return its weight array Scan input code points to find a longest path in contraction trie which contains all these code points. If the ending node of this path is end of contraction, return the weight array. @param wc0 The first code point of the contraction (which should have the MY_UCA_CNT_HEAD flag). @param[out] chars_skipped How many code points where skipped in the contraction we found. Only makes sense if we actually found one. 
@return Weight array
  @retval     NULL          no contraction found
  @retval     ptr           contraction weight array
*/

const uint16_t *my_uca_scanner::contraction_find(my_wc_t wc0,
                                                 size_t *chars_skipped) {
  auto mb_wc = cs->cset->mb_wc;

  /* Trie level currently being searched; starts at the root. */
  const std::vector *level = uca->contraction_nodes;
  /* Longest contraction seen so far and the input position right after it. */
  const MY_CONTRACTION *best = nullptr;
  const uint8_t *after_best = nullptr;
  /* Read position in the input; sbeg itself moves only on success. */
  const uint8_t *cursor = sbeg;

  while (true) {
    const auto node = find_contraction_part_in_trie(*level, wc0);
    if (node == level->end() || node->ch != wc0) break;

    if (node->is_contraction_tail) {
      /* Remember this match, but keep looking for a longer one. */
      best = &(*node);
      after_best = cursor;
      *chars_skipped = node->contraction_len - 1;
    }

    /* Fetch the next code point and descend into the child nodes. */
    const int mblen = mb_wc(cs, &wc0, cursor, send);
    if (mblen <= 0) break;
    cursor += mblen;
    level = &node->child_nodes;
  }

  if (best == nullptr) return nullptr; /* No contractions were found */

  const uint16_t *cweight = best->weight;
  if (uca->version == UCA_V900) {
    cweight += weight_lv;
    wbeg = cweight + MY_UCA_900_CE_SIZE;
    wbeg_stride = MY_UCA_900_CE_SIZE;
    num_of_ce_left = 7;
  } else {
    wbeg = cweight + 1;
    wbeg_stride = MY_UCA_900_CE_SIZE;
  }
  /* Consume the code points that made up the contraction. */
  sbeg = after_best;
  return cweight;
}

/**
  Find weight for contraction with previous context
  and return its weight array.
@param wc0 Previous code point @param wc1 Current code point @return Weight array @retval NULL - no contraction with context found @retval ptr - contraction weight array */ ALWAYS_INLINE const uint16_t *my_uca_scanner::previous_context_find(my_wc_t wc0, my_wc_t wc1) { std::vector::const_iterator node_it1 = find_contraction_part_in_trie(*uca->contraction_nodes, wc1); if (node_it1 == uca->contraction_nodes->end() || node_it1->ch != wc1) return nullptr; std::vector::const_iterator node_it2 = find_contraction_part_in_trie(node_it1->child_nodes_context, wc0); if (node_it2 != node_it1->child_nodes_context.end() && node_it2->ch == wc0) { if (uca->version == UCA_V900) { wbeg = node_it2->weight + MY_UCA_900_CE_SIZE + weight_lv; wbeg_stride = MY_UCA_900_CE_SIZE; num_of_ce_left = 7; } else { wbeg = node_it2->weight + 1; wbeg_stride = MY_UCA_900_CE_SIZE; } return node_it2->weight + weight_lv; } return nullptr; } /****************************************************************/ #define HANGUL_JAMO_MAX_LENGTH 3 /** Check if a code point is Hangul syllable. Decompose it to jamos if it is, and return tailored weights. 
@param syllable Hangul syllable to be decomposed @param[out] jamo Corresponding jamos @return 0 The code point is not Hangul syllable or cannot be decomposed others The number of jamos returned */ static int my_decompose_hangul_syllable(my_wc_t syllable, my_wc_t *jamo) { if (syllable < 0xAC00 || syllable > 0xD7AF) return 0; constexpr unsigned syllable_base = 0xAC00; constexpr unsigned leadingjamo_base = 0x1100; constexpr unsigned voweljamo_base = 0x1161; constexpr unsigned trailingjamo_base = 0x11A7; constexpr unsigned voweljamo_cnt = 21; constexpr unsigned trailingjamo_cnt = 28; const unsigned syllable_index = syllable - syllable_base; const unsigned v_t_combination = voweljamo_cnt * trailingjamo_cnt; const unsigned leadingjamo_index = syllable_index / v_t_combination; const unsigned voweljamo_index = (syllable_index % v_t_combination) / trailingjamo_cnt; const unsigned trailingjamo_index = syllable_index % trailingjamo_cnt; jamo[0] = leadingjamo_base + leadingjamo_index; jamo[1] = voweljamo_base + voweljamo_index; jamo[2] = trailingjamo_index ? (trailingjamo_base + trailingjamo_index) : 0; return trailingjamo_index ? 3 : 2; } template void uca_scanner_900::my_put_jamo_weights( my_wc_t *hangul_jamo, int jamo_cnt) { for (int jamoind = 0; jamoind < jamo_cnt; jamoind++) { uint16_t *implicit_weight = implicit + jamoind * MY_UCA_900_CE_SIZE; unsigned page = hangul_jamo[jamoind] >> 8; unsigned code = hangul_jamo[jamoind] & 0xFF; const uint16_t *jamo_weight_page = uca->weights[page]; implicit_weight[0] = UCA900_WEIGHT(jamo_weight_page, 0, code); implicit_weight[1] = UCA900_WEIGHT(jamo_weight_page, 1, code); implicit_weight[2] = UCA900_WEIGHT(jamo_weight_page, 2, code); } implicit[9] = jamo_cnt; } /* Chinese Han characters are assigned an implicit weight according to the Unicode Collation Algorithm. 
But when creating our Chinese collation for utf8mb4, to implement this language's reorder rule, we give the Han characters in CLDR zh.xml file weight values from 0x1C47 to 0xBDBE, and let the other Han characters keep their implicit weight. Per UCA, the smallest leading primary weight of the implicit weight is 0xFB00, and the largest primary weight we ocuppy for the Han characters in zh.xml is 0xBDBE. There is a huge gap between these two weight values. To use this weight value gap and let the character groups like Latin, Cyrillic, have a single primary weight as before reordering, we change the leading primary weight of the implicit weight as below. */ static uint16_t change_zh_implicit(uint16_t weight) { assert(weight >= 0xFB00); switch (weight) { case 0xFB00: return 0xF621; case 0xFB40: return 0xBDBF; case 0xFB41: return 0xBDC0; case 0xFB80: return 0xBDC1; case 0xFB84: return 0xBDC2; case 0xFB85: return 0xBDC3; default: return weight + 0xF622 - 0xFBC0; } } template ALWAYS_INLINE int uca_scanner_900::next_implicit( my_wc_t ch) { my_wc_t hangul_jamo[HANGUL_JAMO_MAX_LENGTH]; int jamo_cnt; if ((jamo_cnt = my_decompose_hangul_syllable(ch, hangul_jamo))) { my_put_jamo_weights(hangul_jamo, jamo_cnt); num_of_ce_left = jamo_cnt - 1; wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv; wbeg_stride = MY_UCA_900_CE_SIZE; return *(implicit + weight_lv); } /* We give the Chinese collation different leading primary weight to make sure there are enough single weight values to be assigned to character groups like Latin, Cyrillic, etc. 
*/ unsigned page = 0; if (ch >= 0x17000 && ch <= 0x18AFF) // Tangut character { page = 0xFB00; implicit[3] = (ch - 0x17000) | 0x8000; } else { page = ch >> 15; implicit[3] = (ch & 0x7FFF) | 0x8000; if ((ch >= 0x3400 && ch <= 0x4DB5) || (ch >= 0x20000 && ch <= 0x2A6D6) || (ch >= 0x2A700 && ch <= 0x2B734) || (ch >= 0x2B740 && ch <= 0x2B81D) || (ch >= 0x2B820 && ch <= 0x2CEA1)) { page += 0xFB80; } else if ((ch >= 0x4E00 && ch <= 0x9FD5) || (ch >= 0xFA0E && ch <= 0xFA29)) page += 0xFB40; else page += 0xFBC0; } if (cs->coll_param == &zh_coll_param) { page = change_zh_implicit(page); } implicit[0] = page; implicit[1] = 0x0020; implicit[2] = 0x0002; // implicit[3] is set above. implicit[4] = 0; implicit[5] = 0; num_of_ce_left = 1; wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv; wbeg_stride = MY_UCA_900_CE_SIZE; return *(implicit + weight_lv); } /** Return implicit UCA weight. Used for code points that do not have assigned UCA weights. The second weight, (ch & 0x7FFF) | 0x8000, is stored in implicit[0] and wbeg is pointed at it; the leading weight is the return value. @param ch Code point that has no weight page. @return The leading implicit weight. */ template ALWAYS_INLINE int uca_scanner_any::next_implicit(my_wc_t ch) { implicit[0] = (ch & 0x7FFF) | 0x8000; implicit[1] = 0; wbeg = implicit; wbeg_stride = MY_UCA_900_CE_SIZE; unsigned page = ch >> 15; if (ch >= 0x3400 && ch <= 0x4DB5) page += 0xFB80; else if (ch >= 0x4E00 && ch <= 0x9FA5) page += 0xFB40; else page += 0xFBC0; return page; } template ALWAYS_INLINE int uca_scanner_any::next() { /* Check if the weights for the previous code point have been already fully scanned. If yes, then get the next code point and initialize wbeg and wlength to its weight string.
*/ if (wbeg[0]) /* More weights left from the previous step: */ return *wbeg++; /* return the next weight from expansion */ do { my_wc_t wc = 0; /* Get next code point */ int mblen = mb_wc(&wc, sbeg, send); if (mblen <= 0) { /* End of input or undecodable sequence: the scan is over */ ++weight_lv; return -1; } sbeg += mblen; char_index++; if (wc > uca->maxchar) { /* Return 0xFFFD as weight for all characters above uca->maxchar */ wbeg = nochar; wbeg_stride = 0; return 0xFFFD; } if (my_uca_have_contractions(uca)) { const uint16_t *cweight = nullptr; /* If we have scanned a code point which can have previous context, and there were some more code points already before, then verify that {prev_char, wc} together form a real previous context pair. Note, we support only 2-character long sequences with previous context at the moment. CLDR does not have longer sequences. */ if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) && wbeg != nochar && /* if not the very first character */ my_uca_can_be_previous_context_head(uca->contraction_flags, prev_char) && (cweight = previous_context_find(prev_char, wc))) { prev_char = 0; /* Clear for the next character */ return *cweight; } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) { /* Check if wc starts a contraction */ size_t chars_skipped; if ((cweight = contraction_find(wc, &chars_skipped))) { /* Count the extra code points the contraction consumed */ char_index += chars_skipped; return *cweight; } } prev_char = wc; } /* Process single code point */ unsigned page = wc >> 8; unsigned code = wc & 0xFF; /* If weight page for wc does not exist, then calculate algorithmically */ const uint16_t *wpage = uca->weights[page]; if (!wpage) return next_implicit(wc); /* Calculate pointer to wc's weight, using page and offset */ wbeg = wpage + code * uca->lengths[page]; wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS; } while (!wbeg[0]); /* Skip ignorable code points */ return *wbeg++; } template inline int uca_scanner_900::more_weight() { /* Check if the weights for the previous code point have been already fully scanned.
If no, return the first non-zero weight. Returns -1 when no collation elements are left for the previous code point. */ while (num_of_ce_left != 0 && *wbeg == 0) { wbeg += wbeg_stride; --num_of_ce_left; } if (num_of_ce_left != 0) { uint16_t rtn = *wbeg; wbeg += wbeg_stride; --num_of_ce_left; return rtn; /* return the next weight from expansion */ } return -1; } /** True for code points in the hiragana range 0x3041..0x3096. */ static inline bool is_hiragana_char(my_wc_t wc) { return wc >= 0x3041 && wc <= 0x3096; } static inline bool is_katakana_char(my_wc_t wc) { return (wc >= 0x30A1 && wc <= 0x30FA) || // Full width katakana (wc >= 0xFF66 && wc <= 0xFF9D); // Half width katakana } static inline bool is_katakana_iteration(my_wc_t wc) { return wc == 0x30FD || wc == 0x30FE; } static inline bool is_hiragana_iteration(my_wc_t wc) { return wc == 0x309D || wc == 0x309E; } static inline bool is_ja_length_mark(my_wc_t wc) { return wc == 0x30FC; } /** Return quaternary weight when running for that level. The weight is chosen from prev_char, the first character of the previous-context pair. @retval 0 - Do not return quaternary weight. @retval others - Quaternary weight for this character. */ template ALWAYS_INLINE int uca_scanner_900::handle_ja_contraction_quat_wt() { /* For Japanese, only weight shift rules and previous context rules are defined. And in previous context rules, the first character is always katakana / hiragana, and the second character is always iteration or length mark. The quaternary weight of iteration / length mark is same as the first character. So has_quaternary_weight is always true. For how we return quaternary weight, please refer to the comment in handle_ja_common_quat_wt(). */ if (weight_lv == 3) { wbeg = nochar; num_of_ce_left = 0; if (is_katakana_char(prev_char)) { return JA_KATA_QUAT_WEIGHT; } else if (is_hiragana_char(prev_char)) { return JA_HIRA_QUAT_WEIGHT; } } return 0; } /** Check whether quaternary weight is needed for character with Japanese kana-sensitive collation. If it is, return quaternary weight when running for that level. @retval 0 - Quaternary weight check is done. @retval -1 - There is no quaternary weight for this character.
@retval others - Quaternary weight for this character. @param wc Code point currently being scanned. */ template ALWAYS_INLINE int uca_scanner_900::handle_ja_common_quat_wt( my_wc_t wc) { /* For Japanese kana-sensitive collation, we detect whether quaternary weight is necessary when scanning for the first level of weight. If it is, the quaternary weight will be returned for katakana / hiragana later. */ if (weight_lv == 0 && !has_quaternary_weight) { if (is_katakana_char(wc) || is_katakana_iteration(wc) || is_hiragana_char(wc) || is_hiragana_iteration(wc) || is_ja_length_mark(wc)) has_quaternary_weight = true; } else if (weight_lv == 3) { /* On the quaternary level kana get a fixed weight and every other code point gets none (-1); no regular weights are left pending. */ wbeg = nochar; num_of_ce_left = 0; if (is_katakana_char(wc) || is_katakana_iteration(wc) || is_ja_length_mark(wc)) { return JA_KATA_QUAT_WEIGHT; } else if (is_hiragana_char(wc) || is_hiragana_iteration(wc)) { return JA_HIRA_QUAT_WEIGHT; } return -1; } return 0; } // Generic version that can handle any number of levels. template ALWAYS_INLINE int uca_scanner_900::next_raw() { int remain_weight = more_weight(); if (remain_weight >= 0) return remain_weight; do { my_wc_t wc = 0; /* Get next code point */ int mblen = mb_wc(&wc, sbeg, send); if (mblen <= 0) { if (LEVELS_FOR_COMPARE == 1) { ++weight_lv; return -1; } if (++weight_lv < LEVELS_FOR_COMPARE) { if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) { // Return directly if we don't have quaternary weight. if (weight_lv == 3 && !has_quaternary_weight) return -1; } /* Restart scanning from the beginning of the string, and add a level separator. */ sbeg = sbeg_dup; return 0; } // If we don't have any more levels left, we're done. return -1; } sbeg += mblen; assert(wc <= uca->maxchar); // mb_wc() has already checked this. if (my_uca_have_contractions(uca)) { const uint16_t *cweight = nullptr; /* If we have scanned a code point which can have previous context, and there were some more code points already before, then verify that {prev_char, wc} together form a real previous context pair.
Note, we support only 2-character long sequences with previous context at the moment. CLDR does not have longer sequences. CLDR doesn't have a previous context rule whose first character is 0x0000, so the initial value (0) of prev_char won't break the logic. */ if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) && my_uca_can_be_previous_context_head(uca->contraction_flags, prev_char) && (cweight = previous_context_find(prev_char, wc))) { // For Japanese kana-sensitive collation. if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) { int quat_wt = handle_ja_contraction_quat_wt(); prev_char = 0; if (quat_wt > 0) return quat_wt; } prev_char = 0; /* Clear for the next code point */ return *cweight; } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) { /* Check if wc starts a contraction */ size_t chars_skipped; // Ignored. if ((cweight = contraction_find(wc, &chars_skipped))) return *cweight; } prev_char = wc; } // For Japanese kana-sensitive collation. if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) { int quat_wt = handle_ja_common_quat_wt(wc); if (quat_wt == -1) continue; else if (quat_wt) return quat_wt; } /* Process single code point */ unsigned page = wc >> 8; unsigned code = wc & 0xFF; /* If weight page for wc does not exist, then calculate algorithmically */ const uint16_t *wpage = uca->weights[page]; if (!wpage) return next_implicit(wc); /* Calculate pointer to wc's weight, using page and offset */ wbeg = UCA900_WEIGHT_ADDR(wpage, weight_lv, code); wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS; num_of_ce_left = UCA900_NUM_OF_CE(wpage, code); } while (!wbeg[0]); /* Skip ignorable code points */ uint16_t rtn = *wbeg; wbeg += wbeg_stride; --num_of_ce_left; return rtn; } template template ALWAYS_INLINE void uca_scanner_900::for_each_weight( T func, U preaccept_data) { if (cs->tailoring || cs->mbminlen != 1 || cs->coll_param) { // Slower, generic path. 
int s_res; while ((s_res = next()) >= 0) { /* A false return from func stops the scan early. */ if (!func(s_res, s_res == 0)) return; } return; } /* Fast path. Only reached when the collation has no tailoring, no coll_param and mbminlen == 1 (see the check at the top of this function). TODO: See if we can accept some character sets with tailorings. */ const uint16_t *ascii_wpage = UCA900_WEIGHT_ADDR(uca->weights[0], /*level=*/weight_lv, /*subcode=*/0); /* Precalculate the limit for the fast path below, taking care not to form pointers that are before sbeg, as those cannot be legally compared. (In particular, this catches the case of sbeg == send == nullptr.) */ const uint8_t *send_local = (send - sbeg > 3) ? (send - 3) : sbeg; for (;;) { /* We could have more weights left from the previous call to next() (if any) that we need to deal with. */ int s_res; while ((s_res = more_weight()) >= 0) { if (!func(s_res, s_res == 0)) return; } /* Loop in a simple fast path as long as we only have non-ignorable ASCII characters. These characters always have exactly a single weight and consist of only a single byte, so we can skip a lot of the checks we'd otherwise have to do. */ const uint8_t *sbeg_local = sbeg; while (sbeg_local < send_local && preaccept_data(sizeof(uint32_t))) { /* Check if all four bytes are in the range 0x20..0x7e, inclusive. These have exactly one weight. Note that this unfortunately does not include tab and newline, which would otherwise be legal candidates. See the FastOutOfRange unit test for verification that the bitfiddling trick used here is correct.
*/ uint32_t four_bytes = 0; memcpy(&four_bytes, sbeg_local, sizeof(four_bytes)); /* Leave the fast loop as soon as the range test fails; the generic path below then handles the code point. */ if (((four_bytes + 0x01010101u) & 0x80808080) || ((four_bytes - 0x20202020u) & 0x80808080)) break; const int s_res0 = ascii_wpage[sbeg_local[0]]; const int s_res1 = ascii_wpage[sbeg_local[1]]; const int s_res2 = ascii_wpage[sbeg_local[2]]; const int s_res3 = ascii_wpage[sbeg_local[3]]; assert(s_res0 != 0); assert(s_res1 != 0); assert(s_res2 != 0); assert(s_res3 != 0); /* The return value of func is not checked here; preaccept_data() was asked about these four weights beforehand. */ func(s_res0, /*is_level_separator=*/false); func(s_res1, /*is_level_separator=*/false); func(s_res2, /*is_level_separator=*/false); func(s_res3, /*is_level_separator=*/false); sbeg_local += sizeof(uint32_t); } sbeg = sbeg_local; // Do a single code point in the generic path. s_res = next_raw(); if (s_res == 0) { // Level separator, so we have to update our page pointer. ascii_wpage += UCA900_DISTANCE_BETWEEN_LEVELS; } if (s_res < 0 || !func(s_res, s_res == 0)) return; } } /** Change a weight according to the reorder parameters. @param weight The weight to change @return The reordered weight, or the weight unchanged if no reorder record covers it */ template uint16_t uca_scanner_900::apply_reorder_param( uint16_t weight) { /* Chinese collation's reordering is done in next_implicit() and modify_all_zh_pages(). See the comment on zh_reorder_param and change_zh_implicit(). */ if (cs->coll_param == &zh_coll_param) return weight; const Reorder_param *param = cs->coll_param->reorder_param; if (weight >= START_WEIGHT_TO_REORDER && weight <= param->max_weight) { for (int rec_ind = 0; rec_ind < param->wt_rec_num; ++rec_ind) { const Reorder_wt_rec *wt_rec = param->wt_rec + rec_ind; if (weight >= wt_rec->old_wt_bdy.begin && weight <= wt_rec->old_wt_bdy.end) { /* As commented in adjust_japanese_weight(), if this is a Japanese collation, for characters whose weight is between Latin and Kana group, and for the characters whose weight is between Kana and Han, we need to change their weight to be after all Han characters.
We decide to give them the weights [FB86 0000 0000] [original weight] to make sure the new weights are greater than the maximum implicit weight of Han characters. If this character's original weight has more than one non-ignorable primary weight, for example, [AAAA 0020 0002][BBBB 0020 0002], both AAAA and BBBB need to be changed. The new weight should be: [FB86 0000 0000][AAAA 0020 0002][FB86 0000 0000][BBBB 0020 0002]. */ if (param == &ja_reorder_param && wt_rec->new_wt_bdy.begin == 0) { return_origin_weight = !return_origin_weight; if (return_origin_weight) break; /* We didn't consume the weight; rewind the iterator, so we will get another call where we can output it. */ wbeg -= wbeg_stride; ++num_of_ce_left; return 0xFB86; } // Regular (non-Japanese-specific) reordering. return weight - wt_rec->old_wt_bdy.begin + wt_rec->new_wt_bdy.begin; } } } return weight; } // See Unicode TR35 section 3.14.1. static bool is_tertiary_weight_upper_case(uint16_t weight) { if ((weight >= 0x08 && weight <= 0x0C) || weight == 0x0E || weight == 0x11 || weight == 0x12 || weight == 0x1D) return true; return false; } template uint16_t uca_scanner_900::apply_case_first( uint16_t weight) { /* We only apply case weight change here when the character is not tailored. A tailored character's case weight has already been changed in my_char_weight_put_900(). Only one collation (Danish) needs to implement [caseFirst upper]. The change is applied only on the tertiary level (weight_lv == 2) and only to weights below 0x20. */ if (cs->coll_param->case_first == CASE_FIRST_UPPER && weight_lv == 2 && weight < 0x20) { if (is_tertiary_weight_upper_case(weight)) weight |= CASE_FIRST_UPPER_MASK; else weight |= CASE_FIRST_LOWER_MASK; } return weight; } template ALWAYS_INLINE int uca_scanner_900::next() { int res = next_raw(); Coll_param *param = cs->coll_param; if (res > 0 && param) { /* Reorder weight change only on primary level.
*/ if (param->reorder_param && weight_lv == 0) res = apply_reorder_param(res); if (param->case_first != CASE_FIRST_OFF) res = apply_case_first(res); } return res; } /* Compares two strings according to the collation SYNOPSIS: my_strnncoll_uca() cs Character set information s First string slen First string length t Second string tlen Second string length t_is_prefix If true, weights that s still has on a level after t has run out are skipped instead of making s compare greater NOTES: Initializes two weight scanners and gets weights corresponding to two strings in a loop. If weights are not the same at some step then returns their difference. In the while() comparison these situations are possible: 1. (s_res>0) and (t_res>0) and (s_res == t_res) Weights are the same so far, continue comparison 2. (s_res>0) and (t_res>0) and (s_res!=t_res) A difference has been found, return. 3. (s_res>0) and (t_res<0) We have reached the end of the second string, or found an illegal multibyte sequence in the second string. Return a positive number, i.e. the first string is bigger. 4. (s_res<0) and (t_res>0) We have reached the end of the first string, or found an illegal multibyte sequence in the first string. Return a negative number, i.e. the second string is bigger. 5. (s_res<0) and (t_res<0) Both scanners returned -1. It means we have reached the end-of-string or an illegal sequence in both strings at the same time. Return 0, strings are equal. RETURN Difference between two strings, according to the collation: 0 - means strings are equal negative number - means the first string is smaller positive number - means the first string is bigger */ template static int my_strnncoll_uca(const CHARSET_INFO *cs, const Mb_wc mb_wc, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) { Scanner sscanner(mb_wc, cs, s, slen); Scanner tscanner(mb_wc, cs, t, tlen); int s_res = 0; int t_res = 0; /* We compare 2 strings in same level first. If only string A's scanner has gone to next level, which means another string, B's weight of current level is longer than A's.
We'll compare B's remaining weights with space. */ for (unsigned current_lv = 0; current_lv < LEVELS_FOR_COMPARE; ++current_lv) { /* Run the scanners until one of them runs out of current lv */ do { s_res = sscanner.next(); t_res = tscanner.next(); } while (s_res == t_res && s_res >= 0 && sscanner.get_weight_level() == current_lv && tscanner.get_weight_level() == current_lv); /* Two scanners run to next level at same time, or we found a difference, or we found an error. */ if (sscanner.get_weight_level() == tscanner.get_weight_level()) { if (s_res == t_res && s_res >= 0) continue; break; // Error or inequality found, end. } if (tscanner.get_weight_level() > current_lv) { // t ran out of weights on this level, and s didn't. if (t_is_prefix) { // Consume the rest of the weights from s. do { s_res = sscanner.next(); } while (s_res >= 0 && sscanner.get_weight_level() == current_lv); if (s_res < 0) break; // Error found, end. // s is now also on the next level. Continue comparison. continue; } else { // s is longer than t (and t_is_prefix isn't set). return 1; } } if (sscanner.get_weight_level() > current_lv) { // s ran out of weights on this level, and t didn't. return -1; } break; } return (s_res - t_res); } /** Return the first primary weight of U+0020 (SPACE) for the collation. NOTE(review): when cs->uca is null the else branch still dereferences it - confirm that callers only pass collations with uca set. */ static inline int my_space_weight(const CHARSET_INFO *cs) /* W3-TODO */ { if (cs->uca && cs->uca->version == UCA_V900) return UCA900_WEIGHT(cs->uca->weights[0], /*weight_lv=*/0, 0x20); else return cs->uca->weights[0][0x20 * cs->uca->lengths[0]]; } /** Helper function: Find address of weights of the given code point. @param uca Pointer to UCA data @param wc character Unicode code point @return Weight array @retval pointer to weight array for the given code point, or nullptr if wc is above uca->maxchar or its page has no weight table. */ static inline uint16_t *my_char_weight_addr(MY_UCA_INFO *uca, my_wc_t wc) { unsigned page = 0; unsigned ofst = 0; return wc > uca->maxchar ? nullptr : (uca->weights[page = (wc >> 8)] ? 
uca->weights[page] + (ofst = (wc & 0xFF)) * uca->lengths[page] : nullptr); } /** Helper function: Find address of weights of the given code point, for UCA 9.0.0 format. @param uca Pointer to UCA data @param wc character Unicode code point @return Weight array @retval pointer to the level-0 weight of the given code point, or nullptr if wc is above uca->maxchar or its page has no weight table. */ static inline uint16_t *my_char_weight_addr_900(MY_UCA_INFO *uca, my_wc_t wc) { if (wc > uca->maxchar) return nullptr; unsigned page = wc >> 8; unsigned ofst = wc & 0xFF; uint16_t *weights = uca->weights[page]; if (weights) return UCA900_WEIGHT_ADDR(weights, /*level=*/0, ofst); else return nullptr; } /* Compares two strings according to the collation, ignoring trailing spaces. SYNOPSIS: my_strnncollsp_uca() cs Character set information s First string slen First string length t Second string tlen Second string length NOTES: Works exactly the same as my_strnncoll_uca(), but ignores trailing spaces. In the while() comparison these situations are possible: 1. (s_res>0) and (t_res>0) and (s_res == t_res) Weights are the same so far, continue comparison 2. (s_res>0) and (t_res>0) and (s_res!=t_res) A difference has been found, return. 3. (s_res>0) and (t_res<0) We have reached the end of the second string, or found an illegal multibyte sequence in the second string. Compare the first string to an infinite array of space characters until difference is found, or until the end of the first string. 4. (s_res<0) and (t_res>0) We have reached the end of the first string, or found an illegal multibyte sequence in the first string. Compare the second string to an infinite array of space characters until difference is found or until the end of the second string. 5. (s_res<0) and (t_res<0) Both scanners returned -1. It means we have reached the end-of-string or an illegal sequence in both strings at the same time. Return 0, strings are equal.
RETURN Difference between two strings, according to the collation:
    0               - means strings are equal
    negative number - means the first string is smaller
    positive number - means the first string is bigger
*/

template static int my_strnncollsp_uca(const CHARSET_INFO *cs, Mb_wc mb_wc,
                                       const uint8_t *s, size_t slen,
                                       const uint8_t *t, size_t tlen) {
  uca_scanner_any sscanner(mb_wc, cs, s, slen);
  uca_scanner_any tscanner(mb_wc, cs, t, tlen);

  /* Walk both strings in lock step while their weights agree. */
  int s_res;
  int t_res;
  do {
    s_res = sscanner.next();
    t_res = tscanner.next();
  } while (s_res == t_res && s_res > 0);

  if (s_res > 0 && t_res < 0) {
    /* t is exhausted: what is left of s must consist of space weights. */
    const int space_weight = my_space_weight(cs);
    for (; s_res > 0; s_res = sscanner.next()) {
      if (s_res != space_weight) return s_res - space_weight;
    }
    return 0;
  }

  if (s_res < 0 && t_res > 0) {
    /* s is exhausted: what is left of t must consist of space weights. */
    const int space_weight = my_space_weight(cs);
    for (; t_res > 0; t_res = tscanner.next()) {
      if (space_weight != t_res) return space_weight - t_res;
    }
    return 0;
  }

  /* Either a real difference was found, or both scanners stopped. */
  return s_res - t_res;
}

/*
  Calculates hash value for the given string,
  according to the collation, and ignoring trailing spaces.

  SYNOPSIS:
    my_hash_sort_uca()
    cs    Character set information
    s     String
    slen  String's length
    n1    First hash parameter
    n2    Second hash parameter

  NOTES:
    Scans weights consecutively and updates
    hash parameters n1 and n2. In a case insensitive collation,
    upper and lower case of the same letter will return the same
    weight sequence, and thus will produce the same hash values
    in n1 and n2.
RETURN N/A */ template static void my_hash_sort_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *n2) { int s_res = 0; /* Recompute the length through lengthsp() before scanning (the header comment says trailing spaces are ignored). */ slen = cs->cset->lengthsp(cs, pointer_cast(s), slen); uca_scanner_any scanner(mb_wc, cs, s, slen); uint64_t tmp1 = *n1; uint64_t tmp2 = *n2; /* Mix the high byte and then the low byte of every weight into the hash. */ while ((s_res = scanner.next()) > 0) { tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res >> 8)) + (tmp1 << 8); tmp2 += 3; tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res & 0xFF)) + (tmp1 << 8); tmp2 += 3; } *n1 = tmp1; *n2 = tmp2; } /* For the given string creates its "binary image", suitable to be used in binary comparison, i.e. in memcmp(). SYNOPSIS: my_strnxfrm_uca() cs Character set information dst Where to write the image dstlen Space available for the image, in bytes num_codepoints Number of code points the image is padded up to with space weights, as far as dst has room src The source string srclen Length of the source string, in bytes flags If MY_STRXFRM_PAD_TO_MAXLEN is set, the rest of dst is filled with space weights NOTES: In a loop, scans weights from the source string and writes them into the binary image. In a case insensitive collation, upper and lower cases of the same letter will produce the same image subsequences. When we have reached the end-of-string or found an illegal multibyte sequence, the loop stops. It is impossible to restore the original string using its binary image. Binary images are used for bulk comparison purposes, e.g. in ORDER BY, when it is more efficient to create a binary image and use it instead of weight scanner for the original strings for every comparison. RETURN Number of bytes that have been written into the binary image. */ template static size_t my_strnxfrm_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) { uint8_t *d0 = dst; uint8_t *de = dst + dstlen; int s_res; uca_scanner_any scanner(mb_wc, cs, src, srclen); /* Each weight is written big-endian; the low byte is dropped if dst is full. */ while (dst < de && (s_res = scanner.next()) > 0) { *dst++ = s_res >> 8; if (dst < de) *dst++ = s_res & 0xFF; } if (dst < de) { /* PAD SPACE behavior. 
We still have space left in the output buffer, which must mean that the scanner is at the end of the last level. Find out how many weights we wrote per level, and add any remaining spaces we need to get us up to the requested total. */ assert(num_codepoints >= scanner.get_char_index()); num_codepoints -= scanner.get_char_index(); if (num_codepoints) { /* Never write more space weights than fit into the remaining buffer. */ unsigned space_count = std::min((de - dst) / 2, num_codepoints); s_res = my_space_weight(cs); for (; space_count; space_count--) { dst = store16be(dst, s_res); } } } if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) { /* Fill whatever is left of the buffer with space weights. */ s_res = my_space_weight(cs); for (; dst < de;) { *dst++ = s_res >> 8; if (dst < de) *dst++ = s_res & 0xFF; } } return dst - d0; } /** Compare two code points for equality using UCA 9.0.0 format weights. The visible return paths yield 0 for equal and non-zero for different. */ static int my_uca_charcmp_900(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { uint16_t *weight1_ptr = my_char_weight_addr_900(cs->uca, wc1); /* W3-TODO */ uint16_t *weight2_ptr = my_char_weight_addr_900(cs->uca, wc2); /* If either code point has no weight table entry, fall back to comparing the code points themselves */ if (!weight1_ptr || !weight2_ptr) return wc1 != wc2; /* Quick exit: both first primary weights are non-zero and differ */ if (weight1_ptr[0] && weight2_ptr[0] && weight1_ptr[0] != weight2_ptr[0]) return 1; /* Thoroughly compare all weights. The value stored one level stride before the primary weight is used as the number of weights. */ size_t length1 = weight1_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS]; size_t length2 = weight2_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS]; for (int level = 0; level < cs->levels_for_compare; ++level) { size_t wt_ind1 = 0; size_t wt_ind2 = 0; uint16_t *weight1 = weight1_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS; uint16_t *weight2 = weight2_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS; while (wt_ind1 < length1 && wt_ind2 < length2) { // Zero weight is ignorable. for (; wt_ind1 < length1 && !*weight1; wt_ind1++) weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS; if (wt_ind1 == length1) break; for (; wt_ind2 < length2 && !*weight2; wt_ind2++) weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS; if (wt_ind2 == length2) break; // Check if these two non-ignorable weights are equal.
if (*weight1 != *weight2) return 1; wt_ind1++; wt_ind2++; weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS; weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS; } /* If either character is out of weights but we have equality so far, check if the other character has any non-ignorable weights left. */ for (; wt_ind1 < length1; wt_ind1++) { if (*weight1) return 1; weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS; } for (; wt_ind2 < length2; wt_ind2++) { if (*weight2) return 1; weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS; } } return 0; } /* This function checks whether two code points compare equal under the collation. The sign +1 or -1 does not matter. The only important thing is that the result is 0 or not 0. This fact allows us to use memcmp() safely, on both little-endian and big-endian machines. */ static int my_uca_charcmp(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { if (wc1 == wc2) return 0; /* UCA 9.0.0 collations use a different weight layout and have their own comparison routine */ if (cs->uca != nullptr && cs->uca->version == UCA_V900) return my_uca_charcmp_900(cs, wc1, wc2); size_t length1, length2; uint16_t *weight1 = my_char_weight_addr(cs->uca, wc1); /* W3-TODO */ uint16_t *weight2 = my_char_weight_addr(cs->uca, wc2); /* If either code point has no weight table entry, compare the code points themselves */ if (!weight1 || !weight2) return wc1 != wc2; /* Quickly compare first weights */ if (weight1[0] != weight2[0]) return 1; /* Thoroughly compare all weights */ length1 = cs->uca->lengths[wc1 >> MY_UCA_PSHIFT]; /* W3-TODO */ length2 = cs->uca->lengths[wc2 >> MY_UCA_PSHIFT]; /* Rows of different length: compare the common prefix; the code points are then equal only if the longer row has a zero weight right after it */ if (length1 > length2) return memcmp((const void *)weight1, (const void *)weight2, length2 * 2) ? 1 : weight1[length2]; if (length1 < length2) return memcmp((const void *)weight1, (const void *)weight2, length1 * 2) ? 
1 : weight2[length1]; return memcmp((const void *)weight1, (const void *)weight2, length1 * 2); } /*** Compare string against string with wildcard ** 0 if matched ** -1 if not matched with wildcard ** 1 if matched with wildcard */ static int my_wildcmp_uca_impl(const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, int recurse_level) { if (my_string_stack_guard && my_string_stack_guard(recurse_level)) return -1; while (wildstr != wildend) { int result = -1; /* Not found, using wildcards */ auto mb_wc = cs->cset->mb_wc; /* Compare the expression and pattern strings character-by-character until we find a '%' (w_many) in the pattern string. Once we do, we break out of the loop and try increasingly large widths for the '%' match, calling ourselves recursively until we find a match. (As an optimization, we test for the character immediately after '%' before we recurse.) This takes exponential time in the worst case. Example: Say we are trying to match the pattern 'ab%cd' against the string 'ab..c.cd'. We first match the initial 'ab' against each other, and then see the '%' in the pattern. Since the first character after '%' is 'c', we skip to the first 'c' in the expression string, and try to match 'c.cd' against 'cd' by a recursive call. Since this failed, we scan for the next 'c', and try to match 'cd' against 'cd', which works. */ my_wc_t w_wc; while (true) { int mb_len; if ((mb_len = mb_wc(cs, &w_wc, pointer_cast(wildstr), pointer_cast(wildend))) <= 0) return 1; wildstr += mb_len; // If we found '%' (w_many), break out this loop. if (w_wc == (my_wc_t)w_many) { result = 1; break; } /* If the character we just read was an escape character, skip it and read the next character instead. This character is used verbatim without checking if it is a wildcard (% or _). However, as a special exception, a lone escape character at the end of a string is treated as itself. 
*/ bool escaped = false; if (w_wc == (my_wc_t)escape && wildstr < wildend) { if ((mb_len = mb_wc(cs, &w_wc, pointer_cast(wildstr), pointer_cast(wildend))) <= 0) return 1; wildstr += mb_len; escaped = true; } my_wc_t s_wc; if ((mb_len = mb_wc(cs, &s_wc, pointer_cast(str), pointer_cast(str_end))) <= 0) return 1; str += mb_len; // If we found '_' (w_one), skip one character in expression string. if (!escaped && w_wc == (my_wc_t)w_one) { result = 1; } else { if (my_uca_charcmp(cs, s_wc, w_wc)) return 1; } if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ } if (w_wc == (my_wc_t)w_many) { // Remove any '%' and '_' following w_many in the pattern string. for (;;) { if (wildstr == wildend) { /* The previous w_many (%) was the last character in the pattern string, so we have a match no matter what the rest of the expression string looks like (even empty). */ return 0; } int mb_len_wild = mb_wc(cs, &w_wc, pointer_cast(wildstr), pointer_cast(wildend)); if (mb_len_wild <= 0) return 1; wildstr += mb_len_wild; if (w_wc == (my_wc_t)w_many) continue; if (w_wc == (my_wc_t)w_one) { /* Skip one character in expression string because '_' needs to match one. */ my_wc_t s_wc; int mb_len = mb_wc(cs, &s_wc, pointer_cast(str), pointer_cast(str_end)); if (mb_len <= 0) return 1; str += mb_len; continue; } break; /* Not a wild character */ } // No character in the expression string to match w_wc. if (str == str_end) return -1; // Skip the escape character ('\') in the pattern if needed. if (w_wc == (my_wc_t)escape && wildstr < wildend) { int mb_len = mb_wc(cs, &w_wc, pointer_cast(wildstr), pointer_cast(wildend)); if (mb_len <= 0) return 1; wildstr += mb_len; } /* w_wc is now the character following w_many (e.g., if the pattern is "a%c", w_wc is 'c'). */ while (true) { /* Skip until we find a character in the expression string that is equal to w_wc. 
*/ int mb_len = 0; while (str != str_end) { my_wc_t s_wc; if ((mb_len = mb_wc(cs, &s_wc, pointer_cast(str), pointer_cast(str_end))) <= 0) return 1; if (!my_uca_charcmp(cs, s_wc, w_wc)) break; str += mb_len; } // No character in the expression string is equal to w_wc. if (str == str_end) return -1; str += mb_len; /* The strings match up until the first character after w_many in the pattern string. For the rest part of pattern string and expression string, we recursively call to get wild compare result. Example, wildcmp(..., "abcdefg", "a%de%g", ...), we'll run again on wildcmp(..., "efg", "e%g", ...). */ result = my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, recurse_level + 1); if (result <= 0) return result; } } } return (str != str_end ? 1 : 0); } static int my_strcasecmp_uca(const CHARSET_INFO *cs, const char *s, const char *t) { const MY_UNICASE_INFO *uni_plane = cs->caseinfo; const MY_UNICASE_CHARACTER *page; while (s[0] && t[0]) { my_wc_t s_wc, t_wc; if (static_cast(s[0]) < 128) { s_wc = uni_plane->page[0][static_cast(s[0])].tolower; s++; } else { int res; res = cs->cset->mb_wc(cs, &s_wc, pointer_cast(s), pointer_cast(s + 4)); if (res <= 0) return strcmp(s, t); s += res; if (s_wc <= uni_plane->maxchar && (page = uni_plane->page[s_wc >> 8])) s_wc = page[s_wc & 0xFF].tolower; } /* Do the same for the second string */ if (static_cast(t[0]) < 128) { /* Convert single byte character into weight */ t_wc = uni_plane->page[0][static_cast(t[0])].tolower; t++; } else { int res = cs->cset->mb_wc(cs, &t_wc, pointer_cast(t), pointer_cast(t + 4)); if (res <= 0) return strcmp(s, t); t += res; if (t_wc <= uni_plane->maxchar && (page = uni_plane->page[t_wc >> 8])) t_wc = page[t_wc & 0xFF].tolower; } /* Now we have two weights, let's compare them */ if (s_wc != t_wc) return static_cast(s_wc) - static_cast(t_wc); } return static_cast(static_cast(s[0])) - static_cast(static_cast(t[0])); } extern "C" { static int my_wildcmp_uca(const CHARSET_INFO 
*cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, 1); } } // extern "C" /* Collation language is implemented according to subset of ICU Collation Customization (tailorings): http://icu.sourceforge.net/userguide/Collate_Customization.html Collation language elements: Delimiters: space - skipped := A-Z | a-z | \uXXXX Shift command: := & - reset at this letter. Diff command: := < - Identifies a primary difference. := << - Identifies a secondary difference. := <<< - Idenfifies a tertiary difference. Collation rules: := { } := | | | := [ ] An example, Polish collation: &A < \u0105 <<< \u0104 &C < \u0107 <<< \u0106 &E < \u0119 <<< \u0118 &L < \u0142 <<< \u0141 &N < \u0144 <<< \u0143 &O < \u00F3 <<< \u00D3 &S < \u015B <<< \u015A &Z < \u017A <<< \u017B */ typedef enum my_coll_lexem_num_en { MY_COLL_LEXEM_EOF = 0, MY_COLL_LEXEM_SHIFT = 1, MY_COLL_LEXEM_RESET = 4, MY_COLL_LEXEM_CHAR = 5, MY_COLL_LEXEM_ERROR = 6, MY_COLL_LEXEM_OPTION = 7, MY_COLL_LEXEM_EXTEND = 8, MY_COLL_LEXEM_CONTEXT = 9 } my_coll_lexem_num; /** Convert collation customization lexem to string, for nice error reporting @param term lexem code @return lexem name */ static const char *my_coll_lexem_num_to_str(my_coll_lexem_num term) { switch (term) { case MY_COLL_LEXEM_EOF: return "EOF"; case MY_COLL_LEXEM_SHIFT: return "Shift"; case MY_COLL_LEXEM_RESET: return "&"; case MY_COLL_LEXEM_CHAR: return "Character"; case MY_COLL_LEXEM_OPTION: return "Bracket option"; case MY_COLL_LEXEM_EXTEND: return "/"; case MY_COLL_LEXEM_CONTEXT: return "|"; case MY_COLL_LEXEM_ERROR: return "ERROR"; } return nullptr; } struct MY_COLL_LEXEM { my_coll_lexem_num term; const char *beg; const char *end; const char *prev; int diff; int code; }; /* Initialize collation rule lexical anilizer SYNOPSIS my_coll_lexem_init lexem Lex analyzer to init str Const string to parse str_end End 
of the string USAGE RETURN VALUES N/A */ static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, const char *str, const char *str_end) { lexem->beg = str; lexem->prev = str; lexem->end = str_end; lexem->diff = 0; lexem->code = 0; } /** Compare lexem to string with length @param lexem lexem @param pattern string @param patternlen string length @retval 0 if lexem is equal to string, non-0 otherwise. */ static int lex_cmp(MY_COLL_LEXEM *lexem, const char *pattern, size_t patternlen) { size_t lexemlen = lexem->beg - lexem->prev; if (lexemlen < patternlen) return 1; /* Not a prefix */ return native_strncasecmp(lexem->prev, pattern, patternlen); } /* Print collation customization expression parse error, with context. SYNOPSIS my_coll_lexem_print_error lexem Lex analyzer to take context from errstr string to write error to errsize errstr size txt error message col_name collation name USAGE RETURN VALUES N/A */ static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, char *errstr, size_t errsize, const char *txt, const char *col_name) { const char *src = lexem->prev; // "29" is for a backward compatibility: int src_len = std::min(29, static_cast(lexem->end - lexem->prev)); errstr[errsize - 1] = '\0'; snprintf(errstr, errsize - 1, "%s at '%.*s' for COLLATION : %s", txt[0] ? txt : "Syntax error", src_len, src, col_name); } /* Convert a hex digit into its numeric value SYNOPSIS ch2x ch hex digit to convert USAGE RETURN VALUES an integer value in the range 0..15 -1 on error */ static int ch2x(int ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'a' && ch <= 'f') return 10 + ch - 'a'; if (ch >= 'A' && ch <= 'F') return 10 + ch - 'A'; return -1; } /* Collation language lexical parser: Scans the next lexem. SYNOPSIS my_coll_lexem_next lexem Lex analyzer, previously initialized by my_coll_lexem_init. USAGE Call this function in a loop RETURN VALUES Lexem number: eof, diff, shift, char or error. 
*/
static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) {
  const char *beg;
  my_coll_lexem_num rc;

  /*
    Every path through the loop body either skips a whitespace byte with
    "continue" or jumps to "ex" with a token, so the loop only runs to
    completion when nothing but whitespace is left.
  */
  for (beg = lexem->beg; beg < lexem->end; beg++) {
    switch (*beg) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        continue;

      case '[': /* Bracket expression, e.g. "[optimize [a-z]]" */
      {
        size_t nbrackets; /* Indicates nested recursion level */
        for (beg++, nbrackets = 1; beg < lexem->end; beg++) {
          if (*beg == '[') /* Enter nested bracket expression */
            nbrackets++;
          else if (*beg == ']') {
            if (--nbrackets == 0) {
              rc = MY_COLL_LEXEM_OPTION;
              beg++;
              goto ex;
            }
          }
        }
        /* Input ended before the outermost bracket was closed. */
        rc = MY_COLL_LEXEM_ERROR;
        goto ex;
      }

      case '&':
        beg++;
        rc = MY_COLL_LEXEM_RESET;
        goto ex;

      case '=':
        /* '=' is reported as a shift with level 0. */
        beg++;
        lexem->diff = 0;
        rc = MY_COLL_LEXEM_SHIFT;
        goto ex;

      case '/':
        beg++;
        rc = MY_COLL_LEXEM_EXTEND;
        goto ex;

      case '|':
        beg++;
        rc = MY_COLL_LEXEM_CONTEXT;
        goto ex;

      case '<': /* Shift: '<' or '<<' or '<<<' or '<<<<' */
      {
        /* Scan up to 3 additional '<' characters */
        /* lexem->diff ends up as the number of '<' consumed (1..4). */
        for (beg++, lexem->diff = 1;
             (beg < lexem->end) && (*beg == '<') && (lexem->diff <= 3);
             beg++, lexem->diff++)
          ;
        rc = MY_COLL_LEXEM_SHIFT;
        goto ex;
      }
      default:
        break;
    }

    /* Escaped character, e.g. \u1234 */
    if ((*beg == '\\') && (beg + 2 < lexem->end) && (beg[1] == 'u') &&
        my_isxdigit(&my_charset_utf8mb3_general_ci, beg[2])) {
      int ch;

      beg += 2;
      lexem->code = 0;
      /* Accumulate every hex digit that follows; the count is not limited. */
      while ((beg < lexem->end) && ((ch = ch2x(beg[0])) >= 0)) {
        lexem->code = (lexem->code << 4) + ch;
        beg++;
      }
      rc = MY_COLL_LEXEM_CHAR;
      goto ex;
    }

    /*
      Unescaped single byte character:
        allow printable ASCII range except SPACE and
        special characters parsed above []<&/|=
    */
    if (*beg >= 0x21 && *beg <= 0x7E) {
      lexem->code = *beg++;
      rc = MY_COLL_LEXEM_CHAR;
      goto ex;
    }

    if (((uint8_t)*beg) > 0x7F) /* Unescaped multibyte character */
    {
      /* The rule text is decoded with the utf8mb3 character set's mb_wc(). */
      CHARSET_INFO *cs = &my_charset_utf8mb3_general_ci;
      my_wc_t wc;
      int nbytes = cs->cset->mb_wc(cs, &wc, pointer_cast(beg),
                                   pointer_cast(lexem->end));
      if (nbytes > 0) {
        rc = MY_COLL_LEXEM_CHAR;
        beg += nbytes;
        lexem->code = (int)wc;
        goto ex;
      }
    }

    rc = MY_COLL_LEXEM_ERROR;
    goto ex;
  }

  rc = MY_COLL_LEXEM_EOF;

ex:
  /* Remember where this token started, then advance the cursor past it. */
  lexem->prev = lexem->beg;
  lexem->beg = beg;
  lexem->term = rc;
  return rc;
}

/*
  Collation rule item
*/

#define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */

struct MY_COLL_RULE {
  my_wc_t base[MY_UCA_MAX_EXPANSION];   /* Base character                  */
  my_wc_t curr[MY_UCA_MAX_CONTRACTION]; /* Current character               */
  int diff[4]; /* Primary, Secondary, Tertiary, Quaternary difference  */
  size_t before_level; /* "reset before" indicator                 */
  bool with_context;   /* Set when the rule has a '|' context part  */
};

/**
  Return length of the "reset" string of a rule.

  @param  r  Collation customization rule
  @return    Length of r->base
*/
static inline size_t my_coll_rule_reset_length(MY_COLL_RULE *r) {
  return my_wstrnlen(r->base, MY_UCA_MAX_EXPANSION);
}

/**
  Return length of the "shift" string of a rule.

  @param  r  Collation customization rule
  @return    Length of r->curr
*/
static inline size_t my_coll_rule_shift_length(MY_COLL_RULE *r) {
  return my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
}

/**
  Append new character to the end of a 0-terminated wide string.
@param wc Wide string @param limit Maximum possible result length @param code Character to add @return 1 if character was added, 0 if string was too long */ static int my_coll_rule_expand(my_wc_t *wc, size_t limit, my_wc_t code) { size_t i; for (i = 0; i < limit; i++) { if (wc[i] == 0) { wc[i] = code; return 1; } } return 0; } /** Initialize collation customization rule @param r Rule */ static void my_coll_rule_reset(MY_COLL_RULE *r) { memset(r, 0, sizeof(*r)); } /* Shift methods: Simple: "&B < C" : weight('C') = weight('B') + 1 Expand: weight('C') = { weight('B'), weight(last_non_ignorable) + 1 } */ typedef enum { my_shift_method_simple = 0, my_shift_method_expand } my_coll_shift_method; struct MY_COLL_RULES { MY_UCA_INFO *uca{nullptr}; /* Unicode weight data */ size_t nrules{0}; /* Number of rules in the rule array */ size_t mrules{0}; /* Number of allocated rules */ MY_COLL_RULE *rule{nullptr}; /* Rule array */ MY_CHARSET_LOADER *loader{nullptr}; MY_CHARSET_ERRMSG *errmsg{nullptr}; my_coll_shift_method shift_after_method{my_shift_method_simple}; }; /** Realloc rule array to a new size. Reallocate memory for 128 additional rules at once, to reduce the number of reallocs, which is important for long tailorings (e.g. for East Asian collations). @param rules Rule container @param n new number of rules @return 0 on success, -1 on error. */ static int my_coll_rules_realloc(MY_COLL_RULES *rules, size_t n) { if (rules->nrules < rules->mrules || (rules->rule = static_cast(realloc( rules->rule, sizeof(MY_COLL_RULE) * (rules->mrules = n + 128))))) return 0; return -1; } /** Append one new rule to a rule array @param rules Rule container @param rule New rule to add @return 0 on success, -1 on error. 
*/ static int my_coll_rules_add(MY_COLL_RULES *rules, MY_COLL_RULE *rule) { if (my_coll_rules_realloc(rules, rules->nrules + 1)) return -1; rules->rule[rules->nrules++] = rule[0]; return 0; } /** Apply difference at level @param r Rule @param level Level (0,1,2,3,4) */ static void my_coll_rule_shift_at_level(MY_COLL_RULE *r, int level) { switch (level) { case 4: /* Quaternary difference */ r->diff[3]++; break; case 3: /* Tertiary difference */ r->diff[2]++; r->diff[3] = 0; break; case 2: /* Secondary difference */ r->diff[1]++; r->diff[2] = r->diff[3] = 0; break; case 1: /* Primary difference */ r->diff[0]++; r->diff[1] = r->diff[2] = r->diff[3] = 0; break; case 0: /* Do nothing for '=': use the previous offsets for all levels */ break; default: assert(0); } } struct MY_COLL_RULE_PARSER { MY_COLL_LEXEM tok[2]; /* Current token and next token for look-ahead */ MY_COLL_RULE rule; /* Currently parsed rule */ MY_COLL_RULES *rules; /* Rule list pointer */ char errstr[128]; /* Error message */ }; /** Current parser token @param p Collation customization parser @return Pointer to the current token */ static MY_COLL_LEXEM *my_coll_parser_curr(MY_COLL_RULE_PARSER *p) { return &p->tok[0]; } /** Next parser token, to look ahead. @param p Collation customization parser @return Pointer to the next token */ static MY_COLL_LEXEM *my_coll_parser_next(MY_COLL_RULE_PARSER *p) { return &p->tok[1]; } /** Scan one token from the input stream @param p Collation customization parser @return 1, for convenience, to use in logical expressions easier. 
*/ static int my_coll_parser_scan(MY_COLL_RULE_PARSER *p) { my_coll_parser_curr(p)[0] = my_coll_parser_next(p)[0]; my_coll_lexem_next(my_coll_parser_next(p)); return 1; } /** Initialize collation customization parser @param p Collation customization parser @param rules Where to store rules @param str Beginning of a collation customization string @param str_end End of the collation customizations string */ static void my_coll_parser_init(MY_COLL_RULE_PARSER *p, MY_COLL_RULES *rules, const char *str, const char *str_end) { /* Initialize parser to the input buffer and scan two tokens, to make the current token and the next token known. */ memset(p, 0, sizeof(*p)); p->rules = rules; p->errstr[0] = '\0'; my_coll_lexem_init(my_coll_parser_curr(p), str, str_end); my_coll_lexem_next(my_coll_parser_curr(p)); my_coll_parser_next(p)[0] = my_coll_parser_curr(p)[0]; my_coll_lexem_next(my_coll_parser_next(p)); } /** Display error when an unexpected token found @param p Collation customization parser @param term Which lexem was expected @return 0, to use in "return" and boolean expressions. */ static int my_coll_parser_expected_error(MY_COLL_RULE_PARSER *p, my_coll_lexem_num term) { snprintf(p->errstr, sizeof(p->errstr), "%s expected", my_coll_lexem_num_to_str(term)); return 0; } /** Display error when a too long character sequence is met @param p Collation customization parser @param name Which kind of sequence: contraction, expansion, etc. @return 0, to use in "return" and boolean expressions. */ static int my_coll_parser_too_long_error(MY_COLL_RULE_PARSER *p, const char *name) { snprintf(p->errstr, sizeof(p->errstr), "%s is too long", name); return 0; } /** Scan the given lexem from input stream, or display "expected" error. @param p Collation customization parser @param term Which lexem is expected. @retval 0 if the required term was not found. @retval 1 if the required term was found. 
*/ static int my_coll_parser_scan_term(MY_COLL_RULE_PARSER *p, my_coll_lexem_num term) { if (my_coll_parser_curr(p)->term != term) return my_coll_parser_expected_error(p, term); return my_coll_parser_scan(p); } /* In the following code we have a few functions to parse various collation customization non-terminal symbols. Unlike our usual coding convention, they return - 0 on "error" (when the rule was not scanned) and - 1 on "success"(when the rule was scanned). This is done intentionally to make body of the functions look easier and repeat the grammar of the rules in straightforward manner. For example: // ::= | int parse_x() { return parse_y() || parser_z(); } // ::= int parse_x() { return parse_y() && parser_z(); } Using 1 on "not found" and 0 on "found" in the parser code would make the code more error prone and harder to read because of having to use inverse boolean logic. */ /** Scan a collation setting in brakets, for example UCA version. @param p Collation customization parser @retval 0 if setting was scanned. @retval 1 if setting was not scanned. */ static int my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p) { MY_COLL_RULES *rules = p->rules; MY_COLL_LEXEM *lexem = my_coll_parser_curr(p); if (!lex_cmp(lexem, STRING_WITH_LEN("[version 4.0.0]"))) { rules->uca = &my_uca_v400; } else if (!lex_cmp(lexem, STRING_WITH_LEN("[version 5.2.0]"))) { rules->uca = &my_uca_v520; } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method expand]"))) { rules->shift_after_method = my_shift_method_expand; } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method simple]"))) { rules->shift_after_method = my_shift_method_simple; } else { return 0; } return my_coll_parser_scan(p); } /** Scan multiple collation settings @param p Collation customization parser @retval 0 if no settings were scanned. @retval 1 if one or more settings were scanned. 
*/
static int my_coll_parser_scan_settings(MY_COLL_RULE_PARSER *p) {
  /* Consume bracket options for as long as they are valid settings. */
  for (;;) {
    if (my_coll_parser_curr(p)->term != MY_COLL_LEXEM_OPTION) return 1;
    if (!my_coll_parser_scan_setting(p)) return 0;
  }
}

/**
  Scan [before xxx] reset option

  On a match the level (1..4) is stored in the rule being parsed and the
  token is consumed; otherwise the level is set to 0 and nothing is
  consumed.

  @param  p   Collation customization parser

  @retval     0 if reset option was not scanned.
  @retval     1 if reset option was scanned.
*/
static int my_coll_parser_scan_reset_before(MY_COLL_RULE_PARSER *p) {
  static const struct {
    const char *text;
    size_t length;
    size_t level;
  } before_options[] = {
      {STRING_WITH_LEN("[before primary]"), 1},
      {STRING_WITH_LEN("[before 1]"), 1},
      {STRING_WITH_LEN("[before secondary]"), 2},
      {STRING_WITH_LEN("[before 2]"), 2},
      {STRING_WITH_LEN("[before tertiary]"), 3},
      {STRING_WITH_LEN("[before 3]"), 3},
      {STRING_WITH_LEN("[before quaternary]"), 4},
      {STRING_WITH_LEN("[before 4]"), 4},
  };

  MY_COLL_LEXEM *token = my_coll_parser_curr(p);

  for (const auto &option : before_options) {
    if (lex_cmp(token, option.text, option.length) == 0) {
      p->rule.before_level = option.level;
      return my_coll_parser_scan(p);
    }
  }

  p->rule.before_level = 0;
  return 0; /* Don't scan the next character */
}

/**
  Scan logical position and add to the wide string.

  @param  p      Collation customization parser
  @param  pwc    Wide string to add code to
  @param  limit  The result string cannot be longer than 'limit' characters

  @retval        0 if logical position was not scanned.
  @retval        1 if logical position was scanned.
*/
static int my_coll_parser_scan_logical_position(MY_COLL_RULE_PARSER *p,
                                                my_wc_t *pwc, size_t limit) {
  /* Spellings of the logical positions; the index selects the field below. */
  static const struct {
    const char *text;
    size_t length;
  } position_names[] = {
      {STRING_WITH_LEN("[first non-ignorable]")},       /*  0 */
      {STRING_WITH_LEN("[last non-ignorable]")},        /*  1 */
      {STRING_WITH_LEN("[first primary ignorable]")},   /*  2 */
      {STRING_WITH_LEN("[last primary ignorable]")},    /*  3 */
      {STRING_WITH_LEN("[first secondary ignorable]")}, /*  4 */
      {STRING_WITH_LEN("[last secondary ignorable]")},  /*  5 */
      {STRING_WITH_LEN("[first tertiary ignorable]")},  /*  6 */
      {STRING_WITH_LEN("[last tertiary ignorable]")},   /*  7 */
      {STRING_WITH_LEN("[first trailing]")},            /*  8 */
      {STRING_WITH_LEN("[last trailing]")},             /*  9 */
      {STRING_WITH_LEN("[first variable]")},            /* 10 */
      {STRING_WITH_LEN("[last variable]")},             /* 11 */
  };
  const size_t position_count =
      sizeof(position_names) / sizeof(position_names[0]);

  MY_COLL_LEXEM *token = my_coll_parser_curr(p);

  size_t which = 0;
  while (which < position_count &&
         lex_cmp(token, position_names[which].text,
                 position_names[which].length) != 0)
    ++which;

  if (which == position_count) return 0; /* Don't scan the next token */

  /* The weight data is only consulted once a position name has matched. */
  const MY_UCA_INFO *uca = p->rules->uca;
  switch (which) {
    case 0:
      token->code = uca->first_non_ignorable;
      break;
    case 1:
      token->code = uca->last_non_ignorable;
      break;
    case 2:
      token->code = uca->first_primary_ignorable;
      break;
    case 3:
      token->code = uca->last_primary_ignorable;
      break;
    case 4:
      token->code = uca->first_secondary_ignorable;
      break;
    case 5:
      token->code = uca->last_secondary_ignorable;
      break;
    case 6:
      token->code = uca->first_tertiary_ignorable;
      break;
    case 7:
      token->code = uca->last_tertiary_ignorable;
      break;
    case 8:
      token->code = uca->first_trailing;
      break;
    case 9:
      token->code = uca->last_trailing;
      break;
    case 10:
      token->code = uca->first_variable;
      break;
    default:
      token->code = uca->last_variable;
      break;
  }

  if (!my_coll_rule_expand(pwc, limit, token->code)) {
    /*
      Logical position can not be in a contraction,
      so the above call should never fail.
      Let's assert in debug version and print
      a nice error message in production version.
    */
    assert(0);
    return my_coll_parser_too_long_error(p, "Logical position");
  }
  return my_coll_parser_scan(p);
}

/**
  Scan character list

    character-list ::= CHAR [ CHAR...
] @param p Collation customization parser @param pwc Character string to add code to @param limit The result string cannot be longer than 'limit' characters @param name E.g. "contraction", "expansion" @retval 0 if character sequence was not scanned. @retval 1 if character sequence was scanned. */ static int my_coll_parser_scan_character_list(MY_COLL_RULE_PARSER *p, my_wc_t *pwc, size_t limit, const char *name) { if (my_coll_parser_curr(p)->term != MY_COLL_LEXEM_CHAR) return my_coll_parser_expected_error(p, MY_COLL_LEXEM_CHAR); if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code)) return my_coll_parser_too_long_error(p, name); if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_CHAR)) return 0; while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CHAR) { if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code)) return my_coll_parser_too_long_error(p, name); my_coll_parser_scan(p); } return 1; } /** Scan reset sequence @ ::= [ @ ] @ | [ @ ] @ @param p Collation customization parser @retval 0 if reset sequence was not scanned. @retval 1 if reset sequence was scanned. */ static int my_coll_parser_scan_reset_sequence(MY_COLL_RULE_PARSER *p) { my_coll_rule_reset(&p->rule); /* Scan "[before x]" option, if exists */ if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) my_coll_parser_scan_reset_before(p); /* Try logical reset position */ if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) { if (!my_coll_parser_scan_logical_position(p, p->rule.base, 1)) return 0; } else { /* Scan single reset character or expansion */ if (!my_coll_parser_scan_character_list(p, p->rule.base, MY_UCA_MAX_EXPANSION, "Expansion")) return 0; } if ((p->rules->shift_after_method == my_shift_method_expand || p->rule.before_level == 1) && p->rules->uca->version < UCA_V900) /* Apply "before primary" option */ { /* Suppose we have this rule: &B[before primary] < C i.e. we need to put C before B, but after A, so the result order is: A < C < B. 
Let primary weight of B be [BBBB]. We cannot just use [BBBB-1] as weight for C: DUCET does not have enough unused weights between any two characters, so using [BBBB-1] will likely make C equal to the previous character, which is A, so we'll get this order instead of the desired: A = C < B. To guarantee that that C is sorted after A, we'll use expansion with a kind of "biggest possible character". As "biggest possible character" we'll use "last_non_ignorable": We'll compose weight for C as: [BBBB-1][MMMM+1] where [MMMM] is weight for "last_non_ignorable". We also do the same trick for "reset after" if the collation option says so. E.g. for the rules "&B < C", weight for C will be calculated as: [BBBB][MMMM+1] At this point we only need to store codepoints 'B' and 'last_non_ignorable'. Actual weights for 'C' will be calculated according to the above formula later, in create_tailoring(). */ if (!my_coll_rule_expand(p->rule.base, MY_UCA_MAX_EXPANSION, p->rules->uca->last_non_ignorable)) return my_coll_parser_too_long_error(p, "Expansion"); } return 1; } /** Scan shift sequence @ ::= @ [ / @ ] | @ [ | @ ] @param p Collation customization parser @retval 0 if shift sequence was not scanned. @retval 1 if shift sequence was scanned. 
*/
static int my_coll_parser_scan_shift_sequence(MY_COLL_RULE_PARSER *p) {
  MY_COLL_RULE before_extend;

  /* Each shift sequence starts with an empty "current" string. */
  memset(&p->rule.curr, 0, sizeof(p->rule.curr));

  /* Scan single shift character or contraction */
  if (!my_coll_parser_scan_character_list(
          p, p->rule.curr, MY_UCA_MAX_CONTRACTION, "Contraction"))
    return 0;

  before_extend = p->rule; /* Remember the part before "/" */

  /* Append the part after "/" as expansion */
  if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) {
    my_coll_parser_scan(p);
    if (!my_coll_parser_scan_character_list(p, p->rule.base,
                                            MY_UCA_MAX_EXPANSION, "Expansion"))
      return 0;
  } else if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CONTEXT) {
    /*
      We support 2-character long context sequences only:
      one character is the previous context, plus the current character.
      It's OK as Unicode's CLDR does not have longer examples.
    */
    my_coll_parser_scan(p);
    p->rule.with_context = true;
    /*
      The context is appended into "curr" starting at its second element,
      so the room that is left is one less than the size of "curr", which
      is MY_UCA_MAX_CONTRACTION (not MY_UCA_MAX_EXPANSION).
    */
    if (!my_coll_parser_scan_character_list(
            p, p->rule.curr + 1, MY_UCA_MAX_CONTRACTION - 1, "context"))
      return 0;

    /*
      It might be CONTEXT followed by EXPANSION. For example, Japanese
      collation has one rule defined as:
      "&[before 3]へ<<<へ|ゝ=べ|ゝ=へ|ゞ/\u3099"
      The part of "へ|ゞ/\u3099" is CONTEXT ('|') followed by EXPANSION ('/').
    */
    if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) {
      my_coll_parser_scan(p);
      /* Append after whatever the reset sequence already put in "base". */
      size_t len = my_wstrnlen(p->rule.base, MY_UCA_MAX_EXPANSION);
      if (!my_coll_parser_scan_character_list(
              p, p->rule.base + len, MY_UCA_MAX_EXPANSION - len, "Expansion"))
        return 0;
    }
  }

  /* Add rule to the rule list */
  if (my_coll_rules_add(p->rules, &p->rule)) return 0;

  p->rule = before_extend; /* Restore to the state before "/" */

  return 1;
}

/**
  Scan shift operator

    shift ::= <  | <<  | <<<  | <<<<  | =

  @param  p   Collation customization parser

  @retval     0 if shift operator was not scanned.
  @retval     1 if shift operator was scanned.
*/
static int my_coll_parser_scan_shift(MY_COLL_RULE_PARSER *p) {
  MY_COLL_LEXEM *token = my_coll_parser_curr(p);
  if (token->term != MY_COLL_LEXEM_SHIFT) return 0;

  /* Update the difference counters of the rule, then consume the token. */
  my_coll_rule_shift_at_level(&p->rule, token->diff);
  return my_coll_parser_scan(p);
}

/**
  Scan one rule: reset followed by a number of shifts

    rule ::= & reset-sequence
             shift shift-sequence
             [ { shift shift-sequence }... ]

  @param  p   Collation customization parser

  @retval     0 if rule was not scanned.
  @retval     1 if rule was scanned.
*/
static int my_coll_parser_scan_rule(MY_COLL_RULE_PARSER *p) {
  if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_RESET)) return 0;
  if (!my_coll_parser_scan_reset_sequence(p)) return 0;

  /* At least one shift command is required after the reset. */
  if (!my_coll_parser_scan_shift(p))
    return my_coll_parser_expected_error(p, MY_COLL_LEXEM_SHIFT);

  /* Every shift operator must be followed by a shift sequence. */
  do {
    if (!my_coll_parser_scan_shift_sequence(p)) return 0;
  } while (my_coll_parser_scan_shift(p));

  return 1;
}

/**
  Scan collation customization: settings followed by rules

    collation ::= [ setting ... ] [ rule ... ]

  @param  p   Collation customization parser

  @retval     0 if collation customization expression was not scanned.
  @retval     1 if collation customization expression was scanned.
*/
static int my_coll_parser_exec(MY_COLL_RULE_PARSER *p) {
  if (!my_coll_parser_scan_settings(p)) return 0;

  for (;;) {
    if (my_coll_parser_curr(p)->term != MY_COLL_LEXEM_RESET) break;
    if (!my_coll_parser_scan_rule(p)) return 0;
  }

  /* Make sure no unparsed input data left */
  return my_coll_parser_scan_term(p, MY_COLL_LEXEM_EOF);
}

/*
  Collation language syntax parser.
  Uses lexical parser.

  @param rules           Collation rule list to load to.
  @param str             A string with collation customization.
  @param str_end         End of the string.
    @param col_name  Collation name
    @retval 0 on success
    @retval 1 on error
*/
static int my_coll_rule_parse(MY_COLL_RULES *rules, const char *str,
                              const char *str_end, const char *col_name) {
  MY_COLL_RULE_PARSER p;

  my_coll_parser_init(&p, rules, str, str_end);

  if (!my_coll_parser_exec(&p)) {
    /* Report the failure through rules->errmsg: code plus formatted text. */
    rules->errmsg->errcode = EE_COLLATION_PARSER_ERROR;
    my_coll_lexem_print_error(my_coll_parser_curr(&p), rules->errmsg->errarg,
                              sizeof(rules->errmsg->errarg) - 1, p.errstr,
                              col_name);
    return 1;
  }
  return 0;
}

/*
  Walk the third weight (index 2, the case/tertiary slot) of each of the first
  tailored_ce_cnt CEs in "to".

  - A weight greater than CASE_FIRST_UPPER_MASK replaces the running mask with
    its upper byte (weight & 0xFF00).
  - Any other non-zero weight gets the running mask OR-ed into it.
  - Zero weights are left untouched.

  @param to               First weight of the first CE
  @param to_stride        Distance, in uint16_t elements, between two
                          successive weights of "to"
  @param tailored_ce_cnt  Number of CEs to visit
  @param case_mask        Initial mask
*/
static void spread_case_mask(uint16_t *to, size_t to_stride,
                             size_t tailored_ce_cnt, uint16_t case_mask) {
  for (size_t i = 0; i < tailored_ce_cnt; ++i) {
    uint16_t *case_weight = &to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride];
    if (*case_weight > CASE_FIRST_UPPER_MASK)
      case_mask = *case_weight & 0xFF00;
    else if (*case_weight)
      *case_weight |= case_mask;
  }
}

/*
  If the collation is marked as [caseFirst upper], move all of the weights
  around to accommodate that. Only tailored weights are changed; for
  non-tailored weights, we do it on-the-fly in
  uca_scanner_900::apply_case_first().

  [caseFirst upper] is a directive that says that case should override all
  other tertiary case concerns (in a sense, a “level 2.5”), and furthermore,
  that uppercase should come before lowercase. (Normally lowercase sorts
  before uppercase.) It is currently only used in the Danish collation.

  This is done by looking at the tertiary weight, inferring the case from it,
  and then using the upper bits (which are normally unused) to signal the
  case. The algorithm is detailed in Unicode TR35, section 3.14, although we
  don't seem to follow it exactly.

  @param cs               Collation; nothing is done unless it has
                          coll_param->case_first == CASE_FIRST_UPPER and
                          levels_for_compare == 3
  @param dst              UCA data being built; only extra_ce_pri_base is read
  @param r                The rule whose "curr" characters are being tailored
  @param to               Tailored weights of the character (modified in place)
  @param to_stride        Distance, in uint16_t elements, between weights
  @param curr_len         Number of characters of r->curr to inspect
  @param tailored_ce_cnt  Number of CEs stored in "to"
*/
static void change_weight_if_case_first(CHARSET_INFO *cs,
                                        const MY_UCA_INFO *dst,
                                        MY_COLL_RULE *r, uint16_t *to,
                                        size_t to_stride, size_t curr_len,
                                        size_t tailored_ce_cnt) {
  /* We only need to implement [caseFirst upper] right now. */
  if (!(cs->coll_param && cs->coll_param->case_first == CASE_FIRST_UPPER &&
        cs->levels_for_compare == 3))
    return;

  assert(cs->uca->version == UCA_V900);

  // How many CEs this character has with non-ignorable primary weight.
  int tailored_pri_cnt = 0;
  int origin_pri_cnt = 0;
  for (size_t i = 0; i < tailored_ce_cnt; ++i) {
    /*
      If rule A has already applied a case weight change, and we have rule B
      which is inherited from A, apply the same case weight change on the rest
      of rule B and return.
    */
    if (to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] > CASE_FIRST_UPPER_MASK) {
      spread_case_mask(to, to_stride, tailored_ce_cnt, /*case_mask=*/0);
      return;
    }
    if (to[i * MY_UCA_900_CE_SIZE * to_stride]) tailored_pri_cnt++;
  }
  /* Do not count one CE when the rule shifts on, or resets before, level 1. */
  if (r->before_level == 1 || r->diff[0]) tailored_pri_cnt--;

  // Use the DUCET weight to detect the character's case.
  MY_UCA_INFO *src = &my_uca_v900;
  int changed_ce = 0;

  /* First pass: count the non-zero primary weights of r->curr in "src". */
  my_wc_t *curr = r->curr;
  for (size_t i = 0; i < curr_len; ++i) {
    const uint16_t *from = my_char_weight_addr_900(src, *curr);
    unsigned page = *curr >> 8;
    unsigned code = *curr & 0xFF;
    curr++;
    int ce_cnt =
        src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0;
    for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) {
      if (from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS]) origin_pri_cnt++;
    }
  }

  /*
    Number of case masks that are copied one-to-one from the original CEs
    onto the tailored CEs in the second pass.
  */
  int case_to_copy = 0;
  if (origin_pri_cnt <= tailored_pri_cnt)
    case_to_copy = origin_pri_cnt;
  else
    case_to_copy = tailored_pri_cnt - 1;

  /*
    Second pass: while case_to_copy is non-zero, OR the upper/lower mask of
    each original CE into the next tailored CE whose primary weight is
    non-zero. Once it reaches zero, only count upper/lower case CEs.
  */
  int upper_cnt = 0;
  int lower_cnt = 0;
  curr = r->curr;
  uint16_t case_mask = 0;
  for (size_t curr_ind = 0; curr_ind < curr_len; ++curr_ind) {
    const uint16_t *from = my_char_weight_addr_900(src, *curr);
    unsigned page = *curr >> 8;
    unsigned code = *curr & 0xFF;
    curr++;
    int ce_cnt =
        src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0;
    changed_ce = 0;
    for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) {
      uint16_t primary_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS];
      if (primary_weight) {
        uint16_t case_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS +
                                    2 * UCA900_DISTANCE_BETWEEN_LEVELS];
        uint16_t *ce_to = nullptr;
        if (is_tertiary_weight_upper_case(case_weight)) {
          if (!case_to_copy)
            upper_cnt++;
          else
            case_mask = CASE_FIRST_UPPER_MASK;
        } else {
          if (!case_to_copy)
            lower_cnt++;
          else
            case_mask = CASE_FIRST_LOWER_MASK;
        }
        if (case_to_copy) {
          /* Skip tailored CEs whose first weight is zero. */
          do {
            ce_to = to + changed_ce * MY_UCA_900_CE_SIZE * to_stride;
            changed_ce++;
          } while (*ce_to == 0);
          ce_to[2 * to_stride] |= case_mask;
          case_to_copy--;
        }
      }
    }
  }

  if (origin_pri_cnt <= tailored_pri_cnt) {
    /*
      NOTE(review): changed_ce is not advanced inside this loop, so every
      iteration inspects the same CE; confirm that this is intended.
    */
    for (int i = origin_pri_cnt; i < tailored_pri_cnt; ++i) {
      const int offset = changed_ce * MY_UCA_900_CE_SIZE * to_stride;
      if (to[offset] && to[offset] < dst->extra_ce_pri_base)
        to[offset + 2 * to_stride] = 0;
    }
  } else {
    /* Summarize the case of the original CEs that were only counted. */
    if (upper_cnt && lower_cnt)
      case_mask = CASE_FIRST_MIXED_MASK;
    else if (upper_cnt && !lower_cnt)
      case_mask = CASE_FIRST_UPPER_MASK;
    else
      case_mask = CASE_FIRST_LOWER_MASK;
    bool skipped_extra_ce = false;
    /*
      Apply the summary mask to one CE, searching from the end for a CE whose
      primary weight is non-zero and below extra_ce_pri_base. For rules that
      shift on, or reset before, level 1 the first such CE found is skipped.
    */
    for (int i = tailored_ce_cnt - 1; i >= 0; --i) {
      int offset = i * MY_UCA_900_CE_SIZE * to_stride;
      if (to[offset] && to[offset] < dst->extra_ce_pri_base) {
        if ((r->before_level == 1 || r->diff[0]) && !skipped_extra_ce) {
          skipped_extra_ce = true;
          continue;
        }
        to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] |= case_mask;
        break;
      }
    }
  }
  spread_case_mask(to, to_stride, tailored_ce_cnt, case_mask);
}

/*
  UCA 9.0.0 variant of my_char_weight_put(): copy the weights of the
  rule->base characters (trying the longest contraction first, then the
  single character) to "to", and append one extra CE when the rule carries a
  difference on any level.

  @param dst        UCA data to read weights and contractions from
  @param to         Destination of the first weight
  @param to_stride  Distance, in uint16_t elements, between weights in "to"
  @param to_length  Maximum number of weights to write
  @param to_num_ce  Receives the resulting number of CEs
  @param rule       Rule supplying the base characters and the differences
  @param base_len   Number of characters in rule->base to process

  @return The number of CEs (the same value stored in *to_num_ce), capped at
          (MY_UCA_MAX_WEIGHT_SIZE - 1) / MY_UCA_900_CE_SIZE.
*/
static size_t my_char_weight_put_900(MY_UCA_INFO *dst, uint16_t *to,
                                     size_t to_stride, size_t to_length,
                                     uint16_t *to_num_ce,
                                     const MY_COLL_RULE *rule,
                                     size_t base_len) {
  size_t count;
  int total_ce_cnt = 0;

  const my_wc_t *base = rule->base;
  for (count = 0; base_len;) {
    const uint16_t *from = nullptr;
    size_t from_stride = 0;
    int ce_cnt = 0;

    /* Try the longest remaining prefix as a contraction first. */
    for (size_t chlen = base_len; chlen > 1; chlen--) {
      if ((from = my_uca_contraction_weight(dst->contraction_nodes, base,
                                            chlen))) {
        from_stride = 1;
        base += chlen;
        base_len -= chlen;
        /* The last slot of a contraction weight array holds its CE count. */
        ce_cnt = *(from + MY_UCA_MAX_WEIGHT_SIZE - 1);
        break;
      }
    }

    /* No contraction matched: take the weights of a single character. */
    if (!from) {
      unsigned page = *base >> 8;
      unsigned code = *base & 0xFF;
      base++;
      base_len--;
      if (dst->weights[page]) {
        from = UCA900_WEIGHT_ADDR(dst->weights[page], /*level=*/0, code);
        from_stride = UCA900_DISTANCE_BETWEEN_LEVELS;
        ce_cnt = UCA900_NUM_OF_CE(dst->weights[page], code);
      }
    }

    for (int weight_ind = 0;
         weight_ind < ce_cnt * MY_UCA_900_CE_SIZE && count < to_length;
         weight_ind++) {
      *to = *from;
      to += to_stride;
      from += from_stride;
      count++;
    }
    total_ce_cnt += ce_cnt;
  }

  /*
    For a shift on primary weight, there might not be enough room in the
    tables. For example, Sinhala has the rule "&\\u0DA5 < \\u0DA4", which
    means that we should move U+0DA4 after U+0DA5 (on the primary level).
    However, there is no room after U+0DA5 in DUCET unless we wanted to
    conflict with U+0DA6:

      0DA4  ; [.28EC.0020.0002] # SINHALA LETTER TAALUJA NAASIKYAYA
      0DA5  ; [.28ED.0020.0002] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA
      0DA6  ; [.28EE.0020.0002] # SINHALA LETTER SANYAKA JAYANNA

    Before our implementation of UCA 9.0.0, the shift on primary weight was
    done by making it a fake expansion when parsing the rule, where we'd
    expand U+0DA4 to U+0DA5 U+MMMM, MMMM being 'last_non_ignorable'. (This
    happens in my_coll_parser_scan_reset_sequence()). But from UCA 9.0.0, we
    also support accent- and case-sensitive collations, and then, having the
    extra weights of 'last_non_ignorable' (which is just a random character)
    on the second and third level may cause unexpected results for algorithms
    that use the meaning of the tertiary weight to infer case.

    Thus, we'll abandon the fake expansion way; instead, we add an extra CE
    (after the one from U+0DA5, the character we are moving after) to
    represent all the weights we might want to shift. The actual shifting
    happens in apply_shift_900().

    For the rule "&\\u0DA5 < \\u0DA4", U+0DA4's weights become
    [.28ED.0020.0002][.54A4.0000.0000], where 0x54A4 is the value of
    extra_ce_pri_base. We then apply the differences from the rule (which are
    never negative) to the last CE, so that it becomes e.g. [.54A5.0000.0000].
  */
  if ((rule->diff[0] || rule->diff[1] || rule->diff[2]) && count < to_length) {
    *to = rule->diff[0] ? dst->extra_ce_pri_base : 0;
    to += to_stride;
    *to = rule->diff[1] ? dst->extra_ce_sec_base : 0;
    to += to_stride;
    *to = rule->diff[2] ? dst->extra_ce_ter_base : 0;
    to += to_stride;
    total_ce_cnt++;
    count += 3;
  }
  total_ce_cnt =
      std::min(total_ce_cnt, (MY_UCA_MAX_WEIGHT_SIZE - 1) / MY_UCA_900_CE_SIZE);
  *to_num_ce = total_ce_cnt;

  return total_ce_cnt;
}

/**
  Helper function:
  Copies UCA weights for a given "unsigned" string
  to the given location.

  @param dst        destination UCA weight data
  @param to         destination address
  @param to_stride  distance between each successive weight in "to", counted
                    in uint16_t elements (it is added to a uint16_t pointer)
  @param to_length  size of destination
  @param to_num_ce  where to put the number of CEs generated
                    (written only on the UCA_V900 path)
  @param rule       The rule that contains the characters whose weight
                    are to copied
  @param base_len   The length of base character list
  @param uca_ver    UCA version

  @return    number of weights put; for UCA_V900 the value returned by
             my_char_weight_put_900(), i.e. the number of CEs
*/
static size_t my_char_weight_put(MY_UCA_INFO *dst, uint16_t *to,
                                 size_t to_stride, size_t to_length,
                                 uint16_t *to_num_ce, const MY_COLL_RULE *rule,
                                 size_t base_len, enum_uca_ver uca_ver) {
  if (uca_ver == UCA_V900)
    return my_char_weight_put_900(dst, to, to_stride, to_length, to_num_ce,
                                  rule, base_len);

  const my_wc_t *base = rule->base;
  size_t count = 0;
  while (base_len != 0) {
    const uint16_t *from = nullptr;

    /* Try the longest remaining prefix as a contraction first. */
    for (size_t chlen = base_len; chlen > 1; chlen--) {
      if ((from = my_uca_contraction_weight(dst->contraction_nodes, base,
                                            chlen))) {
        base += chlen;
        base_len -= chlen;
        break;
      }
    }

    if (!from) {
      from = my_char_weight_addr(dst, *base);
      base++;
      base_len--;
    }

    /* Copy weights up to the zero terminator or until "to" is full. */
    for (; from && *from && count < to_length;) {
      *to = *from++;
      to += to_stride;
      count++;
    }
  }

  /* Terminate the copied weight string. */
  *to = 0;
  return count;
}

/**
  Alloc new page and copy the default UCA weights
  @param cs       Character set
  @param loader   Character set loader
  @param src      Default UCA data to copy from
  @param dst      UCA data to copy weights to
  @param page     page number

  @retval   false on success
  @retval   true  on error
*/
static bool my_uca_copy_page(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
                             const MY_UCA_INFO *src, MY_UCA_INFO *dst,
                             size_t page) {
  const unsigned dst_size = 256 * dst->lengths[page] * sizeof(uint16_t);
  dst->weights[page] = static_cast(loader->mem_malloc(dst_size));
  if (dst->weights[page] == nullptr) return true;

  /* Remember that this page is owned by "dst". */
  dst->m_allocated_weights->at(page) = 1;

  assert(src->lengths[page] <= dst->lengths[page]);
  memset(dst->weights[page], 0, dst_size);
  if (cs->uca && cs->uca->version == UCA_V900) {
    /* UCA 9.0.0: the source page is copied as one contiguous block. */
    const unsigned src_size = 256 * src->lengths[page] * sizeof(uint16_t);
    memcpy(dst->weights[page], src->weights[page], src_size);
  } else if (src->lengths[page] > 0) {
    /*
      Older versions: copy character by character, because each character
      occupies dst->lengths[page] weights in the new page but only
      src->lengths[page] in the source page.
    */
    for (unsigned chc = 0; chc < 256; chc++) {
      memcpy(dst->weights[page] + chc * dst->lengths[page],
             src->weights[page] + chc * src->lengths[page],
             src->lengths[page] * sizeof(uint16_t));
    }
  }
  return false;
}

/*
  This is used to apply the weight shift if there is a [before 1] rule.
  If we have a rule "&[before 1] A < B < C", and A's collation element is [P,
  S, T], then in my_char_weight_put_900(), we append one extra collation element
  to A's CE to be B and C's CE. So B and C's CE becomes [P, S, T][p, 0, 0].
  What we do with this function is to change B's CE to [P - 1, S, T][p + n, 0,
  0].
  1. The rule "&[before 1] A < B < C" means "B < C < A" on primary level. Since
     "B < A", so we give B the first primary weight as (P - 1).
  2. p is a weight value which is the maximum regular primary weight in DUCET
     plus one (0x54A3 + 1 = 0x54A4). This is to make sure B's primary weight
     less than A and greater than any character which sorts before A.
  3. n is the number of characters in this rule's character list. For the B in
     this rule, n = 1.
     For the C in this rule, n = 2. This can make sure "B < C".
  It is the same thing that apply_secondary_shift_900() and
  apply_tertiary_shift_900() do, but on different weight levels.
*/
static bool apply_primary_shift_900(MY_COLL_RULES *rules, MY_COLL_RULE *r,
                                    uint16_t *to, size_t to_stride,
                                    size_t nweights,
                                    uint16_t *const last_weight_ptr) {
  /*
    Find the second-to-last non-ignorable primary weight to apply shift,
    because the last one is the extra CE we added in
    my_char_weight_put_900().
  */
  int last_sec_pri = 0;
  for (last_sec_pri = nweights - 2; last_sec_pri >= 0; --last_sec_pri) {
    if (to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]) break;
  }
  if (last_sec_pri >= 0) {
    to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]--; /* Reset before */
    if (rules->shift_after_method == my_shift_method_expand) {
      /*
        Special case. Don't let characters shifted after X
        and before next(X) intermix to each other.

        For example:
        "[shift-after-method expand] &0 < a &[before primary]1 < A".
        I.e. we reorder 'a' after '0', and then 'A' before '1'.
        'a' must be sorted before 'A'.

        Note, there are no real collations in CLDR which shift
        after and before two neighbouring characters. We need this
        just in case. Reserving 4096 (0x1000) weights for such
        cases is perfectly enough.
      */
      /* W3-TODO: const may vary on levels 2,3*/
      last_weight_ptr[0] += 0x1000;
    }
  } else {
    /* No non-zero primary weight before the extra CE: report an error. */
    rules->errmsg->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR;
    snprintf(rules->errmsg->errarg, sizeof(rules->errmsg->errarg), "U+%04lX",
             r->base[0]);
    return true;
  }
  return false;
}

/*
  This is used to apply the weight shift if there is a [before 2] rule.
  Please see the comment on apply_primary_shift_900().
*/
static bool apply_secondary_shift_900(MY_COLL_RULES *rules, MY_COLL_RULE *r,
                                      uint16_t *to, size_t to_stride,
                                      size_t nweights,
                                      uint16_t *const last_weight_ptr) {
  /*
    Find the second-to-last non-ignorable secondary weight to apply shift,
    because the last one is the extra CE we added in
    my_char_weight_put_900().
  */
  int last_sec_sec;
  for (last_sec_sec = nweights - 2; last_sec_sec >= 0; --last_sec_sec) {
    if (to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]) break;
  }
  if (last_sec_sec >= 0) {
    // Reset before.
    to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]--;
    if (rules->shift_after_method == my_shift_method_expand) {
      /*
        Same reason as in apply_primary_shift_900(), reserve 256 (0x100)
        weights for secondary level.
      */
      last_weight_ptr[to_stride] += 0x100;
    }
  } else {
    /* No non-zero secondary weight before the extra CE: report an error. */
    rules->errmsg->errcode = EE_FAILED_TO_RESET_BEFORE_SECONDARY_IGNORABLE_CHAR;
    snprintf(rules->errmsg->errarg, sizeof(rules->errmsg->errarg), "U+%04lX",
             r->base[0]);
    return true;
  }
  return false;
}

/*
  This is used to apply the weight shift if there is a [before 3] rule.
  Please see the comment on apply_primary_shift_900().
*/
static bool apply_tertiary_shift_900(MY_COLL_RULES *rules, MY_COLL_RULE *r,
                                     uint16_t *to, size_t to_stride,
                                     size_t nweights,
                                     uint16_t *const last_weight_ptr) {
  /*
    Find the second-to-last non-ignorable tertiary weight to apply shift,
    because the last one is the extra CE we added in
    my_char_weight_put_900().
  */
  int last_sec_ter;
  for (last_sec_ter = nweights - 2; last_sec_ter >= 0; --last_sec_ter) {
    if (to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride])
      break;
  }
  if (last_sec_ter >= 0) {
    // Reset before.
    to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride]--;
    if (rules->shift_after_method == my_shift_method_expand) {
      /*
        Same reason as in apply_primary_shift_900(), reserve 16 (0x10)
        weights for tertiary level.
      */
      last_weight_ptr[to_stride * 2] += 0x10;
    }
  } else {
    /* No non-zero tertiary weight before the extra CE: report an error. */
    rules->errmsg->errcode = EE_FAILED_TO_RESET_BEFORE_TERTIARY_IGNORABLE_CHAR;
    snprintf(rules->errmsg->errarg, sizeof(rules->errmsg->errarg), "U+%04lX",
             r->base[0]);
    return true;
  }
  return false;
}

/*
  UCA 9.0.0 shift: add the rule's per-level differences to the last CE of
  "to", then, for a "[before N]" rule, delegate to the matching
  apply_*_shift_900() helper.

  @param rules      Rule list (supplies shift_after_method and errmsg)
  @param r          The rule being applied
  @param to         First weight of the first CE
  @param to_stride  Distance, in uint16_t elements, between weights
  @param nweights   Number of CEs in "to" (must be non-zero)

  @retval false on success
  @retval true  on error (reported by the helper through rules->errmsg)
*/
static bool apply_shift_900(MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16_t *to,
                            size_t to_stride, size_t nweights) {
  // nweights should not less than 1 because of the extra CE.
  assert(nweights);
  // Apply level difference.
  uint16_t *const last_weight_ptr =
      to + (nweights - 1) * to_stride * MY_UCA_900_CE_SIZE;
  last_weight_ptr[0] += r->diff[0];
  last_weight_ptr[to_stride] += r->diff[1];
  last_weight_ptr[to_stride * 2] += r->diff[2];
  if (r->before_level == 1)  // Apply "&[before primary]".
    return apply_primary_shift_900(rules, r, to, to_stride, nweights,
                                   last_weight_ptr);
  else if (r->before_level == 2)  // Apply "[before 2]".
    return apply_secondary_shift_900(rules, r, to, to_stride, nweights,
                                     last_weight_ptr);
  else if (r->before_level == 3)  // Apply "[before 3]".
    return apply_tertiary_shift_900(rules, r, to, to_stride, nweights,
                                    last_weight_ptr);
  return false;
}

/*
  Apply the difference carried by rule "r" to the weights in "to".
  For UCA_V900 everything is delegated to apply_shift_900(); otherwise the
  weights are a flat string (to_stride must be 1).

  @param rules      Rule list (supplies uca version, shift method, errmsg)
  @param r          The rule being applied
  @param level      Weight level; used here only to pick r->diff[level] when
                    "to" holds no weights
  @param to         Weights of the tailored character
  @param to_stride  Distance, in uint16_t elements, between weights
  @param nweights   Number of weights (number of CEs for UCA_V900)

  @retval false on success
  @retval true  on error
*/
static bool apply_shift(MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
                        uint16_t *to, size_t to_stride, size_t nweights) {
  if (rules->uca->version == UCA_V900)
    return apply_shift_900(rules, r, to, to_stride, nweights);

  assert(to_stride == 1);

  /* Apply level difference. */
  if (nweights) {
    to[nweights - 1] += r->diff[0];
    if (r->before_level == 1) /* Apply "&[before primary]" */
    {
      if (nweights >= 2) {
        to[nweights - 2]--; /* Reset before */
        if (rules->shift_after_method == my_shift_method_expand) {
          /*
            Special case. Don't let characters shifted after X
            and before next(X) intermix to each other.

            For example:
            "[shift-after-method expand] &0 < a &[before primary]1 < A".
            I.e. we reorder 'a' after '0', and then 'A' before '1'.
            'a' must be sorted before 'A'.

            Note, there are no real collations in CLDR which shift
            after and before two neighbour characters. We need this
            just in case. Reserving 4096 (0x1000) weights for such
            cases is perfectly enough.
          */
          /* W3-TODO: const may vary on levels 2,3*/
          to[nweights - 1] += 0x1000;
        }
      } else {
        rules->errmsg->errcode =
            EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR;
        snprintf(rules->errmsg->errarg, sizeof(rules->errmsg->errarg),
                 "U+%04lX", r->base[0]);
        return true;
      }
    }
  } else {
    /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */
    assert(to[0] == 0);
    to[0] = r->diff[level];
  }
  return false;
}

/*
  Insert the contraction described by r->curr into the trie rooted at
  *cont_nodes, creating the nodes that do not exist yet, and mark the final
  node as a contraction tail.

  For a previous-context contraction (r->with_context) the trie is keyed by
  curr[1] first and then, in that node's child_nodes_context, by curr[0].
  For a normal contraction the characters are inserted in order through
  child_nodes.

  @param cont_nodes  Top-level node list of the trie
  @param r           Rule holding the contraction in "curr"

  @return Pointer to the tail node of the contraction.
*/
static MY_CONTRACTION *add_contraction_to_trie(
    std::vector *cont_nodes, MY_COLL_RULE *r) {
  MY_CONTRACTION new_node{0, {}, {}, {}, false, 0};
  if (r->with_context)  // previous-context contraction
  {
    assert(my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION) == 2);
    std::vector::iterator node_it =
        find_contraction_part_in_trie(*cont_nodes, r->curr[1]);
    if (node_it == cont_nodes->end() || node_it->ch != r->curr[1]) {
      new_node.ch = r->curr[1];
      node_it = cont_nodes->insert(node_it, new_node);
    }
    cont_nodes = &node_it->child_nodes_context;

    node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[0]);
    if (node_it == cont_nodes->end() || node_it->ch != r->curr[0]) {
      new_node.ch = r->curr[0];
      node_it = cont_nodes->insert(node_it, new_node);
    }
    node_it->is_contraction_tail = true;
    node_it->contraction_len = 2;
    return &(*node_it);
  } else  // normal contraction
  {
    size_t contraction_len = my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
    std::vector::iterator node_it;
    for (size_t ch_ind = 0; ch_ind < contraction_len; ++ch_ind) {
      node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[ch_ind]);
      if (node_it == cont_nodes->end() || node_it->ch != r->curr[ch_ind]) {
        new_node.ch = r->curr[ch_ind];
        node_it = cont_nodes->insert(node_it, new_node);
      }
      /* Descend: the next character is looked up among this node's children. */
      cont_nodes = &node_it->child_nodes;
    }
    node_it->is_contraction_tail = true;
    node_it->contraction_len = contraction_len;
    return &(*node_it);
  }
}

/*
  Apply one tailoring rule to "dst": locate (or create) the weight storage of
  the shifted character or contraction, fill it with the weights of the
  "reset to" sequence, adjust case weights if needed, then apply the shift.

  @param cs     Collation being initialized
  @param rules  Rule list the rule belongs to
  @param r      The rule to apply
  @param level  Weight level, passed through to apply_shift()
  @param dst    UCA data to modify

  @retval false on success
  @retval true  on error
*/
static bool apply_one_rule(CHARSET_INFO *cs, MY_COLL_RULES *rules,
                           MY_COLL_RULE *r, int level, MY_UCA_INFO *dst) {
  size_t nweights;
  size_t nreset = my_coll_rule_reset_length(r); /* Length of reset sequence */
  size_t nshift = my_coll_rule_shift_length(r); /* Length of shift sequence */
  uint16_t *to = nullptr;
  uint16_t *to_num_ce = nullptr;
  size_t to_stride = 0;

  if (nshift >= 2) /* Contraction */
  {
    size_t i;
    int flag;
    /* Add HEAD, MID and TAIL flags for the contraction parts */
    my_uca_add_contraction_flag(
        dst->contraction_flags, r->curr[0],
        r->with_context ? MY_UCA_PREVIOUS_CONTEXT_HEAD : MY_UCA_CNT_HEAD);
    for (i = 1, flag = MY_UCA_CNT_MID1; i < nshift - 1; i++, flag <<= 1)
      my_uca_add_contraction_flag(dst->contraction_flags, r->curr[i], flag);
    my_uca_add_contraction_flag(
        dst->contraction_flags, r->curr[i],
        r->with_context ? MY_UCA_PREVIOUS_CONTEXT_TAIL : MY_UCA_CNT_TAIL);
    /* Add new contraction to the contraction list */
    MY_CONTRACTION *trie_node =
        add_contraction_to_trie(dst->contraction_nodes, r);
    to = trie_node->weight;
    to_stride = 1;
    /* The last slot of the contraction's weight array holds the CE count. */
    to_num_ce = &to[MY_UCA_MAX_WEIGHT_SIZE - 1];
    /* Store weights of the "reset to" character */
    nweights =
        my_char_weight_put(dst, to, to_stride, MY_UCA_MAX_WEIGHT_SIZE - 1,
                           to_num_ce, r, nreset, rules->uca->version);
  } else {
    my_wc_t pagec = (r->curr[0] >> 8);
    assert(dst->weights[pagec]);
    if (cs->uca && cs->uca->version == UCA_V900) {
      to = my_char_weight_addr_900(dst, r->curr[0]);
      to_stride = UCA900_DISTANCE_BETWEEN_LEVELS;
      /* The CE count is stored one level-distance before the first weight. */
      to_num_ce = to - UCA900_DISTANCE_BETWEEN_LEVELS;
    } else {
      to = my_char_weight_addr(dst, r->curr[0]);
      to_stride = 1;
      to_num_ce = to + (dst->lengths[pagec] - 1);
    }
    /* Store weights of the "reset to" character */
    if (dst->lengths[pagec] == 0)
      nweights = 0;
    else
      nweights = my_char_weight_put(dst, to, to_stride, dst->lengths[pagec] - 1,
                                    to_num_ce, r, nreset, rules->uca->version);
  }

  change_weight_if_case_first(cs, dst, r, to, to_stride, nshift, nweights);
  /* Apply level difference. */
  return apply_shift(rules, r, level, to, to_stride, nweights);
}

/**
  Check if collation rules are valid,
  i.e. characters are not outside of the collation supported range.
*/ static int check_rules(const MY_COLL_RULES *rules, const MY_UCA_INFO *dst, const MY_UCA_INFO *src) { const MY_COLL_RULE *r, *rlast; for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) { if (r->curr[0] > dst->maxchar) { rules->errmsg->errcode = EE_SHIFT_CHAR_OUT_OF_RANGE; snprintf(rules->errmsg->errarg, sizeof(rules->errmsg->errarg), "u%04X", (unsigned)r->curr[0]); return true; } else if (r->base[0] > src->maxchar) { rules->errmsg->errcode = EE_RESET_CHAR_OUT_OF_RANGE; snprintf(rules->errmsg->errarg, sizeof(rules->errmsg->errarg), "u%04X", (unsigned)r->base[0]); return true; } } return false; } static void synthesize_lengths_900(uint8_t *lengths, const uint16_t *const *weights, unsigned npages) { for (unsigned page = 0; page < npages; ++page) { int max_len = 0; if (weights[page]) { for (unsigned code = 0; code < 256; ++code) { max_len = std::max(max_len, weights[page][code]); } } if (max_len == 0) lengths[page] = 0; else lengths[page] = max_len * MY_UCA_900_CE_SIZE + 1; } } static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) { if (!cs->uca || cs->uca->version != UCA_V900 || cs->coll_param != &ja_coll_param) return; for (int page = MIN_JA_HAN_PAGE; page <= MAX_JA_HAN_PAGE; page++) { // In DUCET, weight is not assigned to code points in [U+4E00, U+9FFF]. // When re-initializing (after my_coll_uninit_uca), the weights // may already be set. assert(dst->weights[page] == nullptr || dst->weights[page] == ja_han_pages[page - MIN_JA_HAN_PAGE]); assert(dst->m_allocated_weights->at(page) == 0); dst->weights[page] = ja_han_pages[page - MIN_JA_HAN_PAGE]; } } /* We have reordered all the characters in the pages which contains Chinese Han characters with uca9dump (see dump_zh_pages() in uca9-dump.cc). Replace the DUCET pages with these pages. 
*/
static void copy_zh_han_pages(MY_UCA_INFO *dst, MY_CHARSET_LOADER *loader) {
  for (int page = MIN_ZH_HAN_PAGE; page <= MAX_ZH_HAN_PAGE; page++) {
    /* Pages without a reordered replacement are left as they are. */
    if (!zh_han_pages[page - MIN_ZH_HAN_PAGE]) continue;

    /* Release a page that "dst" owns before pointing at the static one. */
    if (dst->m_allocated_weights->at(page)) {
      loader->mem_free(dst->weights[page]);
      dst->m_allocated_weights->at(page) = 0;
    }
    dst->weights[page] = zh_han_pages[page - MIN_ZH_HAN_PAGE];
  }
}

/*
  UCA defines an algorithm to calculate character's implicit weight if this
  character's weight is not defined in the DUCET. This function is to help
  convert Chinese character's implicit weight calculated by UCA back to its
  code points.
  The implicit weight and the code point is not 1 : 1 map because DUCET lets
  some characters share implicit primary weight. For example, the DUCET
  defines "2F00  ; [.FB40.0020.0004][.CE00.0000.0000] # KANGXI RADICAL ONE",
  and 4E00's implicit weight is [.FB40.0020.0002][.CE00.0000.0000]. We can see
  the primary weights of U+2F00 and U+4E00 are same (FB40 CE00). But for the
  Han characters in zh.xml file, each one has unique implicit weight.

  @param first   First implicit primary weight (AAAA below)
  @param second  Second implicit primary weight (BBBB below)

  @return The code point reconstructed from the two weights.
*/
static inline my_wc_t convert_implicit_to_ch(uint16_t first, uint16_t second) {
  /*
    For reference, here is how UCA calculates one character's implicit weight.
    AAAA = 0xFB40 + (CP >> 15)  # The 0xFB40 changes for different character
                                # groups
    BBBB = (CP & 0x7FFF) | 0x8000
  */
  /* Pick the base of the group that "first" falls into. */
  int group_base;
  if (first < 0xFB80)
    group_base = 0xFB40;
  else if (first < 0xFBC0)
    group_base = 0xFB80;
  else
    group_base = 0xFBC0;

  /* High bits come from "first", the low 15 bits from "second". */
  return (((first - group_base) << 15) | (second & 0x7FFF));
}

/*
  Usually we do reordering in apply_reorder_param(). But for the Chinese
  collation, since we want to remove the weight gap between the character
  groups (see the comment on change_zh_implicit()), and we have done the
  reordering for some characters in the pages which contains Chinese Han
  characters, if we still use apply_reorder_param() to do the reordering for
  other characters, we might meet weight conflict.
  For example, in the DUCET page, 'A' has primary weight 0x1C47, but this
  value has been assigned to the first Chinese Han character in CLDR zh.xml
  file.
  So we do the reordering for all the DUCET pages when initializing the
  collation.
*/
static void modify_all_zh_pages(Reorder_param *reorder_param, MY_UCA_INFO *dst,
                                int npages) {
  /* Lookup built from the (key, value) pairs of zh_han_to_single_weight[]. */
  std::map zh_han_to_single_weight_map;
  for (int i = 0; i < ZH_HAN_WEIGHT_PAIRS; i++) {
    zh_han_to_single_weight_map[zh_han_to_single_weight[i * 2]] =
        zh_han_to_single_weight[i * 2 + 1];
  }

  for (int page = 0; page < npages; page++) {
    /*
      If there is no page in the DUCET, then all the characters in this page
      must have implicit weight. The reordering for it will be done by
      change_zh_implicit(). Do not need to change here.
      If there is page in zh_han_pages[], then all the characters in this page
      have been reordered by uca9dump. Do not need to change here.
    */
    if (!dst->weights[page] ||
        (page >= MIN_ZH_HAN_PAGE && page <= MAX_ZH_HAN_PAGE &&
         zh_han_pages[page - MIN_ZH_HAN_PAGE]))
      continue;
    for (int off = 0; off < 256; off++) {
      uint16_t *wbeg = UCA900_WEIGHT_ADDR(dst->weights[page], 0, off);
      int num_of_ce = UCA900_NUM_OF_CE(dst->weights[page], off);
      for (int ce = 0; ce < num_of_ce; ce++) {
        assert(reorder_param->wt_rec_num == 1);
        if (*wbeg >= reorder_param->wt_rec[0].old_wt_bdy.begin &&
            *wbeg <= reorder_param->wt_rec[0].old_wt_bdy.end) {
          /* Inside the reordered range: move by the range's offset. */
          *wbeg = *wbeg + reorder_param->wt_rec[0].new_wt_bdy.begin -
                  reorder_param->wt_rec[0].old_wt_bdy.begin;
        } else if (*wbeg >= 0xFB00) {
          /* An implicit weight occupies this CE and the next one. */
          uint16_t next_wt = *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS);
          if (*wbeg >= 0xFB40 && *wbeg <= 0xFBC1) {  // Han's implicit weight
            /*
              If some characters in DUCET share the same implicit weight, their
              reordered weight should be same too.
            */
            my_wc_t ch = convert_implicit_to_ch(*wbeg, next_wt);
            if (zh_han_to_single_weight_map.find(ch) !=
                zh_han_to_single_weight_map.end()) {
              *wbeg = zh_han_to_single_weight_map[ch];
              *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS) = 0;
              /*
                NOTE(review): this path advances "ce" twice (here and in the
                loop header) but "wbeg" only once, unlike the path below;
                confirm this is intended.
              */
              wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
              ce++;
              continue;
            }
          }
          *wbeg = change_zh_implicit(*wbeg);
          wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
          ce++;
        }
        wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
      }
    }
  }
}

/*
  Build the tailored weight tables of "dst" from the default tables in "src"
  and the parsed rules.

  Steps: validate the rules, allocate the per-page length and pointer arrays,
  copy them from "src", enlarge the lengths of the pages touched by
  non-contraction rules, allocate private copies of those pages, and finally
  apply every rule.

  @param cs     Collation being initialized
  @param rules  Parsed rules (also supplies the loader and errmsg)
  @param level  Weight level, passed through to apply_one_rule()
  @param dst    UCA data to fill in
  @param src    Default UCA data to copy from

  @retval false on success
  @retval true  on error
*/
static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
                              MY_UCA_INFO *dst, const MY_UCA_INFO *src) {
  MY_COLL_RULE *r, *rlast;
  size_t i, npages = (src->maxchar + 1) / 256;
  bool has_contractions = false;
  MY_CHARSET_LOADER *loader = rules->loader;

  dst->maxchar = src->maxchar;

  if (check_rules(rules, dst, src)) return true;

  /* Allocate memory for pages and their lengths */
  dst->lengths = static_cast(loader->mem_malloc(npages));
  dst->weights =
      static_cast(loader->mem_malloc(npages * sizeof(uint16_t *)));
  if (dst->lengths == nullptr || dst->weights == nullptr) return true;

  /* One flag per page: non-zero when "dst" owns the page's memory. */
  if (dst->m_allocated_weights == nullptr) {
    dst->m_allocated_weights = new std::vector();
    dst->m_allocated_weights->resize(npages, 0);
  }

  /*
    Copy pages lengths and page pointers from the default UCA weights.
  */
  memcpy(dst->lengths, src->lengths, npages);
  memcpy(dst->weights, src->weights, npages * sizeof(uint16_t *));

  /*
    Calculate maximum lengths for the pages which will be overwritten.
    Mark pages that will be overwritten as NULL.
    We'll allocate their own memory.
  */
  for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
    if (!r->curr[1]) /* If not a contraction */
    {
      unsigned pagec = (r->curr[0] >> 8);
      if (r->base[1]) /* Expansion */
      {
        /* Reserve space for maximum possible length */
        dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE;
      } else {
        unsigned pageb = (r->base[0] >> 8);
        /* A rule with a difference needs room for 3 more weights. */
        if ((r->diff[0] || r->diff[1] || r->diff[2]) &&
            dst->lengths[pagec] < (src->lengths[pageb] + 3)) {
          if ((src->lengths[pageb] + 3) > MY_UCA_MAX_WEIGHT_SIZE)
            dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE;
          else
            dst->lengths[pagec] = src->lengths[pageb] + 3;
        } else if (dst->lengths[pagec] < src->lengths[pageb])
          dst->lengths[pagec] = src->lengths[pageb];
      }
      dst->weights[pagec] = nullptr; /* Mark that we'll overwrite this page */
    } else
      has_contractions = true;
  }

  if (has_contractions) {
    dst->have_contractions = true;
    dst->contraction_nodes = new std::vector(0);
    dst->contraction_flags = new std::array{};
  }
  if (cs->coll_param == &zh_coll_param) {
    /*
      We are going to reorder the weight of characters in uca pages when
      initializing this collation. And because of the reorder rule [reorder
      Hani], we need to change almost every character's weight. So copy all
      the pages. Please also see the comment on modify_all_zh_pages().
    */
    bool rc;
    for (i = 0; i < npages; i++) {
      if (dst->lengths[i] && (rc = my_uca_copy_page(cs, loader, src, dst, i)))
        return rc;
    }
    modify_all_zh_pages(cs->coll_param->reorder_param, dst, npages);
    copy_zh_han_pages(dst, loader);
  } else {
    /* Allocate pages that we'll overwrite and copy default weights */
    for (i = 0; i < npages; i++) {
      bool rc;
      /*
        Don't touch pages with lengths[i]==0, they have implicit weights
        calculated algorithmically.
      */
      if (!dst->weights[i] && dst->lengths[i] &&
          (rc = my_uca_copy_page(cs, loader, src, dst, i)))
        return rc;
    }

    copy_ja_han_pages(cs, dst);
  }

  /*
    Preparatory step is done at this point.
    Now we have memory allocated for the pages that we'll overwrite,
    and for contractions, including previous context contractions.
    Also, for the pages that we'll overwrite, we have copied default weights.
    Now iterate through the rules, overwrite weights for the characters
    that appear in the rules, and put all contractions into contraction list.
  */
  for (r = rules->rule; r < rlast; r++) {
    if (apply_one_rule(cs, rules, r, level, dst)) {
      /* Release the page index arrays and the ownership flags on failure. */
      loader->mem_free(dst->lengths);
      loader->mem_free(dst->weights);
      delete dst->m_allocated_weights;
      dst->lengths = nullptr;
      dst->weights = nullptr;
      dst->m_allocated_weights = nullptr;
      return true;
    }
  }
  return false;
}

/**
  Check whether the composition character is already in rule list
  @param  rules   The rule list
  @param  wc      The composition character
  @return true    The composition character is already in list
          false   The composition character is not in list
*/
static bool my_comp_in_rulelist(const MY_COLL_RULES *rules, my_wc_t wc) {
  MY_COLL_RULE *r, *rlast;
  for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
    /* Match only rules whose shift sequence is exactly this one character. */
    if (r->curr[0] == wc && r->curr[1] == 0) return true;
  }
  return false;
}

/**
  Check whether a composition character in the decomposition list is a
  normal character.
@param dec_ind The index of composition character in list @return Whether it is a normal character */ static inline bool my_compchar_is_normal_char(unsigned dec_ind) { return uni_dec[dec_ind].decomp_tag == DECOMP_TAG_NONE; } static inline bool my_compchar_is_normal_char(const Unidata_decomp *decomp) { return my_compchar_is_normal_char(decomp - std::begin(uni_dec)); } static Unidata_decomp *get_decomposition(my_wc_t ch) { auto comp_func = [](Unidata_decomp x, Unidata_decomp y) { return x.charcode < y.charcode; }; Unidata_decomp to_find = {ch, CHAR_CATEGORY_LU, DECOMP_TAG_NONE, {0}}; Unidata_decomp *decomp = std::lower_bound( std::begin(uni_dec), std::end(uni_dec), to_find, comp_func); if (decomp == std::end(uni_dec) || decomp->charcode != ch) return nullptr; return decomp; } static Combining_mark *my_find_combining_mark(my_wc_t code) { auto comp_func = [](Combining_mark x, Combining_mark y) { return x.charcode < y.charcode; }; Combining_mark to_find = {code, 0}; return std::lower_bound(std::begin(combining_marks), std::end(combining_marks), to_find, comp_func); } /** Check if a list of combining marks contains the whole list of origin decomposed combining marks. @param origin_dec The origin list of combining marks decomposed from character in tailoring rule. @param dec_codes The list of combining marks decomposed from character in decomposition list. @param dec_diff The combining marks exist in dec_codes but not in origin_dec. @return Whether the list of combining marks contains the whole list of origin combining marks. 
*/
static bool my_is_inheritance_of_origin(const my_wc_t *origin_dec,
                                        const my_wc_t *dec_codes,
                                        my_wc_t *dec_diff) {
  /*
    ind0 walks origin_dec, ind1 walks dec_codes and ind2 is the next free
    slot of dec_diff. All three start at 1: element 0 is the leading
    character, which must be the same in both lists, and dec_diff[0] is
    never written here (the caller pre-fills it).
  */
  int ind0, ind1, ind2;
  if (origin_dec[0] != dec_codes[0]) return false;
  for (ind0 = ind1 = ind2 = 1; ind0 < MY_UCA_MAX_CONTRACTION &&
                               ind1 < MY_UCA_MAX_CONTRACTION &&
                               origin_dec[ind0] && dec_codes[ind1];) {
    if (origin_dec[ind0] == dec_codes[ind1]) {
      ind0++;
      ind1++;
    } else {
      /*
        The marks differ. If both have the same ccc value the candidate is
        rejected; otherwise the candidate's mark is recorded as "extra" and
        only ind1 advances.
        NOTE(review): my_find_combining_mark() returns an unchecked
        lower_bound position, so mark0/mark1 are only meaningful when both
        code points are present in combining_marks - confirm.
      */
      Combining_mark *mark0 = my_find_combining_mark(origin_dec[ind0]);
      Combining_mark *mark1 = my_find_combining_mark(dec_codes[ind1]);
      if (mark0->ccc == mark1->ccc) return false;
      dec_diff[ind2++] = dec_codes[ind1++];
    }
  }
  /* Success only if every mark of origin_dec has been matched. */
  if (ind0 >= MY_UCA_MAX_CONTRACTION || !origin_dec[ind0]) {
    /* Copy the rest of dec_codes; ind2 <= ind1, so dec_diff stays in bounds. */
    while (ind1 < MY_UCA_MAX_CONTRACTION) {
      dec_diff[ind2++] = dec_codes[ind1++];
    }
    return true;
  }
  return false;
}

/**
  Add new rules recursively if one rule's characters are in decomposition
  list.

  Every entry of uni_dec is examined; for each entry whose decomposition
  contains the decomposition of the rule's character, and which has no rule
  of its own yet, a rule is appended to the list.

  NOTE(review): the template argument of std::bitset in the signature below
  appears to have been lost from this text; restore it from the canonical
  source.

  @param  rules       The rule list
  @param  r           The rule to check
  @param  decomp_rec  The decomposition of the character in rule.
  @param  comp_added  Bitset which marks whether the comp character has
                      been added to rule list.
  @return 1 Error adding new rules
          0 Add rules successfully
*/
static int my_coll_add_inherit_rules(
    MY_COLL_RULES *rules, MY_COLL_RULE *r, const Unidata_decomp *decomp_rec,
    std::bitset *comp_added) {
  for (unsigned dec_ind = 0; dec_ind < array_elements(uni_dec); dec_ind++) {
    /*
      For normal character which can be decomposed, it is always
      decomposed to be another character and one combining mark.

      Currently we only support the weight inheritance of character that
      can be canonical-decomposed to another character and a list of
      combining marks. So skip the compatibility decomposition.

      Sample from UnicodeData.txt:
      Canonical decomposition:     U+00DC : U+0055 U+0308
      Compatibility decomposition: U+FF59 : <wide> U+0079
    */
    if (!my_compchar_is_normal_char(dec_ind) || comp_added->test(dec_ind) ||
        (decomp_rec != nullptr &&
         uni_dec[dec_ind].decomp_tag != decomp_rec->decomp_tag))
      continue;
    /*
      In DUCET, all accented character's weight is defined as base
      character's weight followed by accent mark's weight. For example:
      00DC = 0055 + 0308
      0055  ; [.1E30.0020.0008] # LATIN CAPITAL LETTER U
      0308  ; [.0000.002B.0002] # COMBINING DIAERESIS
      00DC  ; [.1E30.0020.0008][.0000.002B.0002] # LATIN CAPITAL LETTER U
                                                   WITH DIAERESIS
      So the composition character's rule should be same as origin rule
      except of the change of curr value.
    */
    /* dec_diff becomes the base of the new rule: r's character + extras. */
    my_wc_t dec_diff[MY_UCA_MAX_CONTRACTION]{r->curr[0], 0};
    my_wc_t orig_dec[MY_UCA_MAX_CONTRACTION]{0};
    if (decomp_rec == nullptr) {
      /*
        If there is no decomposition record found in Unidata_decomp, it
        means its decomposition form is itself.
      */
      orig_dec[0] = r->curr[0];
    } else {
      memcpy(orig_dec, decomp_rec->dec_codes, sizeof(orig_dec));
    }

    if (my_is_inheritance_of_origin(orig_dec, uni_dec[dec_ind].dec_codes,
                                    dec_diff) &&
        !my_comp_in_rulelist(rules, uni_dec[dec_ind].charcode)) {
      MY_COLL_RULE newrule{{0}, {uni_dec[dec_ind].charcode, 0}, {0}, 0, false};
      memcpy(newrule.base, dec_diff, sizeof(newrule.base));
      if (my_coll_rules_add(rules, &newrule)) return 1;
      /* Remember the entry so that later rules do not add it again. */
      comp_added->set(dec_ind);
    }
  }
  return 0;
}

/**
  Check whether any code point after the first one of a decomposition is
  the first code point of a rule in [r_start, r_end).

  @param  dec_codes Decomposition; scanning starts at index 1 and stops at
                    the first zero element
  @param  r_start   First rule to inspect
  @param  r_end     One past the last rule to inspect
  @return true if such a rule exists, false otherwise
*/
static bool combining_mark_in_rulelist(const my_wc_t *dec_codes,
                                       const MY_COLL_RULE *r_start,
                                       const MY_COLL_RULE *r_end) {
  for (int i = 1; i < MY_UCA_MAX_CONTRACTION; ++i) {
    if (!*(dec_codes + i)) return false;
    for (const MY_COLL_RULE *r = r_start; r < r_end; ++r) {
      if (r->curr[0] == *(dec_codes + i)) {
        return true;
      }
    }
  }
  return false;
}

/**
  When the collation has norm_enabled set, add a rule for every normal
  character of uni_dec that has no rule yet and whose decomposition uses a
  code point tailored by one of the rules present on entry. The new rule's
  base is the character's decomposition.

  @param  cs    Character set info
  @param  rules The rule list to extend
  @return 1 Error adding a rule
          0 Success (also when nothing had to be done)
*/
static int add_normalization_rules(const CHARSET_INFO *cs,
                                   MY_COLL_RULES *rules) {
  if (!cs->coll_param || !cs->coll_param->norm_enabled) return 0;
  /* Only the rules present now are matched against; see the third test. */
  const int orig_rule_num = rules->nrules;
  for (Unidata_decomp *decomp = std::begin(uni_dec);
       decomp < std::end(uni_dec); ++decomp) {
    if (!my_compchar_is_normal_char(decomp) ||
        my_comp_in_rulelist(rules, decomp->charcode) ||
        !combining_mark_in_rulelist(decomp->dec_codes, rules->rule,
                                    rules->rule + orig_rule_num))
      continue;
    MY_COLL_RULE newrule{{0}, {decomp->charcode, 0}, {0}, 0, false};
    memcpy(newrule.base, decomp->dec_codes, sizeof(newrule.base));
    if (my_coll_rules_add(rules, &newrule)) return 1;
  }
  return 0;
}

/**
  For every rule in rule list, check and add new rules if it is in
  decomposition list. Does nothing unless the rules are based on UCA 9.0.0.

  @param  cs    Character set info
  @param  rules The rule list
  @return 1 Error happens when adding new rule
          0 Add rules successfully
*/
static int my_coll_check_rule_and_inherit(const CHARSET_INFO *cs,
                                          MY_COLL_RULES *rules) {
  if (rules->uca->version != UCA_V900) return 0;

  /*
    Character can combine with marks to be a new character. For example,
    A + [mark b] = A1, A1 + [mark c] = A2. We think the weight of A1 and A2
    should shift with A if A is in rule list and its weight shifts, unless
    A1 / A2 is already in rule list.
  */
  /*
    NOTE(review): the template argument of std::bitset appears to have been
    lost from this text; restore it from the canonical source.
  */
  std::bitset comp_added;
  /* Rules appended below are not themselves re-examined. */
  int orig_rule_num = rules->nrules;
  for (int i = 0; i < orig_rule_num; ++i) {
    /*
      Work on a copy of the rule - presumably because my_coll_rules_add()
      may move rules->rule while rules are appended; confirm.
    */
    MY_COLL_RULE r = *(rules->rule + i);
    /*
      Do not add inheritance rule for contraction.
      But for the Chinese collation, the weight shift rule of Chinese
      collation is a bit different from all the languages we added so far.
      For example, it has a rule "&e << ... << e\\u0302\\u0300". So far, if
      a language's rule involves 'e\\u0302\\u0300', it will use the
      combining form character, U+1EC1, and it is not a contraction. If we
      don't handle this for Chinese collation, it will skip some further
      rule inheriting.
    */
    if (cs->coll_param != &zh_coll_param && r.curr[1]) continue;
    Unidata_decomp *decomp_rec = get_decomposition(r.curr[0]);
    if (my_coll_add_inherit_rules(rules, &r, decomp_rec, &comp_added))
      return 1;
  }
  return 0;
}

/**
  Helper function to store weight boundary values.
@param[out] wt_rec Weight boundary for each character group and gap between groups @param rec_ind The position from where to store weight boundary @param old_begin Beginning weight of character group before reorder @param old_end End weight of character group before reorder @param new_begin Beginning weight of character group after reorder @param new_end End weight of character group after reorder */ static inline void my_set_weight_rec( Reorder_wt_rec (&wt_rec)[2 * UCA_MAX_CHAR_GRP], int rec_ind, uint16_t old_begin, uint16_t old_end, uint16_t new_begin, uint16_t new_end) { wt_rec[rec_ind] = {{old_begin, old_end}, {new_begin, new_end}}; } /** Calculate the reorder parameters for the character groups. @param cs Character set info @param[out] rec_ind The position from where to store weight boundary */ static void my_calc_char_grp_param(const CHARSET_INFO *cs, int &rec_ind) { int weight_start = START_WEIGHT_TO_REORDER; int grp_ind = 0; Reorder_param *param = cs->coll_param->reorder_param; for (; grp_ind < UCA_MAX_CHAR_GRP; ++grp_ind) { if (param->reorder_grp[grp_ind] == CHARGRP_NONE) break; for (Char_grp_info *info = std::begin(char_grp_infos); info < std::end(char_grp_infos); ++info) { if (param->reorder_grp[grp_ind] != info->group) continue; my_set_weight_rec( param->wt_rec, grp_ind, info->grp_wt_bdy.begin, info->grp_wt_bdy.end, weight_start, weight_start + info->grp_wt_bdy.end - info->grp_wt_bdy.begin); weight_start = param->wt_rec[grp_ind].new_wt_bdy.end + 1; break; } } rec_ind = grp_ind; } /** Calculate the reorder parameters for the gap between character groups. 
@param         cs      Character set info
  @param[in,out] rec_ind The position from where to store weight boundary;
                         advanced by one for every gap record written
*/
static void my_calc_char_grp_gap_param(CHARSET_INFO *cs, int &rec_ind) {
  Reorder_param *param = cs->coll_param->reorder_param;
  /*
    New weights for the gaps continue right after the last reordered group.
    Assumes rec_ind >= 1, i.e. my_calc_char_grp_param() recorded at least
    one group - TODO confirm for every collation with a reorder_param.
  */
  uint16_t weight_start = param->wt_rec[rec_ind - 1].new_wt_bdy.end + 1;
  /* Previous reordered group met while walking char_grp_infos in order. */
  Char_grp_info *last_grp = nullptr;
  for (Char_grp_info *info = std::begin(char_grp_infos);
       info < std::end(char_grp_infos); ++info) {
    /* Only groups that are listed in reorder_grp are of interest. */
    for (int ind = 0; ind < UCA_MAX_CHAR_GRP; ++ind) {
      if (param->reorder_grp[ind] == CHARGRP_NONE) break;
      if (param->reorder_grp[ind] != info->group) continue;
      /* max_weight ends up as the largest old end weight of these groups. */
      if (param->max_weight < info->grp_wt_bdy.end)
        param->max_weight = info->grp_wt_bdy.end;
      /*
        There might be some character groups before the first character
        group in our list.
      */
      if (!last_grp && info->grp_wt_bdy.begin > START_WEIGHT_TO_REORDER) {
        my_set_weight_rec(param->wt_rec, rec_ind, START_WEIGHT_TO_REORDER,
                          info->grp_wt_bdy.begin - 1, weight_start,
                          weight_start + (info->grp_wt_bdy.begin - 1) -
                              START_WEIGHT_TO_REORDER);
        weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1;
        rec_ind++;
      }
      /* Gap between 2 character groups in our list. */
      if (last_grp &&
          last_grp->grp_wt_bdy.end < (info->grp_wt_bdy.begin - 1)) {
        my_set_weight_rec(param->wt_rec, rec_ind, last_grp->grp_wt_bdy.end + 1,
                          info->grp_wt_bdy.begin - 1, weight_start,
                          weight_start + (info->grp_wt_bdy.begin - 1) -
                              (last_grp->grp_wt_bdy.end + 1));
        weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1;
        rec_ind++;
      }
      last_grp = info;
      break;
    }
  }
  param->wt_rec_num = rec_ind;
}

/**
  Prepare reorder parameters.

  @param  cs Character set info
  @return Number of weight records prepared, or 0 when the collation has no
          reorder parameters or is the Chinese collation
*/
static int my_prepare_reorder(CHARSET_INFO *cs) {
  /*
    Chinese collation's reordering is done in next_implicit() and
    modify_all_zh_pages(). See the comment on zh_reorder_param and
    change_zh_implicit().
  */
  if (!cs->coll_param->reorder_param || cs->coll_param == &zh_coll_param)
    return 0;
  /*
    For each group of character, for example, latin characters, their
    weights are in a separate range. The default sequence of these groups
    is: Latin, Greek, Coptic, Cyrillic, and so on. Some languages want to
    change the default sequence. For example, Croatian wants to put
    Cyrillic to just behind Latin. We need to reorder the character groups
    and change their weight accordingly. Here we calculate the parameters
    needed for weight change. And the change will happen when weight
    returns from strnxfrm.
  */
  int rec_ind = 0;
  my_calc_char_grp_param(cs, rec_ind);
  my_calc_char_grp_gap_param(cs, rec_ind);
  return rec_ind;
}

/**
  Patch the reorder records for the Japanese collation (see the comment in
  the body).

  @param cs      Character set info
  @param rec_ind Number of weight records, as returned by
                 my_prepare_reorder(); wt_rec[rec_ind - 1] is reset and
                 wt_rec[rec_ind] is added
*/
static void adjust_japanese_weight(CHARSET_INFO *cs, int rec_ind) {
  /*
    Per CLDR 30, Japanese collations need to reorder characters as
    [Latin, Kana, Han, others]. So for the original character group list:
    [Latin, CharA, Kana, CharB, Han, Others], it should be reordered as
    [Latin, Kana, Han, CharA, CharB, Others]. But my_prepare_reorder()
    reorders original group to be [Latin, Kana, CharA, CharB, Han, Others].
    This is because Han characters are different from others in that Han
    characters' weight is implicit and has two primary weights for each
    character. Other characters have only one primary weight for each
    (base) character. Han characters always sort bigger.
    CLDR defines the collating order for 6355 Japanese Han characters. All
    of them are in [U+4E00, U+9FFF]; we give them tailored primary weights
    in ja_han_pages. The tailored primary weights are just after Kana,
    because these characters are very common. These Han characters' weight
    pages will be added to collation's UCA data in copy_ja_han_pages().
    For the other Han characters, we don't change their implicit weights,
    which is [FB80 - FB85, 0020, 0002][XXXX, 0000, 0000].
    To make sure CharA and CharB's weight is greater than all Han
    characters, we give them weight as [FB86, 0000, 0000][origin weights].
    This will be done in apply_reorder_param().

    Because the values stored in last wt_rec element is calculated for
    moving CharA to be after Kana, but we want them to be after all Han
    character, we reset the weight boundary here, and will change all these
    characters' weight in apply_reorder_param().
  */
  Reorder_param *param = cs->coll_param->reorder_param;
  param->wt_rec[rec_ind - 1].new_wt_bdy.begin = 0;
  param->wt_rec[rec_ind - 1].new_wt_bdy.end = 0;
  /*
    Extra record starting right after the old range of wt_rec[1].
    NOTE(review): 0x54A3 is presumably the last primary weight that has to
    be moved behind Han; confirm against the UCA 9.0.0 weight data.
  */
  param->wt_rec[rec_ind].old_wt_bdy.begin =
      param->wt_rec[1].old_wt_bdy.end + 1;
  param->wt_rec[rec_ind].old_wt_bdy.end = 0x54A3;
  param->wt_rec[rec_ind].new_wt_bdy.begin = 0;
  param->wt_rec[rec_ind].new_wt_bdy.end = 0;
  param->wt_rec_num++;
  param->max_weight = 0x54A3;
}

/**
  Prepare parametric tailoring, like reorder, etc.
  Does nothing unless the rules are based on UCA 9.0.0 and the collation
  has a coll_param.

  @param  cs    Character set info
  @param  rules Collation rule list to add to.
  @return false Collation parameters applied successfully.
          true  Error happened.
*/
static bool my_prepare_coll_param(CHARSET_INFO *cs, MY_COLL_RULES *rules) {
  if (rules->uca->version != UCA_V900 || !cs->coll_param) return false;

  int rec_ind = my_prepare_reorder(cs);
  if (add_normalization_rules(cs, rules)) return true;

  if (cs->coll_param == &ja_coll_param) adjust_japanese_weight(cs, rec_ind);
  /* Might add other parametric tailoring rules later. */
  return false;
}

/*
  This function copies an UCS2 collation from
  the default Unicode Collation Algorithm (UCA)
  weights applying tailorings, i.e. a set of
  alternative weights for some characters.

  The default UCA weights are stored in uca_weight/uca_length.
  They consist of 256 pages, 256 character each.

  If a page is not overwritten by tailoring rules,
  it is copied as is from UCA.

  If a page contains some overwritten characters, it is
  allocated. Untouched characters are copied from the
  default weights.
*/ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, MY_CHARSET_ERRMSG *errmsg) { if (!cs->tailoring) return false; /* Ok to add a collation without tailoring */ MY_COLL_RULES rules; MY_UCA_INFO new_uca; MY_UCA_INFO *src_uca = nullptr; int rc = 0; MY_UCA_INFO *src, *dst; size_t npages; bool lengths_are_temporary; errmsg->errcode = 0; *errmsg->errarg = '\0'; rules.loader = loader; rules.errmsg = errmsg; rules.uca = cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */ /* Parse ICU Collation Customization expression */ if ((rc = my_coll_rule_parse(&rules, cs->tailoring, cs->tailoring + strlen(cs->tailoring), cs->m_coll_name))) goto ex; if ((rc = my_coll_check_rule_and_inherit(cs, &rules))) { goto ex; } if ((rc = my_prepare_coll_param(cs, &rules))) { goto ex; } if (rules.uca->version == UCA_V520) /* Unicode-5.2.0 requested */ { src_uca = &my_uca_v520; cs->caseinfo = &my_unicase_unicode520; } else if (rules.uca->version == UCA_V400) /* Unicode-4.0.0 requested */ { src_uca = &my_uca_v400; if (!cs->caseinfo) cs->caseinfo = &my_unicase_default; } else /* No Unicode version specified */ { src_uca = cs->uca ? cs->uca : &my_uca_v400; if (!cs->caseinfo) cs->caseinfo = &my_unicase_default; } /* For UCA 9.0.0, we don't have a length page, but we still create one temporarily so that we can keep track of much memory we need to allocate for weights. 
*/ src = src_uca; dst = &new_uca; dst->extra_ce_pri_base = cs->uca->extra_ce_pri_base; dst->extra_ce_sec_base = cs->uca->extra_ce_sec_base; dst->extra_ce_ter_base = cs->uca->extra_ce_ter_base; if (cs->coll_param && cs->coll_param == &zh_coll_param) { dst->extra_ce_pri_base = ZH_EXTRA_CE_PRI; } npages = (src->maxchar + 1) / 256; lengths_are_temporary = (rules.uca->version == UCA_V900); if (lengths_are_temporary) { src->lengths = static_cast(loader->mem_malloc(npages)); if (src->lengths == nullptr) goto ex; synthesize_lengths_900(src->lengths, src->weights, npages); } if ((rc = init_weight_level(cs, &rules, 0, dst, src))) goto ex; if (lengths_are_temporary) { loader->mem_free(src->lengths); loader->mem_free(dst->lengths); src->lengths = nullptr; dst->lengths = nullptr; } new_uca.version = src_uca->version; new_uca.m_based_on = src_uca; cs->uca = new MY_UCA_INFO(new_uca); ex: free(rules.rule); if (rc != 0 && errmsg->errcode) { delete new_uca.contraction_nodes; delete new_uca.contraction_flags; loader->reporter(ERROR_LEVEL, errmsg->errcode, errmsg->errarg); } return rc; } static void my_coll_uninit_uca(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) { if (cs->uca && cs->uca->contraction_nodes) { delete cs->uca->contraction_nodes; delete cs->uca->contraction_flags; cs->uca->contraction_nodes = nullptr; cs->uca->contraction_flags = nullptr; } if (cs->uca != nullptr && cs->uca != &my_uca_v400 && cs->uca != &my_uca_v520 && cs->uca != &my_uca_v900) { if (cs->uca->m_allocated_weights != nullptr) { for (size_t i = 0; i < cs->uca->m_allocated_weights->size(); ++i) { if (cs->uca->m_allocated_weights->at(i) != 0) { loader->mem_free(cs->uca->weights[i]); cs->uca->weights[i] = nullptr; } } } loader->mem_free(cs->uca->lengths); cs->uca->lengths = nullptr; loader->mem_free(cs->uca->weights); cs->uca->weights = nullptr; delete cs->uca->m_allocated_weights; cs->uca->m_allocated_weights = nullptr; MY_UCA_INFO *to_be_deleted = cs->uca; cs->uca = cs->uca->m_based_on; delete to_be_deleted; 
cs->state &= ~MY_CS_READY; } } /* Universal CHARSET_INFO compatible wrappers for the above internal functions. Should work for any character set. */ extern "C" { static bool my_coll_init_uca(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, MY_CHARSET_ERRMSG *errmsg) { cs->pad_char = ' '; cs->ctype = my_charset_utf8mb3_unicode_ci.ctype; if (!cs->caseinfo) cs->caseinfo = &my_unicase_default; if (!cs->uca) cs->uca = &my_uca_v400; return create_tailoring(cs, loader, errmsg); } static int my_strnncoll_any_uca(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { return my_strnncoll_uca, 1>( cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); } Mb_wc_through_function_pointer mb_wc(cs); return my_strnncoll_uca, 1>( cs, mb_wc, s, slen, t, tlen, t_is_prefix); } static int my_strnncollsp_any_uca(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { return my_strnncollsp_uca(cs, Mb_wc_utf8mb4(), s, slen, t, tlen); } Mb_wc_through_function_pointer mb_wc(cs); return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen); } static void my_hash_sort_any_uca(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *n2) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { my_hash_sort_uca(cs, Mb_wc_utf8mb4(), s, slen, n1, n2); } else { Mb_wc_through_function_pointer mb_wc(cs); my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2); } } static size_t my_strnxfrm_any_uca(const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { return my_strnxfrm_uca(cs, Mb_wc_utf8mb4(), dst, dstlen, num_codepoints, src, srclen, flags); } Mb_wc_through_function_pointer mb_wc(cs); return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen, flags); } static int my_strnncoll_uca_900(const 
CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { switch (cs->levels_for_compare) { case 1: return my_strnncoll_uca, 1>( cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); case 2: return my_strnncoll_uca, 2>( cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); default: assert(false); case 3: return my_strnncoll_uca, 3>( cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); case 4: return my_strnncoll_uca, 4>( cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); } } Mb_wc_through_function_pointer mb_wc(cs); switch (cs->levels_for_compare) { case 1: return my_strnncoll_uca, 1>( cs, mb_wc, s, slen, t, tlen, t_is_prefix); case 2: return my_strnncoll_uca, 2>( cs, mb_wc, s, slen, t, tlen, t_is_prefix); default: assert(false); case 3: return my_strnncoll_uca, 3>( cs, mb_wc, s, slen, t, tlen, t_is_prefix); case 4: return my_strnncoll_uca, 4>( cs, mb_wc, s, slen, t, tlen, t_is_prefix); } } static int my_strnncollsp_uca_900(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) { // We are a NO PAD collation, so this is identical to strnncoll. return my_strnncoll_uca_900(cs, s, slen, t, tlen, false); } } // extern "C" template static void my_hash_sort_uca_900_tmpl(const CHARSET_INFO *cs, const Mb_wc mb_wc, const uint8_t *s, size_t slen, uint64_t *n1) { uca_scanner_900 scanner(mb_wc, cs, s, slen); /* A variation of the FNV-1a hash. The differences between this and standard FNV-1a as described in literature are: - We work naturally on 16-bit weights, so we XOR in the entire weight instead of hashing byte-by-byte. (This is effectively a speed/quality tradeoff, as it will reduce avalanche.) - We use the n1 seed by XOR-ing it onto the offset basis; FNV-1a as typically described does not use a seed. This should be safe, since there's nothing magical about the offset basis; it's just the FNV-1a hash of some human-readable text. 
This is nowhere near a perfect hash function; it has suboptimal avalanche characteristics, and it not multicollision resistant. In particular, it fails many SMHasher tests, mostly for bias (collision tests are fine). However, it is of much better quality than the home-grown hash used for other collations (which fails _all_ SMHasher tests), while being much faster. We ignore the n2 seed entirely, since we don't need it. The caller is responsible for doing hash folding at the end; we can't do that. See http://isthe.com/chongo/tech/comp/fnv/#FNV-param for constants. */ uint64_t h = *n1; h ^= 14695981039346656037ULL; scanner.for_each_weight( [&](int s_res, bool) -> bool { h ^= s_res; h *= 1099511628211ULL; return true; }, [](int) { return true; }); *n1 = h; } extern "C" { static void my_hash_sort_uca_900(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { switch (cs->levels_for_compare) { case 1: return my_hash_sort_uca_900_tmpl(cs, Mb_wc_utf8mb4(), s, slen, n1); case 2: return my_hash_sort_uca_900_tmpl(cs, Mb_wc_utf8mb4(), s, slen, n1); default: assert(false); case 3: return my_hash_sort_uca_900_tmpl(cs, Mb_wc_utf8mb4(), s, slen, n1); case 4: return my_hash_sort_uca_900_tmpl(cs, Mb_wc_utf8mb4(), s, slen, n1); } } Mb_wc_through_function_pointer mb_wc(cs); switch (cs->levels_for_compare) { case 1: return my_hash_sort_uca_900_tmpl(cs, mb_wc, s, slen, n1); case 2: return my_hash_sort_uca_900_tmpl(cs, mb_wc, s, slen, n1); default: assert(false); case 3: return my_hash_sort_uca_900_tmpl(cs, mb_wc, s, slen, n1); case 4: return my_hash_sort_uca_900_tmpl(cs, mb_wc, s, slen, n1); } } } // extern "C" /* Check if a constant can be propagated Currently we don't check the constant itself, and decide not to propagate a constant just if the collation itself allows expansions or contractions. 
*/ bool my_propagate_uca_900(const CHARSET_INFO *cs, const uint8_t *str [[maybe_unused]], size_t length [[maybe_unused]]) { return !my_uca_have_contractions(cs->uca); } template static size_t my_strnxfrm_uca_900_tmpl(const CHARSET_INFO *cs, const Mb_wc mb_wc, uint8_t *dst, size_t dstlen, const uint8_t *src, size_t srclen, unsigned flags) { uint8_t *d0 = dst; uint8_t *dst_end = dst + dstlen; uca_scanner_900 scanner(mb_wc, cs, src, srclen); assert((dstlen % 2) == 0); if ((dstlen % 2) == 1) { // Emergency workaround for optimized mode. --dst_end; } if (dst != dst_end) { scanner.for_each_weight( [&dst, dst_end](int s_res, bool is_level_separator [[maybe_unused]]) -> bool { assert(is_level_separator == (s_res == 0)); if (LEVELS_FOR_COMPARE == 1) assert(!is_level_separator); dst = store16be(dst, s_res); return (dst < dst_end); }, [&dst, dst_end](int num_weights) { return (dst < dst_end - num_weights * 2); }); } if (flags & MY_STRXFRM_PAD_TO_MAXLEN) { memset(dst, 0, dst_end - dst); dst = dst_end; } return dst - d0; } extern "C" { static size_t my_strnxfrm_uca_900(const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned num_codepoints [[maybe_unused]], const uint8_t *src, size_t srclen, unsigned flags) { if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { switch (cs->levels_for_compare) { case 1: return my_strnxfrm_uca_900_tmpl( cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); case 2: return my_strnxfrm_uca_900_tmpl( cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); default: assert(false); case 3: return my_strnxfrm_uca_900_tmpl( cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); case 4: return my_strnxfrm_uca_900_tmpl( cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); } } else { Mb_wc_through_function_pointer mb_wc(cs); switch (cs->levels_for_compare) { case 1: return my_strnxfrm_uca_900_tmpl( cs, mb_wc, dst, dstlen, src, srclen, flags); case 2: return my_strnxfrm_uca_900_tmpl( cs, mb_wc, dst, dstlen, src, srclen, flags); default: assert(false); case 
3: return my_strnxfrm_uca_900_tmpl( cs, mb_wc, dst, dstlen, src, srclen, flags); case 4: return my_strnxfrm_uca_900_tmpl( cs, mb_wc, dst, dstlen, src, srclen, flags); } } } static size_t my_strnxfrmlen_uca_900(const CHARSET_INFO *cs, size_t len) { /* The character with the most weights is U+FDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, which we truncate to eight weights. This is the most we can get in regular DUCET. In addition, collations with reorderings can add an extra weight per weight, which currently only happens on the primary level. We simulate this by simply adding an extra level. One could conceivably have tailorings yielding expansions having more than this, but we don't currently, and mostly, tailorings are about contractions and adding single weights anyway. We also need to add room for one level separator between each level. */ // We really ought to have len % 4 == 0, but not all calling code conforms. const size_t num_codepoints = (len + 3) / 4; const size_t max_num_weights_per_level = num_codepoints * 8; size_t max_num_weights = max_num_weights_per_level * cs->levels_for_compare; if (cs->coll_param && cs->coll_param->reorder_param) { max_num_weights += max_num_weights_per_level; } return (max_num_weights + (cs->levels_for_compare - 1)) * sizeof(uint16_t); } } // extern "C" /* UCS2 optimized CHARSET_INFO compatible wrappers. 
*/ extern "C" { static int my_strnncoll_ucs2_uca(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) { Mb_wc_through_function_pointer mb_wc(cs); return my_strnncoll_uca, 1>( cs, mb_wc, s, slen, t, tlen, t_is_prefix); } static int my_strnncollsp_ucs2_uca(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) { Mb_wc_through_function_pointer mb_wc(cs); return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen); } static void my_hash_sort_ucs2_uca(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *n2) { Mb_wc_through_function_pointer mb_wc(cs); my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2); } static size_t my_strnxfrm_ucs2_uca(const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) { Mb_wc_through_function_pointer mb_wc(cs); return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen, flags); } } // extern "C" MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = { my_coll_init_uca, /* init */ my_coll_uninit_uca, my_strnncoll_ucs2_uca, my_strnncollsp_ucs2_uca, my_strnxfrm_ucs2_uca, my_strnxfrmlen_simple, my_like_range_generic, my_wildcmp_uca, nullptr, my_instr_mb, my_hash_sort_ucs2_uca, my_propagate_complex}; CHARSET_INFO my_charset_ucs2_unicode_ci = { 128, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_unicode_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 
0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_ucs2_icelandic_uca_ci = { 129, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_icelandic_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ icelandic, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_ucs2_latvian_uca_ci = { 130, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_latvian_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ latvian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE}; CHARSET_INFO 
my_charset_ucs2_romanian_uca_ci = { 131, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_romanian_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ romanian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_ucs2_slovenian_uca_ci = { 132, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovenian_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ slovenian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_ucs2_polish_uca_ci = { 133, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_polish_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ polish, /* 
tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_estonian_ci (id 134): ucs2 UCA collation with the `estonian` tailoring. */
CHARSET_INFO my_charset_ucs2_estonian_uca_ci = {
    134, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_estonian_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ estonian, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_spanish_ci (id 135): ucs2 UCA collation with the `spanish` tailoring. */
CHARSET_INFO my_charset_ucs2_spanish_uca_ci = {
    135, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_spanish_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ spanish, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* 
caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_swedish_ci (id 136): ucs2 UCA collation with the `swedish` tailoring. */
CHARSET_INFO my_charset_ucs2_swedish_uca_ci = {
    136, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_swedish_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ swedish, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_turkish_ci (id 137): ucs2 UCA collation with the `turkish` tailoring.
   Unlike its ucs2 siblings it points caseinfo at my_unicase_turkish rather
   than my_unicase_default. */
CHARSET_INFO my_charset_ucs2_turkish_uca_ci = {
    137, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_turkish_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ turkish, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_turkish, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* 
max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_czech_ci (id 138): ucs2 UCA collation with the `czech` tailoring. */
CHARSET_INFO my_charset_ucs2_czech_uca_ci = {
    138, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_czech_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ czech, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_danish_ci (id 139): ucs2 UCA collation with the `danish` tailoring. */
CHARSET_INFO my_charset_ucs2_danish_uca_ci = {
    139, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_danish_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ danish, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_lithuanian_ci (id 140): ucs2 UCA collation with the `lithuanian` tailoring. */
CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci = {
    140, 0, 
0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_lithuanian_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ lithuanian, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_slovak_ci (id 141): ucs2 UCA collation with the `slovak` tailoring. */
CHARSET_INFO my_charset_ucs2_slovak_uca_ci = {
    141, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_slovak_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ slovak, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_spanish2_ci (id 142): ucs2 UCA collation with the `spanish2` tailoring. */
CHARSET_INFO my_charset_ucs2_spanish2_uca_ci = {
    142, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_spanish2_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ spanish2, /* tailoring */
    nullptr, /* coll_param */ 
nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_roman_ci (id 143): ucs2 UCA collation with the `roman` tailoring. */
CHARSET_INFO my_charset_ucs2_roman_uca_ci = {
    143, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_roman_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ roman, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_persian_ci (id 144): ucs2 UCA collation with the `persian` tailoring. */
CHARSET_INFO my_charset_ucs2_persian_uca_ci = {
    144, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_persian_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ persian, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, 
/* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_esperanto_ci (id 145): ucs2 UCA collation with the `esperanto` tailoring. */
CHARSET_INFO my_charset_ucs2_esperanto_uca_ci = {
    145, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_esperanto_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ esperanto, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_hungarian_ci (id 146): ucs2 UCA collation with the `hungarian` tailoring. */
CHARSET_INFO my_charset_ucs2_hungarian_uca_ci = {
    146, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_hungarian_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ hungarian, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 
false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_sinhala_ci (id 147): ucs2 UCA collation with the `sinhala` tailoring. */
CHARSET_INFO my_charset_ucs2_sinhala_uca_ci = {
    147, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_sinhala_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ sinhala, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_german2_ci (id 148): ucs2 UCA collation with the `german2` tailoring. */
CHARSET_INFO my_charset_ucs2_german2_uca_ci = {
    148, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_german2_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ german2, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_croatian_ci (id 149): ucs2 UCA collation with the `croatian` tailoring. */
CHARSET_INFO my_charset_ucs2_croatian_uca_ci = {
    149, 0, 0, /* number */
    MY_CS_COMPILED | 
MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_croatian_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ croatian, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_unicode_520_ci (id 150): ucs2 UCA collation with an empty tailoring
   string. It is the only ucs2 entry here that sets `uca` explicitly
   (to my_uca_v520) and that uses my_unicase_unicode520 for caseinfo. */
CHARSET_INFO my_charset_ucs2_unicode_520_ci = {
    150, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_unicode_520_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ "", /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v520, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_unicode520, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/* ucs2_vietnamese_ci (id 151): ucs2 UCA collation with the `vietnamese` tailoring. */
CHARSET_INFO my_charset_ucs2_vietnamese_ci = {
    151, 0, 0, /* number */
    MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
    "ucs2", /* cs name */ "ucs2_vietnamese_ci", /* m_coll_name */
    "UCS-2 Unicode", /* comment */ vietnamese, /* tailoring */
    nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, 
/* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    2, /* mbminlen */ 2, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler, PAD_SPACE};

/*
  Collation handler table that the utf8mb3 UCA CHARSET_INFO entries later in
  this file point at. Entries are positional.
  NOTE(review): the slot notes other than `init` are inferred from the
  function names; confirm them against the MY_COLLATION_HANDLER declaration.
*/
MY_COLLATION_HANDLER my_collation_any_uca_handler = {
    my_coll_init_uca, /* init */
    my_coll_uninit_uca, /* presumably uninit */
    my_strnncoll_any_uca, /* presumably strnncoll */
    my_strnncollsp_any_uca, /* presumably strnncollsp */
    my_strnxfrm_any_uca, /* presumably strnxfrm */
    my_strnxfrmlen_simple, /* presumably strnxfrmlen */
    my_like_range_mb, /* presumably like_range */
    my_wildcmp_uca, /* presumably wildcmp */
    my_strcasecmp_uca, /* presumably strcasecmp */
    my_instr_mb, /* presumably instr */
    my_hash_sort_any_uca, /* presumably hash_sort */
    my_propagate_complex}; /* presumably propagate */

/*
  Handler table built from the *_uca_900 routines. It shares the init/uninit,
  like-range, wildcmp, strcasecmp and instr entries with
  my_collation_any_uca_handler and differs in the compare, strnxfrm,
  strnxfrmlen, hash-sort and propagate entries.
  NOTE(review): slot names are inferred as above; confirm against the struct.
*/
MY_COLLATION_HANDLER my_collation_uca_900_handler = {
    my_coll_init_uca, /* init */
    my_coll_uninit_uca,
    my_strnncoll_uca_900,
    my_strnncollsp_uca_900,
    my_strnxfrm_uca_900,
    my_strnxfrmlen_uca_900,
    my_like_range_mb,
    my_wildcmp_uca,
    my_strcasecmp_uca,
    my_instr_mb,
    my_hash_sort_uca_900,
    my_propagate_uca_900};

/* We consider bytes with code more than 127 as a letter. This guarantees that word boundaries work fine with regular expressions. Note, there is no need to mark byte 255 as a letter, it is illegal byte in UTF8. 
*/ static const uint8_t ctype_utf8[] = { 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, 16, 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0}; extern MY_CHARSET_HANDLER my_charset_utf8mb3_handler; #define MY_CS_UTF8MB3_UCA_FLAGS \ (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE) CHARSET_INFO my_charset_utf8mb3_unicode_ci = { 192, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* cs name */ "utf8mb3_unicode_ci", /* m_coll_name */ "UCS-2 Unicode", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_icelandic_uca_ci = { 193, 0, 0, /* number */ 
MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_icelandic_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ icelandic, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_latvian_ci (id 194): utf8mb3 UCA collation with the `latvian` tailoring. */
CHARSET_INFO my_charset_utf8mb3_latvian_uca_ci = {
    194, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_latvian_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ latvian, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_romanian_ci (id 195): utf8mb3 UCA collation with the `romanian` tailoring. */
CHARSET_INFO my_charset_utf8mb3_romanian_uca_ci = {
    195, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_romanian_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ romanian, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ 
nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_slovenian_ci (id 196): utf8mb3 UCA collation with the `slovenian` tailoring. */
CHARSET_INFO my_charset_utf8mb3_slovenian_uca_ci = {
    196, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_slovenian_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ slovenian, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_polish_ci (id 197): utf8mb3 UCA collation with the `polish` tailoring. */
CHARSET_INFO my_charset_utf8mb3_polish_uca_ci = {
    197, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_polish_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ polish, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 
1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_estonian_ci (id 198): utf8mb3 UCA collation with the `estonian` tailoring. */
CHARSET_INFO my_charset_utf8mb3_estonian_uca_ci = {
    198, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_estonian_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ estonian, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_spanish_ci (id 199): utf8mb3 UCA collation with the `spanish` tailoring. */
CHARSET_INFO my_charset_utf8mb3_spanish_uca_ci = {
    199, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_spanish_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ spanish, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, 
&my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_swedish_ci (id 200): utf8mb3 UCA collation with the `swedish` tailoring. */
CHARSET_INFO my_charset_utf8mb3_swedish_uca_ci = {
    200, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_swedish_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ swedish, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_turkish_ci (id 201): utf8mb3 UCA collation with the `turkish`
   tailoring. It differs from its utf8mb3 siblings in two ways: caseinfo is
   my_unicase_turkish, and caseup_multiply / casedn_multiply are 2 instead
   of 1.
   NOTE(review): presumably Turkish case mapping can change the encoded
   length of a character; confirm before touching the multipliers. */
CHARSET_INFO my_charset_utf8mb3_turkish_uca_ci = {
    201, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_turkish_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ turkish, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_turkish, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 2, /* caseup_multiply */ 2, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_czech_ci (id 202): utf8mb3 UCA collation with the `czech` tailoring. */
CHARSET_INFO my_charset_utf8mb3_czech_uca_ci = {
    202, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_czech_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ czech, /* 
tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_danish_ci (id 203): utf8mb3 UCA collation with the `danish` tailoring. */
CHARSET_INFO my_charset_utf8mb3_danish_uca_ci = {
    203, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_danish_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ danish, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_lithuanian_ci (id 204): utf8mb3 UCA collation with the `lithuanian` tailoring. */
CHARSET_INFO my_charset_utf8mb3_lithuanian_uca_ci = {
    204, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_lithuanian_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ lithuanian, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* 
state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_slovak_ci (id 205): utf8mb3 UCA collation with the `slovak` tailoring. */
CHARSET_INFO my_charset_utf8mb3_slovak_uca_ci = {
    205, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_slovak_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ slovak, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_spanish2_ci (id 206): utf8mb3 UCA collation with the `spanish2` tailoring. */
CHARSET_INFO my_charset_utf8mb3_spanish2_uca_ci = {
    206, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_spanish2_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ spanish2, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* 
escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_roman_ci (id 207): utf8mb3 UCA collation with the `roman` tailoring. */
CHARSET_INFO my_charset_utf8mb3_roman_uca_ci = {
    207, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_roman_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ roman, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_persian_ci (id 208): utf8mb3 UCA collation with the `persian` tailoring. */
CHARSET_INFO my_charset_utf8mb3_persian_uca_ci = {
    208, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs name */ "utf8mb3_persian_ci", /* m_coll_name */
    "UTF-8 Unicode", /* comment */ persian, /* tailoring */
    nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */
    nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */
    nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */
    &my_unicase_default, /* caseinfo */
    nullptr, /* state_map */ nullptr, /* ident_map */
    8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */
    1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */
    9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */
    false, /* escape_with_backslash_is_dangerous */
    1, /* levels_for_compare */
    &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE};

/* utf8mb3_esperanto_ci (id 209): utf8mb3 UCA collation with the `esperanto` tailoring. */
CHARSET_INFO my_charset_utf8mb3_esperanto_uca_ci = {
    209, 0, 0, /* number */
    MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
    "utf8mb3", /* cs 
name */ "utf8mb3_esperanto_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ esperanto, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_hungarian_uca_ci = { 210, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* cs name */ "utf8mb3_hungarian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ hungarian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_sinhala_uca_ci = { 211, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* cs name */ "utf8mb3_sinhala_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ sinhala, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ 
nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_german2_uca_ci = { 212, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* cs name */ "utf8mb3_german2_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ german2, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_croatian_uca_ci = { 213, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* cs name */ "utf8mb3_croatian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ croatian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* 
mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_unicode_520_ci = { 214, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* csname */ "utf8mb3_unicode_520_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v520, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode520, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb3_vietnamese_ci = { 215, 0, 0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ "utf8mb3", /* cs name */ "utf8mb3_vietnamese_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ vietnamese, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb3_handler, &my_collation_any_uca_handler, PAD_SPACE}; extern 
MY_CHARSET_HANDLER my_charset_utf8mb4_handler; #define MY_CS_UTF8MB4_UCA_FLAGS \ (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT) CHARSET_INFO my_charset_utf8mb4_unicode_ci = { 224, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_unicode_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci = { 225, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_icelandic_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ icelandic, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci = { 226, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state 
*/ "utf8mb4", /* csname */ "utf8mb4_latvian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ latvian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci = { 227, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_romanian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ romanian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci = { 228, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_slovenian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ slovenian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, 
/* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_polish_uca_ci = { 229, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_polish_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ polish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci = { 230, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_estonian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ estonian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, 
/* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci = { 231, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_spanish_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci = { 232, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_swedish_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ swedish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO 
my_charset_utf8mb4_turkish_uca_ci = { 233, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_turkish_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ turkish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_turkish, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 2, /* caseup_multiply */ 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_czech_uca_ci = { 234, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_czech_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ czech, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_danish_uca_ci = { 235, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_danish_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ danish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ 
nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci = { 236, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_lithuanian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ lithuanian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci = { 237, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_slovak_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ slovak, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 
1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci = { 238, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_spanish2_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ spanish2, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_roman_uca_ci = { 239, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_roman_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ roman, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ 
&my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_persian_uca_ci = { 240, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_persian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ persian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci = { 241, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_esperanto_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ esperanto, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci = { 242, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_hungarian_ci", /* m_coll_name */ "UTF-8 
Unicode", /* comment */ hungarian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci = { 243, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_sinhala_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ sinhala, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_german2_uca_ci = { 244, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_german2_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ german2, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ 
&my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_croatian_uca_ci = { 245, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_croatian_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ croatian, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_unicode_520_ci = { 246, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* flags */ "utf8mb4", /* csname */ "utf8mb4_unicode_520_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v520, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode520, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* 
max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf8mb4_vietnamese_ci = { 247, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_vietnamese_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ vietnamese, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler, PAD_SPACE}; MY_COLLATION_HANDLER my_collation_utf32_uca_handler = { my_coll_init_uca, /* init */ my_coll_uninit_uca, my_strnncoll_any_uca, my_strnncollsp_any_uca, my_strnxfrm_any_uca, my_strnxfrmlen_simple, my_like_range_generic, my_wildcmp_uca, nullptr, my_instr_mb, my_hash_sort_any_uca, my_propagate_complex}; extern MY_CHARSET_HANDLER my_charset_utf32_handler; #define MY_CS_UTF32_UCA_FLAGS \ (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | \ MY_CS_UNICODE_SUPPLEMENT | MY_CS_NONASCII) CHARSET_INFO my_charset_utf32_unicode_ci = { 160, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_unicode_ci", /* m_coll_name */ "", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ 
nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_icelandic_uca_ci = { 161, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_icelandic_ci", /* m_coll_name */ "", /* comment */ icelandic, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_latvian_uca_ci = { 162, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_latvian_ci", /* m_coll_name */ "", /* comment */ latvian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ 
&my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_romanian_uca_ci = { 163, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_romanian_ci", /* m_coll_name */ "", /* comment */ romanian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_slovenian_uca_ci = { 164, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_slovenian_ci", /* m_coll_name */ "", /* comment */ slovenian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_polish_uca_ci = { 165, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_polish_ci", /* m_coll_name */ "", /* comment */ polish, /* tailoring */ nullptr, /* coll_param 
*/ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_estonian_uca_ci = { 166, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_estonian_ci", /* m_coll_name */ "", /* comment */ estonian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE}; CHARSET_INFO my_charset_utf32_spanish_uca_ci = { 167, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_spanish_ci", /* m_coll_name */ "", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, 
/* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_swedish_ci (collation number 168): utf32 with the swedish tailoring. */
CHARSET_INFO my_charset_utf32_swedish_uca_ci = { 168, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_swedish_ci", /* m_coll_name */ "", /* comment */ swedish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_turkish_ci (collation number 169): utf32 with the turkish tailoring. Note: caseinfo is my_unicase_turkish here, not my_unicase_default. */
CHARSET_INFO my_charset_utf32_turkish_uca_ci = { 169, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_turkish_ci", /* m_coll_name */ "", /* comment */ turkish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_turkish, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_czech_ci (collation number 170): utf32 with the czech tailoring. */
CHARSET_INFO 
my_charset_utf32_czech_uca_ci = { 170, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_czech_ci", /* m_coll_name */ "", /* comment */ czech, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_danish_ci (collation number 171): utf32 with the danish tailoring. */
CHARSET_INFO my_charset_utf32_danish_uca_ci = { 171, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_danish_ci", /* m_coll_name */ "", /* comment */ danish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_lithuanian_ci (collation number 172): utf32 with the lithuanian tailoring. */
CHARSET_INFO my_charset_utf32_lithuanian_uca_ci = { 172, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_lithuanian_ci", /* m_coll_name */ "", /* comment */ lithuanian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* 
sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_slovak_ci (collation number 173): utf32 with the slovak tailoring. */
CHARSET_INFO my_charset_utf32_slovak_uca_ci = { 173, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_slovak_ci", /* m_coll_name */ "", /* comment */ slovak, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_spanish2_ci (collation number 174): utf32 with the spanish2 tailoring. */
CHARSET_INFO my_charset_utf32_spanish2_uca_ci = { 174, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_spanish2_ci", /* m_coll_name */ "", /* comment */ spanish2, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* 
min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_roman_ci (collation number 175): utf32 with the roman tailoring. */
CHARSET_INFO my_charset_utf32_roman_uca_ci = { 175, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_roman_ci", /* m_coll_name */ "", /* comment */ roman, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_persian_ci (collation number 176): utf32 with the persian tailoring. */
CHARSET_INFO my_charset_utf32_persian_uca_ci = { 176, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_persian_ci", /* m_coll_name */ "", /* comment */ persian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_esperanto_ci (collation number 177): utf32 with the esperanto tailoring. */
CHARSET_INFO my_charset_utf32_esperanto_uca_ci = { 177, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state 
*/ "utf32", /* csname */ "utf32_esperanto_ci", /* m_coll_name */ "", /* comment */ esperanto, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_hungarian_ci (collation number 178): utf32 with the hungarian tailoring. */
CHARSET_INFO my_charset_utf32_hungarian_uca_ci = { 178, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_hungarian_ci", /* m_coll_name */ "", /* comment */ hungarian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_sinhala_ci (collation number 179): utf32 with the sinhala tailoring. */
CHARSET_INFO my_charset_utf32_sinhala_uca_ci = { 179, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_sinhala_ci", /* m_coll_name */ "", /* comment */ sinhala, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ 
&my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_german2_ci (collation number 180): utf32 with the german2 tailoring. */
CHARSET_INFO my_charset_utf32_german2_uca_ci = { 180, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_german2_ci", /* m_coll_name */ "", /* comment */ german2, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_croatian_ci (collation number 181): utf32 with the croatian tailoring. */
CHARSET_INFO my_charset_utf32_croatian_uca_ci = { 181, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_croatian_ci", /* m_coll_name */ "", /* comment */ croatian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* 
escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_unicode_520_ci (collation number 182): utf32 with an empty tailoring string. Unlike the tailored utf32 collations it points uca at my_uca_v520, uses my_unicase_unicode520 for caseinfo, and sets max_sort_char to 0x10FFFF. */
CHARSET_INFO my_charset_utf32_unicode_520_ci = { 182, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_unicode_520_ci", /* m_coll_name */ "", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v520, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode520, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* utf32_vietnamese_ci (collation number 183): utf32 with the vietnamese tailoring. */
CHARSET_INFO my_charset_utf32_vietnamese_ci = { 183, 0, 0, /* number */ MY_CS_UTF32_UCA_FLAGS, /* state */ "utf32", /* csname */ "utf32_vietnamese_ci", /* m_coll_name */ "", /* comment */ vietnamese, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 4, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler, PAD_SPACE};
/* Collation handler table referenced by the utf16 collations that follow. It uses my_like_range_generic in the slot where the gb18030 handler uses my_like_range_mb. */
MY_COLLATION_HANDLER my_collation_utf16_uca_handler = { my_coll_init_uca, /* init */ my_coll_uninit_uca, my_strnncoll_any_uca, my_strnncollsp_any_uca, 
my_strnxfrm_any_uca, my_strnxfrmlen_simple, my_like_range_generic, my_wildcmp_uca, nullptr, my_instr_mb, my_hash_sort_any_uca, my_propagate_complex};
/* Character-set handler for utf16; only declared here. */
extern MY_CHARSET_HANDLER my_charset_utf16_handler;
/* State flags used by the utf16 collations defined below. */
#define MY_CS_UTF16_UCA_FLAGS \
  (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII)
/* utf16_unicode_ci (collation number 101): utf16 with an empty tailoring string; mbminlen is 2 and mbmaxlen is 4. */
CHARSET_INFO my_charset_utf16_unicode_ci = { 101, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* csname */ "utf16_unicode_ci", /* m_coll_name */ "", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_icelandic_ci (collation number 102): utf16 with the icelandic tailoring. */
CHARSET_INFO my_charset_utf16_icelandic_uca_ci = { 102, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* csname */ "utf16_icelandic_ci", /* m_coll_name */ "", /* comment */ icelandic, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_latvian_ci (collation number 103): utf16 with the latvian tailoring. */
CHARSET_INFO 
my_charset_utf16_latvian_uca_ci = { 103, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_latvian_ci", /* m_coll_name */ "", /* comment */ latvian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_romanian_ci (collation number 104): utf16 with the romanian tailoring. */
CHARSET_INFO my_charset_utf16_romanian_uca_ci = { 104, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_romanian_ci", /* m_coll_name */ "", /* comment */ romanian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_slovenian_ci (collation number 105): utf16 with the slovenian tailoring. */
CHARSET_INFO my_charset_utf16_slovenian_uca_ci = { 105, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_slovenian_ci", /* m_coll_name */ "", /* comment */ slovenian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, 
/* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_polish_ci (collation number 106): utf16 with the polish tailoring. */
CHARSET_INFO my_charset_utf16_polish_uca_ci = { 106, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_polish_ci", /* m_coll_name */ "", /* comment */ polish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_estonian_ci (collation number 107): utf16 with the estonian tailoring. */
CHARSET_INFO my_charset_utf16_estonian_uca_ci = { 107, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_estonian_ci", /* m_coll_name */ "", /* comment */ estonian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, 
/* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_spanish_ci (collation number 108): utf16 with the spanish tailoring. */
CHARSET_INFO my_charset_utf16_spanish_uca_ci = { 108, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_spanish_ci", /* m_coll_name */ "", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_swedish_ci (collation number 109): utf16 with the swedish tailoring. */
CHARSET_INFO my_charset_utf16_swedish_uca_ci = { 109, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_swedish_ci", /* m_coll_name */ "", /* comment */ swedish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_turkish_ci (collation number 110): utf16 with the turkish tailoring. */
CHARSET_INFO my_charset_utf16_turkish_uca_ci = { 110, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, 
/* state */ "utf16", /* cs name */ "utf16_turkish_ci", /* m_coll_name */ "", /* comment */ turkish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_turkish, /* caseinfo: Turkish table instead of my_unicase_default */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_czech_ci (collation number 111): utf16 with the czech tailoring. */
CHARSET_INFO my_charset_utf16_czech_uca_ci = { 111, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_czech_ci", /* m_coll_name */ "", /* comment */ czech, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_danish_ci (collation number 112): utf16 with the danish tailoring. */
CHARSET_INFO my_charset_utf16_danish_uca_ci = { 112, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_danish_ci", /* m_coll_name */ "", /* comment */ danish, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ 
&my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_lithuanian_ci (collation number 113): utf16 with the lithuanian tailoring. */
CHARSET_INFO my_charset_utf16_lithuanian_uca_ci = { 113, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_lithuanian_ci", /* m_coll_name */ "", /* comment */ lithuanian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_slovak_ci (collation number 114): utf16 with the slovak tailoring. */
CHARSET_INFO my_charset_utf16_slovak_uca_ci = { 114, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_slovak_ci", /* m_coll_name */ "", /* comment */ slovak, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* 
escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_spanish2_ci (collation number 115): utf16 with the spanish2 tailoring. */
CHARSET_INFO my_charset_utf16_spanish2_uca_ci = { 115, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_spanish2_ci", /* m_coll_name */ "", /* comment */ spanish2, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_roman_ci (collation number 116): utf16 with the roman tailoring. */
CHARSET_INFO my_charset_utf16_roman_uca_ci = { 116, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_roman_ci", /* m_coll_name */ "", /* comment */ roman, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_persian_ci (collation number 117): utf16 with the persian tailoring. */
CHARSET_INFO my_charset_utf16_persian_uca_ci = { 117, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_persian_ci", /* m_coll_name */ "", /* 
comment */ persian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_esperanto_ci (collation number 118): utf16 with the esperanto tailoring. */
CHARSET_INFO my_charset_utf16_esperanto_uca_ci = { 118, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_esperanto_ci", /* m_coll_name */ "", /* comment */ esperanto, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_hungarian_ci (collation number 119): utf16 with the hungarian tailoring. */
CHARSET_INFO my_charset_utf16_hungarian_uca_ci = { 119, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_hungarian_ci", /* m_coll_name */ "", /* comment */ hungarian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, 
/* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_sinhala_ci (collation number 120): utf16 with the sinhala tailoring. */
CHARSET_INFO my_charset_utf16_sinhala_uca_ci = { 120, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_sinhala_ci", /* m_coll_name */ "", /* comment */ sinhala, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_german2_ci (collation number 121): utf16 with the german2 tailoring. */
CHARSET_INFO my_charset_utf16_german2_uca_ci = { 121, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_german2_ci", /* m_coll_name */ "", /* comment */ german2, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ 
&my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_croatian_ci (collation number 122): utf16 with the croatian tailoring. */
CHARSET_INFO my_charset_utf16_croatian_uca_ci = { 122, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_croatian_ci", /* m_coll_name */ "", /* comment */ croatian, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_unicode_520_ci (collation number 123): utf16 with an empty tailoring string. It points uca at my_uca_v520, uses my_unicase_unicode520 for caseinfo, and sets max_sort_char to 0x10FFFF. The state field uses MY_CS_UTF16_UCA_FLAGS and the pad char is written as ' ', matching the other utf16 collations; both have the same values as the previously spelled-out flag expression and 0x20. */
CHARSET_INFO my_charset_utf16_unicode_520_ci = { 123, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* csname */ "utf16_unicode_520_ci", /* m_coll_name */ "", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v520, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode520, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE};
/* utf16_vietnamese_ci (collation number 124): utf16 with the vietnamese tailoring. */
CHARSET_INFO my_charset_utf16_vietnamese_ci = { 124, 0, 0, /* number */ MY_CS_UTF16_UCA_FLAGS, /* state */ "utf16", /* cs name */ "utf16_vietnamese_ci", /* m_coll_name */ "", /* comment */ 
vietnamese, /* tailoring */ nullptr, /* coll_param */ nullptr, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ nullptr, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_default, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 2, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler, PAD_SPACE}; /* Collation handler referenced by my_charset_gb18030_unicode_520_ci: twelve positional slots, of which the ninth is left nullptr. */ MY_COLLATION_HANDLER my_collation_gb18030_uca_handler = { my_coll_init_uca, /* init */ my_coll_uninit_uca, my_strnncoll_any_uca, my_strnncollsp_any_uca, my_strnxfrm_any_uca, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_uca, nullptr, my_instr_mb, my_hash_sort_any_uca, my_propagate_complex}; /** The array used for "type of characters" bit mask for each character. The ctype[0] is reserved for EOF(-1), so we use ctype[(char)+1]. 
Also refer to strings/CHARSET_INFO.txt */ static const uint8_t ctype_gb18030[257] = { 0, /* For standard library */ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, 16, 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0}; extern MY_CHARSET_HANDLER my_charset_gb18030_uca_handler; CHARSET_INFO my_charset_gb18030_unicode_520_ci = { 250, 0, 0, /* number */ MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_NONASCII, /* state */ "gb18030", /* cs name */ "gb18030_unicode_520_ci", /* m_coll_name */ "China National Standard GB18030", /* comment */ "", /* tailoring */ nullptr, /* coll_param */ ctype_gb18030, /* ctype */ nullptr, /* lower */ nullptr, /* UPPER */ nullptr, /* sort order */ &my_uca_v520, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode520, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 8, /* strxfrm_multiply */ 2, /* caseup_multiply */ 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xE3329A35, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_gb18030_uca_handler, &my_collation_gb18030_uca_handler, PAD_SPACE}; CHARSET_INFO 
my_charset_utf8mb4_0900_ai_ci = { 255, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_PRIMARY, /* state */ "utf8mb4", /* csname */ "utf8mb4_0900_ai_ci", /* m_coll_name */ "UTF-8 Unicode", /* comment */ nullptr, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_de_pb_0900_ai_ci, collation number 256: my_uca_v900 with the de_pb_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_de_pb_0900_ai_ci = { 256, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_de_pb_0900_ai_ci", /* m_coll_name */ "", /* comment */ de_pb_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_is_0900_ai_ci, collation number 257: my_uca_v900 with the is_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_is_0900_ai_ci = { 257, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_is_0900_ai_ci", /* m_coll_name */ "", /* comment */ is_cldr_30, /* tailoring */ nullptr, /* 
coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_lv_0900_ai_ci, collation number 258: my_uca_v900 with the lv_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_lv_0900_ai_ci = { 258, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_lv_0900_ai_ci", /* m_coll_name */ "", /* comment */ lv_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_ro_0900_ai_ci, collation number 259: my_uca_v900 with the ro_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_ro_0900_ai_ci = { 259, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_ro_0900_ai_ci", /* m_coll_name */ "", /* comment */ ro_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* 
ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_sl_0900_ai_ci, collation number 260: my_uca_v900 with the sl_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_sl_0900_ai_ci = { 260, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_sl_0900_ai_ci", /* m_coll_name */ "", /* comment */ sl_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_pl_0900_ai_ci, collation number 261: my_uca_v900 with the pl_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_pl_0900_ai_ci = { 261, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_pl_0900_ai_ci", /* m_coll_name */ "", /* comment */ pl_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 
1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_et_0900_ai_ci, collation number 262: my_uca_v900 with the et_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_et_0900_ai_ci = { 262, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_et_0900_ai_ci", /* m_coll_name */ "", /* comment */ et_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_es_0900_ai_ci, collation number 263: my_uca_v900 with the spanish tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_es_0900_ai_ci = { 263, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_es_0900_ai_ci", /* m_coll_name */ "", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_sv_0900_ai_ci, collation number 264: my_uca_v900 with the sv_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_sv_0900_ai_ci = { 264, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_sv_0900_ai_ci", /* 
m_coll_name */ "", /* comment */ sv_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_tr_0900_ai_ci, collation number 265: my_uca_v900 with the tr_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_tr_0900_ai_ci = { 265, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_tr_0900_ai_ci", /* m_coll_name */ "", /* comment */ tr_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_cs_0900_ai_ci, collation number 266: my_uca_v900 with the cs_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_cs_0900_ai_ci = { 266, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_cs_0900_ai_ci", /* m_coll_name */ "", /* comment */ cs_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ 
&my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_da_0900_ai_ci, collation number 267: my_uca_v900 with the da_cldr_30 tailoring and no coll_param; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_da_0900_ai_ci = { 267, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_da_0900_ai_ci", /* m_coll_name */ "", /* comment */ da_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_lt_0900_ai_ci, collation number 268: my_uca_v900 with the lt_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_lt_0900_ai_ci = { 268, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_lt_0900_ai_ci", /* m_coll_name */ "", /* comment */ lt_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* 
max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_sk_0900_ai_ci, collation number 269: my_uca_v900 with the sk_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_sk_0900_ai_ci = { 269, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_sk_0900_ai_ci", /* m_coll_name */ "", /* comment */ sk_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_es_trad_0900_ai_ci, collation number 270: my_uca_v900 with the es_trad_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_es_trad_0900_ai_ci = { 270, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_es_trad_0900_ai_ci", /* m_coll_name */ "", /* comment */ es_trad_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_la_0900_ai_ci, collation number 271: my_uca_v900 with the roman tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_la_0900_ai_ci = { 271, 0, 0, /* number 
*/ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_la_0900_ai_ci", /* m_coll_name */ "", /* comment */ roman, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_fa_0900_ai_ci (collation number 272) is compiled out by the #if 0 / #endif pair that follows. */ #if 0 CHARSET_INFO my_charset_utf8mb4_fa_0900_ai_ci= { 272, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ "utf8mb4", /* csname */ "utf8mb4_fa_0900_ai_ci", /* m_coll_name */ "", /* comment */ fa_cldr_30, /* tailoring */ &fa_coll_param, /* coll_param */ ctype_utf8, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ &my_uca_v900, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ &my_unicase_unicode900,/* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD }; #endif /* utf8mb4_eo_0900_ai_ci, collation number 273: my_uca_v900 with the esperanto tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_eo_0900_ai_ci = { 273, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_eo_0900_ai_ci", /* m_coll_name */ "", /* comment */ esperanto, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ 
&my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_hu_0900_ai_ci, collation number 274: my_uca_v900 with the hu_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_hu_0900_ai_ci = { 274, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_hu_0900_ai_ci", /* m_coll_name */ "", /* comment */ hu_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_hr_0900_ai_ci, collation number 275: my_uca_v900 with the hr_cldr_30 tailoring and hr_coll_param as coll_param; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_hr_0900_ai_ci = { 275, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_hr_0900_ai_ci", /* m_coll_name */ "", /* comment */ hr_cldr_30, /* tailoring */ &hr_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 
4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_si_0900_ai_ci (collation number 276) is compiled out by the #if 0 / #endif pair that follows. */ #if 0 CHARSET_INFO my_charset_utf8mb4_si_0900_ai_ci= { 276, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ "utf8mb4", /* csname */ "utf8mb4_si_0900_ai_ci", /* m_coll_name */ "", /* comment */ si_cldr_30, /* tailoring */ NULL, /* coll_param */ ctype_utf8, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ &my_uca_v900, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ &my_unicase_unicode900,/* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD }; #endif /* utf8mb4_vi_0900_ai_ci, collation number 277: my_uca_v900 with the vi_cldr_30 tailoring; NO_PAD, one comparison level. */ CHARSET_INFO my_charset_utf8mb4_vi_0900_ai_ci = { 277, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_vi_0900_ai_ci", /* m_coll_name */ "", /* comment */ vi_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_0900_as_cs, collation number 278: my_uca_v900 with no tailoring (nullptr), MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO 
my_charset_utf8mb4_0900_as_cs = { 278, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_0900_as_cs", /* m_coll_name */ "", /* comment */ nullptr, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_de_pb_0900_as_cs, collation number 279: my_uca_v900 with the de_pb_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_de_pb_0900_as_cs = { 279, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_de_pb_0900_as_cs", /* m_coll_name */ "", /* comment */ de_pb_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_is_0900_as_cs, collation number 280: my_uca_v900 with the is_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_is_0900_as_cs = { 280, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_is_0900_as_cs", /* m_coll_name */ "", /* comment */ is_cldr_30, /* tailoring */ nullptr, /* 
coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_lv_0900_as_cs, collation number 281: my_uca_v900 with the lv_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_lv_0900_as_cs = { 281, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_lv_0900_as_cs", /* m_coll_name */ "", /* comment */ lv_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_ro_0900_as_cs, collation number 282: my_uca_v900 with the ro_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_ro_0900_as_cs = { 282, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_ro_0900_as_cs", /* m_coll_name */ "", /* comment */ ro_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, 
/* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_sl_0900_as_cs, collation number 283: my_uca_v900 with the sl_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_sl_0900_as_cs = { 283, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_sl_0900_as_cs", /* m_coll_name */ "", /* comment */ sl_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_pl_0900_as_cs, collation number 284: my_uca_v900 with the pl_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_pl_0900_as_cs = { 284, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_pl_0900_as_cs", /* m_coll_name */ "", /* comment */ pl_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* 
pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_et_0900_as_cs, collation number 285: my_uca_v900 with the et_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_et_0900_as_cs = { 285, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_et_0900_as_cs", /* m_coll_name */ "", /* comment */ et_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_es_0900_as_cs, collation number 286: my_uca_v900 with the spanish tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_es_0900_as_cs = { 286, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_es_0900_as_cs", /* m_coll_name */ "", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_sv_0900_as_cs, collation number 287: my_uca_v900 with the sv_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_sv_0900_as_cs = { 287, 0, 0, /* number */ 
MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_sv_0900_as_cs", /* m_coll_name */ "", /* comment */ sv_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_tr_0900_as_cs, collation number 288: my_uca_v900 with the tr_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_tr_0900_as_cs = { 288, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_tr_0900_as_cs", /* m_coll_name */ "", /* comment */ tr_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* utf8mb4_cs_0900_as_cs, collation number 289: my_uca_v900 with the cs_cldr_30 tailoring, MY_CS_CSSORT set; NO_PAD, three comparison levels. */ CHARSET_INFO my_charset_utf8mb4_cs_0900_as_cs = { 289, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_cs_0900_as_cs", /* m_coll_name */ "", /* comment */ cs_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower 
*/ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_da_0900_as_cs = { 290, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_da_0900_as_cs", /* m_coll_name */ "", /* comment */ da_cldr_30, /* tailoring */ &da_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_lt_0900_as_cs = { 291, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_lt_0900_as_cs", /* m_coll_name */ "", /* comment */ lt_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* 
strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_sk_0900_as_cs = { 292, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_sk_0900_as_cs", /* m_coll_name */ "", /* comment */ sk_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_es_trad_0900_as_cs = { 293, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_es_trad_0900_as_cs", /* m_coll_name */ "", /* comment */ es_trad_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* 
escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_la_0900_as_cs = { 294, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_la_0900_as_cs", /* m_coll_name */ "", /* comment */ roman, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; #if 0 CHARSET_INFO my_charset_utf8mb4_fa_0900_as_cs= { 295, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */ "utf8mb4", /* csname */ "utf8mb4_fa_0900_as_cs", /* m_coll_name */ "", /* comment */ fa_cldr_30, /* tailoring */ &fa_coll_param, /* coll_param */ ctype_utf8, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ &my_uca_v900, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ &my_unicase_unicode900,/* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD }; #endif CHARSET_INFO my_charset_utf8mb4_eo_0900_as_cs = { 296, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* 
state */ "utf8mb4", /* csname */ "utf8mb4_eo_0900_as_cs", /* m_coll_name */ "", /* comment */ esperanto, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_hu_0900_as_cs = { 297, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_hu_0900_as_cs", /* m_coll_name */ "", /* comment */ hu_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_hr_0900_as_cs = { 298, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_hr_0900_as_cs", /* m_coll_name */ "", /* comment */ hr_cldr_30, /* tailoring */ &hr_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* 
sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; #if 0 CHARSET_INFO my_charset_utf8mb4_si_0900_as_cs= { 299, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */ "utf8mb4", /* csname */ "utf8mb4_si_0900_as_cs", /* m_coll_name */ "", /* comment */ si_cldr_30, /* tailoring */ NULL, /* coll_param */ ctype_utf8, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ &my_uca_v900, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ &my_unicase_unicode900,/* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD }; #endif CHARSET_INFO my_charset_utf8mb4_vi_0900_as_cs = { 300, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_vi_0900_as_cs", /* m_coll_name */ "", /* comment */ vi_cldr_30, /* tailoring */ &vi_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* 
casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs = { 303, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_ja_0900_as_cs", /* m_coll_name */ "", /* comment */ ja_cldr_30, /* tailoring */ &ja_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks = { 304, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_ja_0900_as_cs_ks", /* m_coll_name */ "", /* comment */ ja_cldr_30, /* tailoring */ &ja_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 24, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 4, /* 
levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_0900_as_ci = { 305, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_0900_as_ci", /* m_coll_name */ "", /* comment */ nullptr, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 2, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_ru_0900_ai_ci = { 306, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_ru_0900_ai_ci", /* m_coll_name */ "", /* comment */ "", /* tailoring */ &ru_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs = { 307, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_ru_0900_as_cs", /* 
m_coll_name */ "", /* comment */ "", /* tailoring */ &ru_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs = { 308, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_zh_0900_as_cs", /* m_coll_name */ "", /* comment */ zh_cldr_30, /* tailoring */ &zh_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; /* Comparing the UTF-8 representation automatically yields codepoint order, so we can just do a binary comparison. Note that my_strnxfrm_unicode_full_bin() chooses to transform to UCS before collation; this is purely for legacy reasons and is not needed here. 
*/ static size_t my_strnxfrm_utf8mb4_0900_bin(const CHARSET_INFO *cs [[maybe_unused]], uint8_t *dst, size_t dstlen, unsigned nweights [[maybe_unused]], const uint8_t *src, size_t srclen, unsigned flags) { assert(src); size_t weight_len = std::min(srclen, dstlen); memcpy(dst, src, weight_len); if (flags & MY_STRXFRM_PAD_TO_MAXLEN) { memset(dst + weight_len, 0, dstlen - weight_len); return dstlen; } return weight_len; } static int my_strnncollsp_utf8mb4_0900_bin(const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) { return my_strnncoll_mb_bin(cs, s, slen, t, tlen, false); } static MY_COLLATION_HANDLER my_collation_utf8mb4_0900_bin_handler = { nullptr, /* init */ nullptr, my_strnncoll_mb_bin, my_strnncollsp_utf8mb4_0900_bin, my_strnxfrm_utf8mb4_0900_bin, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_mb_bin, my_strcasecmp_mb_bin, my_instr_mb, my_hash_sort_mb_bin, my_propagate_simple}; CHARSET_INFO my_charset_utf8mb4_0900_bin = { 309, 0, 0, // number MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_BINSORT, // state "utf8mb4", // cs name "utf8mb4_0900_bin", // name "", // comment nullptr, // tailoring nullptr, // coll_param ctype_utf8, // ctype nullptr, // to_lower nullptr, // to_upper nullptr, // sort_order nullptr, // uca nullptr, // tab_to_uni nullptr, // tab_from_uni &my_unicase_unicode900, // caseinfo nullptr, // state_map nullptr, // ident_map 1, // strxfrm_multiply 1, // caseup_multiply 1, // casedn_multiply 1, // mbminlen 4, // mbmaxlen 1, // mbmaxlenlen 0, // min_sort_char 0x10FFFF, // max_sort_char ' ', // pad char false, // escape_with_backslash_is_dangerous 1, // levels_for_compare &my_charset_utf8mb4_handler, &my_collation_utf8mb4_0900_bin_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_nb_0900_ai_ci = { 310, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_nb_0900_ai_ci", /* name */ "", /* comment */ da_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* 
to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_nb_0900_as_cs = { 311, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_nb_0900_as_cs", /* name */ "", /* comment */ da_cldr_30, /* tailoring */ &no_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_nn_0900_ai_ci = { 312, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_nn_0900_ai_ci", /* name */ "", /* comment */ da_cldr_30, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, 
/* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_nn_0900_as_cs = { 313, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_nn_0900_as_cs", /* name */ "", /* comment */ da_cldr_30, /* tailoring */ &no_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_ai_ci = { 314, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_sr_latn_0900_ai_ci", /* name */ "", /* comment */ hr_cldr_30, /* tailoring */ &hr_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* 
levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_as_cs = { 315, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_sr_latn_0900_as_cs", /* name */ "", /* comment */ hr_cldr_30, /* tailoring */ &hr_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_bs_0900_ai_ci = { 316, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_bs_0900_ai_ci", /* name */ "", /* comment */ hr_cldr_30, /* tailoring */ &hr_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_bs_0900_as_cs = { 317, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ 
"utf8mb4_bs_0900_as_cs", /* name */ "", /* comment */ hr_cldr_30, /* tailoring */ &hr_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_bg_0900_ai_ci = { 318, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_bg_0900_ai_ci", /* name */ "", /* comment */ "", /* tailoring */ &ru_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_bg_0900_as_cs = { 319, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_bg_0900_as_cs", /* name */ "", /* comment */ "", /* tailoring */ &ru_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ 
nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_gl_0900_ai_ci = { 320, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_gl_0900_ai_ci", /* name */ "", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_gl_0900_as_cs = { 321, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_gl_0900_as_cs", /* name */ "", /* comment */ spanish, /* tailoring */ nullptr, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 9, /* min_sort_char 
*/ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_ai_ci = { 322, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS, /* state */ "utf8mb4", /* csname */ "utf8mb4_mn_cyrl_0900_ai_ci", /* name */ "", /* comment */ "", /* tailoring */ &ru_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD}; CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_as_cs = { 323, 0, 0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ "utf8mb4", /* csname */ "utf8mb4_mn_cyrl_0900_as_cs", /* name */ "", /* comment */ "", /* tailoring */ &ru_coll_param, /* coll_param */ ctype_utf8, /* ctype */ nullptr, /* to_lower */ nullptr, /* to_upper */ nullptr, /* sort_order */ &my_uca_v900, /* uca */ nullptr, /* tab_to_uni */ nullptr, /* tab_from_uni */ &my_unicase_unicode900, /* caseinfo */ nullptr, /* state_map */ nullptr, /* ident_map */ 0, /* strxfrm_multiply */ 1, /* caseup_multiply */ 1, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 1, /* mbmaxlenlen */ 32, /* min_sort_char */ 0x10FFFF, /* max_sort_char */ ' ', /* pad char */ false, /* escape_with_backslash_is_dangerous */ 3, /* levels_for_compare */ &my_charset_utf8mb4_handler, &my_collation_uca_900_handler, NO_PAD};