/* * Copyright (C) 2015-2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef StringCommon_h #define StringCommon_h #include #include namespace WTF { template bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length); template bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB); template bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&); template bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length); template bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]); template bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]); template inline T loadUnaligned(const char* s) { #if COMPILER(CLANG) T tmp; memcpy(&tmp, s, sizeof(T)); return tmp; #else // This may result in undefined behavior due to unaligned access. return *reinterpret_cast(s); #endif } // Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. #if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) { unsigned dwordLength = length >> 3; const char* a = reinterpret_cast(aLChar); const char* b = reinterpret_cast(bLChar); if (dwordLength) { for (unsigned i = 0; i != dwordLength; ++i) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint64_t); b += sizeof(uint64_t); } } if (length & 4) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint32_t); b += sizeof(uint32_t); } if (length & 2) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint16_t); b += sizeof(uint16_t); } if (length & 1 && (*reinterpret_cast(a) != *reinterpret_cast(b))) return false; return true; } ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) { unsigned dwordLength = length >> 2; const char* a = reinterpret_cast(aUChar); const char* b = reinterpret_cast(bUChar); if (dwordLength) { for (unsigned i = 0; i != dwordLength; ++i) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint64_t); b += sizeof(uint64_t); } } if (length & 2) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint32_t); b += sizeof(uint32_t); } if (length & 1 && (*reinterpret_cast(a) != *reinterpret_cast(b))) return false; return true; } #elif CPU(X86) && !ASAN_ENABLED ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) { const char* a = reinterpret_cast(aLChar); const char* b = reinterpret_cast(bLChar); unsigned wordLength = length >> 2; for (unsigned i = 0; i != wordLength; ++i) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint32_t); b += sizeof(uint32_t); } length &= 3; if (length) { const LChar* aRemainder = reinterpret_cast(a); const LChar* bRemainder = reinterpret_cast(b); for (unsigned i = 0; i < length; ++i) { if (aRemainder[i] != bRemainder[i]) return false; } } return true; } ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) { const char* a = reinterpret_cast(aUChar); const char* b = reinterpret_cast(bUChar); unsigned wordLength = length >> 1; for (unsigned i = 0; i != wordLength; ++i) { if (loadUnaligned(a) != loadUnaligned(b)) return false; a += sizeof(uint32_t); b += sizeof(uint32_t); } if (length & 1 && *reinterpret_cast(a) != *reinterpret_cast(b)) return false; return true; } #elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { bool isEqual = false; uint32_t aValue; uint32_t bValue; asm("subs %[length], #4\n" "blo 2f\n" "0:\n" // Label 0 = Start of loop over 32 bits. "ldr %[aValue], [%[a]], #4\n" "ldr %[bValue], [%[b]], #4\n" "cmp %[aValue], %[bValue]\n" "bne 66f\n" "subs %[length], #4\n" "bhs 0b\n" // At this point, length can be: // -0: 00000000000000000000000000000000 (0 bytes left) // -1: 11111111111111111111111111111111 (3 bytes left) // -2: 11111111111111111111111111111110 (2 bytes left) // -3: 11111111111111111111111111111101 (1 byte left) // -4: 11111111111111111111111111111100 (length was 0) // The pointers are at the correct position. "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. "tst %[length], #2\n" "beq 1f\n" "ldrh %[aValue], [%[a]], #2\n" "ldrh %[bValue], [%[b]], #2\n" "cmp %[aValue], %[bValue]\n" "bne 66f\n" "1:\n" // Label 1 = Check for a single character left. "tst %[length], #1\n" "beq 42f\n" "ldrb %[aValue], [%[a]]\n" "ldrb %[bValue], [%[b]]\n" "cmp %[aValue], %[bValue]\n" "bne 66f\n" "42:\n" // Label 42 = Success. "mov %[isEqual], #1\n" "66:\n" // Label 66 = End without changing isEqual to 1. : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) : : ); return isEqual; } ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { bool isEqual = false; uint32_t aValue; uint32_t bValue; asm("subs %[length], #2\n" "blo 1f\n" "0:\n" // Label 0 = Start of loop over 32 bits. "ldr %[aValue], [%[a]], #4\n" "ldr %[bValue], [%[b]], #4\n" "cmp %[aValue], %[bValue]\n" "bne 66f\n" "subs %[length], #2\n" "bhs 0b\n" // At this point, length can be: // -0: 00000000000000000000000000000000 (0 bytes left) // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) // -2: 11111111111111111111111111111110 (length was zero) // The pointers are at the correct position. "1:\n" // Label 1 = Check for a single character left. "tst %[length], #1\n" "beq 42f\n" "ldrh %[aValue], [%[a]]\n" "ldrh %[bValue], [%[b]]\n" "cmp %[aValue], %[bValue]\n" "bne 66f\n" "42:\n" // Label 42 = Success. "mov %[isEqual], #1\n" "66:\n" // Label 66 = End without changing isEqual to 1. : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) : : ); return isEqual; } #elif !ASAN_ENABLED ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } #else ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { for (unsigned i = 0; i < length; ++i) { if (a[i] != b[i]) return false; } return true; } ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { for (unsigned i = 0; i < length; ++i) { if (a[i] != b[i]) return false; } return true; } #endif ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) { for (unsigned i = 0; i < length; ++i) { if (a[i] != b[i]) return false; } return true; } ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } template ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b) { unsigned length = a.length(); if (length != b.length()) return false; if (a.is8Bit()) { if (b.is8Bit()) return equal(a.characters8(), b.characters8(), length); return equal(a.characters8(), b.characters16(), length); } if (b.is8Bit()) return equal(a.characters16(), b.characters8(), length); return equal(a.characters16(), b.characters16(), length); } template ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b) { if (a == b) return true; if (!a || !b) return false; return equal(*a, *b); } template inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length) { for (unsigned i = 0; i < length; ++i) { if (toASCIILower(a[i]) != toASCIILower(b[i])) return false; } return true; } template inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB) { return lengthA == lengthB && equalIgnoringASCIICase(a, b, lengthA); } template bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b) { unsigned length = a.length(); if (length != b.length()) return false; if (a.is8Bit()) { if (b.is8Bit()) return equalIgnoringASCIICase(a.characters8(), b.characters8(), length); return equalIgnoringASCIICase(a.characters8(), b.characters16(), length); } if (b.is8Bit()) return equalIgnoringASCIICase(a.characters16(), b.characters8(), length); return equalIgnoringASCIICase(a.characters16(), b.characters16(), length); } template bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b) { unsigned length = a.length(); if (length != strlen(b)) return false; if (a.is8Bit()) return equalIgnoringASCIICase(a.characters8(), b, length); return equalIgnoringASCIICase(a.characters16(), b, length); } template bool startsWith(const StringClassA& reference, const StringClassB& prefix) { unsigned prefixLength = prefix.length(); if (prefixLength > reference.length()) return false; if (reference.is8Bit()) { if (prefix.is8Bit()) return equal(reference.characters8(), prefix.characters8(), prefixLength); return equal(reference.characters8(), prefix.characters16(), prefixLength); } if (prefix.is8Bit()) return equal(reference.characters16(), prefix.characters8(), prefixLength); return equal(reference.characters16(), prefix.characters16(), prefixLength); } template bool startsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& prefix) { unsigned prefixLength = prefix.length(); if (prefixLength > reference.length()) return false; if (reference.is8Bit()) { if (prefix.is8Bit()) return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(), prefixLength); return equalIgnoringASCIICase(reference.characters8(), prefix.characters16(), prefixLength); } if (prefix.is8Bit()) return equalIgnoringASCIICase(reference.characters16(), prefix.characters8(), prefixLength); return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(), prefixLength); } template bool endsWith(const StringClassA& reference, const StringClassB& suffix) { unsigned suffixLength = suffix.length(); unsigned referenceLength = reference.length(); if (suffixLength > referenceLength) return false; unsigned startOffset = referenceLength - suffixLength; if (reference.is8Bit()) { if (suffix.is8Bit()) return equal(reference.characters8() + startOffset, suffix.characters8(), suffixLength); return equal(reference.characters8() + startOffset, suffix.characters16(), suffixLength); } if (suffix.is8Bit()) return equal(reference.characters16() + startOffset, suffix.characters8(), suffixLength); return equal(reference.characters16() + startOffset, suffix.characters16(), suffixLength); } template bool endsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& suffix) { unsigned suffixLength = suffix.length(); unsigned referenceLength = reference.length(); if (suffixLength > referenceLength) return false; unsigned startOffset = referenceLength - suffixLength; if (reference.is8Bit()) { if (suffix.is8Bit()) return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters8(), suffixLength); return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters16(), suffixLength); } if (suffix.is8Bit()) return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters8(), suffixLength); return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters16(), suffixLength); } template size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength) { ASSERT(searchLength >= matchLength); const SearchCharacterType* startSearchedCharacters = source + startOffset; // delta is the number of additional times to test; delta == 0 means test only once. unsigned delta = searchLength - matchLength; for (unsigned i = 0; i <= delta; ++i) { if (equalIgnoringASCIICase(startSearchedCharacters + i, matchCharacters, matchLength)) return startOffset + i; } return notFound; } template size_t findIgnoringASCIICase(const StringClassA& source, const StringClassB& stringToFind, unsigned startOffset) { unsigned sourceStringLength = source.length(); unsigned matchLength = stringToFind.length(); if (!matchLength) return std::min(startOffset, sourceStringLength); // Check startOffset & matchLength are in range. if (startOffset > sourceStringLength) return notFound; unsigned searchLength = sourceStringLength - startOffset; if (matchLength > searchLength) return notFound; if (source.is8Bit()) { if (stringToFind.is8Bit()) return findIgnoringASCIICase(source.characters8(), stringToFind.characters8(), startOffset, searchLength, matchLength); return findIgnoringASCIICase(source.characters8(), stringToFind.characters16(), startOffset, searchLength, matchLength); } if (stringToFind.is8Bit()) return findIgnoringASCIICase(source.characters16(), stringToFind.characters8(), startOffset, searchLength, matchLength); return findIgnoringASCIICase(source.characters16(), stringToFind.characters16(), startOffset, searchLength, matchLength); } template ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) { // Optimization: keep a running hash of the strings, // only call equal() if the hashes match. // delta is the number of additional times to test; delta == 0 means test only once. unsigned delta = searchLength - matchLength; unsigned searchHash = 0; unsigned matchHash = 0; for (unsigned i = 0; i < matchLength; ++i) { searchHash += searchCharacters[i]; matchHash += matchCharacters[i]; } unsigned i = 0; // keep looping until we match while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { if (i == delta) return notFound; searchHash += searchCharacters[i + matchLength]; searchHash -= searchCharacters[i]; ++i; } return index + i; } template inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) { while (index < length) { if (characters[index] == matchCharacter) return index; ++index; } return notFound; } ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) { return find(characters, length, static_cast(matchCharacter), index); } inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) { if (matchCharacter & ~0xFF) return notFound; return find(characters, length, static_cast(matchCharacter), index); } template size_t findCommon(const StringClass& haystack, const StringClass& needle, unsigned start) { unsigned needleLength = needle.length(); if (needleLength == 1) { if (haystack.is8Bit()) return WTF::find(haystack.characters8(), haystack.length(), needle[0], start); return WTF::find(haystack.characters16(), haystack.length(), needle[0], start); } if (!needleLength) return std::min(start, haystack.length()); if (start > haystack.length()) return notFound; unsigned searchLength = haystack.length() - start; if (needleLength > searchLength) return notFound; if (haystack.is8Bit()) { if (needle.is8Bit()) return findInner(haystack.characters8() + start, needle.characters8(), start, searchLength, needleLength); return findInner(haystack.characters8() + start, needle.characters16(), start, searchLength, needleLength); } if (needle.is8Bit()) return findInner(haystack.characters16() + start, needle.characters8(), start, searchLength, needleLength); return findInner(haystack.characters16() + start, needle.characters16(), start, searchLength, needleLength); } // This is marked inline since it's mostly used in non-inline functions for each string type. // When used directly in code it's probably OK to be inline; maybe the loop will be unrolled. template inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length) { for (unsigned i = 0; i < length; ++i) { if (!isASCIIAlphaCaselessEqual(characters[i], lowercaseLetters[i])) return false; } return true; } template inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]) { ASSERT(strlen(lowercaseLetters) == lowercaseLettersLength - 1); unsigned lowercaseLettersStringLength = lowercaseLettersLength - 1; return charactersLength == lowercaseLettersStringLength && equalLettersIgnoringASCIICase(characters, lowercaseLetters, lowercaseLettersStringLength); } template bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length) { #if !ASSERT_DISABLED ASSERT(*lowercaseLetters); for (const char* letter = lowercaseLetters; *letter; ++letter) ASSERT(toASCIILowerUnchecked(*letter) == *letter); #endif ASSERT(string.length() >= length); if (string.is8Bit()) return equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length); return equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length); } // This is intentionally not marked inline because it's used often and is not speed-critical enough to want it inlined everywhere. template bool equalLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) { unsigned length = string.length(); if (length != strlen(lowercaseLetters)) return false; return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, length); } template bool startsWithLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) { size_t prefixLength = strlen(lowercaseLetters); if (!prefixLength) return true; if (string.length() < prefixLength) return false; return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, prefixLength); } template inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) { // Don't actually use the length; we are choosing code size over speed. ASSERT(strlen(lowercaseLetters) == length - 1); const char* pointer = lowercaseLetters; return equalLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); } template inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) { const char* pointer = lowercaseLetters; return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); } } using WTF::equalIgnoringASCIICase; using WTF::equalLettersIgnoringASCIICase; #endif // StringCommon_h