// Copyright 2024 The Abseil Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "absl/debugging/internal/utf8_for_code_point.h" #include #include "gtest/gtest.h" #include "absl/base/config.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace debugging_internal { namespace { TEST(Utf8ForCodePointTest, RecognizesTheSmallestCodePoint) { Utf8ForCodePoint utf8(uint64_t{0}); ASSERT_EQ(utf8.length, 1); EXPECT_EQ(utf8.bytes[0], '\0'); } TEST(Utf8ForCodePointTest, RecognizesAsciiSmallA) { Utf8ForCodePoint utf8(uint64_t{'a'}); ASSERT_EQ(utf8.length, 1); EXPECT_EQ(utf8.bytes[0], 'a'); } TEST(Utf8ForCodePointTest, RecognizesTheLargestOneByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x7f}); ASSERT_EQ(utf8.length, 1); EXPECT_EQ(utf8.bytes[0], '\x7f'); } TEST(Utf8ForCodePointTest, RecognizesTheSmallestTwoByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x80}); ASSERT_EQ(utf8.length, 2); EXPECT_EQ(utf8.bytes[0], static_cast(0xc2)); EXPECT_EQ(utf8.bytes[1], static_cast(0x80)); } TEST(Utf8ForCodePointTest, RecognizesSmallNWithTilde) { Utf8ForCodePoint utf8(uint64_t{0xf1}); ASSERT_EQ(utf8.length, 2); const char* want = "ñ"; EXPECT_EQ(utf8.bytes[0], want[0]); EXPECT_EQ(utf8.bytes[1], want[1]); } TEST(Utf8ForCodePointTest, RecognizesCapitalPi) { Utf8ForCodePoint utf8(uint64_t{0x3a0}); ASSERT_EQ(utf8.length, 2); const char* want = "Π"; EXPECT_EQ(utf8.bytes[0], want[0]); EXPECT_EQ(utf8.bytes[1], want[1]); } TEST(Utf8ForCodePointTest, RecognizesTheLargestTwoByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x7ff}); ASSERT_EQ(utf8.length, 2); EXPECT_EQ(utf8.bytes[0], static_cast(0xdf)); EXPECT_EQ(utf8.bytes[1], static_cast(0xbf)); } TEST(Utf8ForCodePointTest, RecognizesTheSmallestThreeByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x800}); ASSERT_EQ(utf8.length, 3); EXPECT_EQ(utf8.bytes[0], static_cast(0xe0)); EXPECT_EQ(utf8.bytes[1], static_cast(0xa0)); EXPECT_EQ(utf8.bytes[2], static_cast(0x80)); } TEST(Utf8ForCodePointTest, RecognizesTheChineseCharacterZhong1AsInZhong1Wen2) { Utf8ForCodePoint utf8(uint64_t{0x4e2d}); ASSERT_EQ(utf8.length, 3); const char* want = "中"; EXPECT_EQ(utf8.bytes[0], want[0]); EXPECT_EQ(utf8.bytes[1], want[1]); EXPECT_EQ(utf8.bytes[2], want[2]); } TEST(Utf8ForCodePointTest, RecognizesOneBeforeTheSmallestSurrogate) { Utf8ForCodePoint utf8(uint64_t{0xd7ff}); ASSERT_EQ(utf8.length, 3); EXPECT_EQ(utf8.bytes[0], static_cast(0xed)); EXPECT_EQ(utf8.bytes[1], static_cast(0x9f)); EXPECT_EQ(utf8.bytes[2], static_cast(0xbf)); } TEST(Utf8ForCodePointTest, RejectsTheSmallestSurrogate) { Utf8ForCodePoint utf8(uint64_t{0xd800}); EXPECT_EQ(utf8.length, 0); } TEST(Utf8ForCodePointTest, RejectsTheLargestSurrogate) { Utf8ForCodePoint utf8(uint64_t{0xdfff}); EXPECT_EQ(utf8.length, 0); } TEST(Utf8ForCodePointTest, RecognizesOnePastTheLargestSurrogate) { Utf8ForCodePoint utf8(uint64_t{0xe000}); ASSERT_EQ(utf8.length, 3); EXPECT_EQ(utf8.bytes[0], static_cast(0xee)); EXPECT_EQ(utf8.bytes[1], static_cast(0x80)); EXPECT_EQ(utf8.bytes[2], static_cast(0x80)); } TEST(Utf8ForCodePointTest, RecognizesTheLargestThreeByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0xffff}); ASSERT_EQ(utf8.length, 3); EXPECT_EQ(utf8.bytes[0], static_cast(0xef)); EXPECT_EQ(utf8.bytes[1], static_cast(0xbf)); EXPECT_EQ(utf8.bytes[2], static_cast(0xbf)); } TEST(Utf8ForCodePointTest, RecognizesTheSmallestFourByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x10000}); ASSERT_EQ(utf8.length, 4); EXPECT_EQ(utf8.bytes[0], static_cast(0xf0)); EXPECT_EQ(utf8.bytes[1], static_cast(0x90)); EXPECT_EQ(utf8.bytes[2], static_cast(0x80)); EXPECT_EQ(utf8.bytes[3], static_cast(0x80)); } TEST(Utf8ForCodePointTest, RecognizesTheJackOfHearts) { Utf8ForCodePoint utf8(uint64_t{0x1f0bb}); ASSERT_EQ(utf8.length, 4); const char* want = "🂻"; EXPECT_EQ(utf8.bytes[0], want[0]); EXPECT_EQ(utf8.bytes[1], want[1]); EXPECT_EQ(utf8.bytes[2], want[2]); EXPECT_EQ(utf8.bytes[3], want[3]); } TEST(Utf8ForCodePointTest, RecognizesTheLargestFourByteCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x10ffff}); ASSERT_EQ(utf8.length, 4); EXPECT_EQ(utf8.bytes[0], static_cast(0xf4)); EXPECT_EQ(utf8.bytes[1], static_cast(0x8f)); EXPECT_EQ(utf8.bytes[2], static_cast(0xbf)); EXPECT_EQ(utf8.bytes[3], static_cast(0xbf)); } TEST(Utf8ForCodePointTest, RejectsTheSmallestOverlargeCodePoint) { Utf8ForCodePoint utf8(uint64_t{0x110000}); EXPECT_EQ(utf8.length, 0); } TEST(Utf8ForCodePointTest, RejectsAThroughlyOverlargeCodePoint) { Utf8ForCodePoint utf8(uint64_t{0xffffffff00000000}); EXPECT_EQ(utf8.length, 0); } TEST(Utf8ForCodePointTest, OkReturnsTrueForAValidCodePoint) { EXPECT_TRUE(Utf8ForCodePoint(uint64_t{0}).ok()); } TEST(Utf8ForCodePointTest, OkReturnsFalseForAnInvalidCodePoint) { EXPECT_FALSE(Utf8ForCodePoint(uint64_t{0xffffffff00000000}).ok()); } } // namespace } // namespace debugging_internal ABSL_NAMESPACE_END } // namespace absl