/* * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "config.h" #include #include #include #include #include "gtest/gtest.h" #include "hs.h" #include "test_util.h" using namespace std; using namespace testing; namespace /* anonymous */ { // Dummy callback: does nothing, returns 0 (keep matching) int dummyHandler(unsigned, unsigned long long, unsigned long long, unsigned, void *) { // empty return 0; } unsigned lastMatchId = 0; unsigned long long lastMatchFrom = 0; unsigned long long lastMatchTo = 0; unsigned lastMatchFlags = 0; void *lastMatchCtx = nullptr; // Single match Callback: record all the details from a single match int singleHandler(unsigned id, unsigned long long from, unsigned long long to, unsigned flags, void *ctx) { lastMatchId = id; lastMatchFrom = from; lastMatchTo = to; lastMatchFlags = flags; lastMatchCtx = ctx; return 0; } unsigned matchCount = 0; // Counter callback: just counts the number of calls int countHandler(unsigned, unsigned long long, unsigned long long, unsigned, void *) { matchCount++; return 0; } // Stop handler: take one match and tell Hyperscan to not deliver any more. 
int stopHandler(unsigned, unsigned long long, unsigned long long, unsigned, void *) { matchCount++; return 1; } // Can we scan 5GB of data (pushing past the uint32_t limit) TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) { hs_error_t err; const size_t datalen = 1024 * 1024; size_t megabytes = 5 * 1024; vector data(datalen, 'X'); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; err = hs_compile("hatstand.*teakettle.*badgerbrush", HS_FLAG_DOTALL, HS_MODE_STREAM, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); // streaming mode scan of our megabyte of data 5K times hs_stream_t *stream = nullptr; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(stream != nullptr); while (megabytes-- > 0) { err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch, dummyHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); } err = hs_close_stream(stream, scratch, dummyHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } struct HugeScanMatchingData { const char *pattern; unsigned int flags; const char *preBlock; // scan before 5GB of X's const char *postBlock; // scan after 5GB of X's, single match at end }; class HyperscanScanGigabytesMatch : public TestWithParam { }; // Can we scan 5GB of data (pushing past the uint32_t limit) and get a real match TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) { const HugeScanMatchingData ¶ms = GetParam(); SCOPED_TRACE(params.pattern); hs_error_t err; const size_t datalen = 1024*1024; vector data(datalen, 'X'); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; err = hs_compile(params.pattern, params.flags, HS_MODE_STREAM, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, 
err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); // gb is the number of gigabytes to scan between pre-block and post-block // run over 1,2,4,8 gb #ifdef NDEBUG for (unsigned long long gb = 1; gb <= 8; gb *= 2) { #else for (unsigned long long gb = 1; gb <= 2; gb *= 2) { #endif SCOPED_TRACE(gb); hs_stream_t *stream = nullptr; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(stream != nullptr); // scan our pre-block lastMatchTo = 0; err = hs_scan_stream(stream, params.preBlock, strlen(params.preBlock), 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(0ULL, lastMatchTo); // streaming mode scan of our megabyte of data gb*1024 times unsigned long remaining = gb * 1024; while (remaining-- > 0) { err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(0ULL, lastMatchTo); } // scan post-block and close stream err = hs_scan_stream(stream, params.postBlock, strlen(params.postBlock), 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); err = hs_close_stream(stream, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); // make sure we got our match unsigned long long totalLen = strlen(params.preBlock) + gb * 1024 * datalen + strlen(params.postBlock); ASSERT_EQ(totalLen, lastMatchTo); } // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } // Helper function to actually perform scans for BlockMatch test below void allocAndScanBlock(const HugeScanMatchingData ¶ms, const hs_database_t *db, hs_scratch_t *scratch, unsigned int len) { SCOPED_TRACE(len); char *data = (char *)calloc(len, 1); if (data == nullptr) { printf("Can't allocate %u bytes, skipping test\n", len); return; } // write pre-block at the start and post-block at the end memcpy(data, params.preBlock, 
strlen(params.preBlock)); memcpy(data+len - strlen(params.postBlock), params.postBlock, strlen(params.postBlock)); lastMatchTo = 0; hs_error_t err; err = hs_scan(db, data, len, 0, scratch, singleHandler, nullptr); free(data); ASSERT_EQ(HS_SUCCESS, err); // make sure we got our match ASSERT_EQ(len, lastMatchTo); } // Define to run even more huge block tests //#define BIG_BLOCKS // Can we scan gigabytes of data in a block and get a real match TEST_P(HyperscanScanGigabytesMatch, BlockMatch) { const HugeScanMatchingData ¶ms = GetParam(); SCOPED_TRACE(params.pattern); hs_error_t err; // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; err = hs_compile(params.pattern, params.flags, HS_MODE_BLOCK, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); // try various data sizes (in kilobytes) size_t blockSizes[] = { // kilobytes 1, 4, 8, 16, 32, 64, 128, 256, 512, // megabytes 1*1024, #ifdef BIG_BLOCKS 4*1024, 32*1024, 128*1024, 512*1024, #ifdef NDEBUG // gigabytes 1024*1024, // big cases for big beefy machines 2048*1024, 3072*1024 #endif // NDEBUG #endif // BIG_BLOCKS }; for (const auto size : blockSizes) { size_t bytes = size * 1024; // The block API only accepts blocks up to UINT_MAX in size. ASSERT_GE(UINT_MAX, bytes); ASSERT_GE(UINT_MAX, bytes + strlen(params.postBlock)); // First test: block size exactly. allocAndScanBlock(params, db, scratch, bytes); // Second test: block size plus four bytes, to make our post-block // straddle the block size boundary. allocAndScanBlock(params, db, scratch, bytes + 4); // Third test: block size plus the length of post-block. 
allocAndScanBlock(params, db, scratch, bytes + strlen(params.postBlock)); } // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } static const HugeScanMatchingData gigTests[] = { { "foobar", 0, "flibble", "foobar" }, { "longliteralislongerthanlong", 0, "precursor", "longliteralislongerthanlong" }, { "foobar\\z", 0, "flibble", "foobar" }, { "hatstand.*teakettle.*badgerbrush", HS_FLAG_DOTALL, "hatstand teakettle", "_badgerbrush" }, { "hatstand.*teakettle.*badgerbrush\\z", HS_FLAG_DOTALL, "hatstand teakettle", "_badgerbrush" }, { "a.*(([0123][56789]){3,6}|flibble|xyz{1,2}y)", 0, "a", "051629" }, { "^a.*(([0123][56789]){3,6}|flibble|xyz{1,2}y)", 0, "a", "051629" }, { "(badger.*){3,}mushroom.*mushroom", HS_FLAG_DOTALL, "badger badger badger", "mushroom! mushroom" }, { "(badger.*){3,}mushroom.*mushroom$", HS_FLAG_DOTALL, "badger badger badger", "mushroom! mushroom" }, { "foo[^X]{16}", HS_FLAG_SINGLEMATCH, "preblock", "foo0123456789abcdef" }, { "foobar.*bazbaz[^X]{16}", HS_FLAG_DOTALL|HS_FLAG_SINGLEMATCH, "foobar", "bazbaz0123456789abcdef" }, { "t?((w.(u|t|.)){6,10}|[dqnt]|e|va)", 0, "~~~~", "tva" }, { "HTTP.*foobar.*blah", HS_FLAG_DOTALL, "bing", "HTTP foobar blah" }, }; INSTANTIATE_TEST_CASE_P(HyperscanTestBehaviour, HyperscanScanGigabytesMatch, ValuesIn(gigTests)); // Do we actually only get a single match back from HS_FLAG_SINGLEMATCH? 
TEST(HyperscanTestBehaviour, StreamingThereCanBeOnlyOne) {
    // Two consecutive stream writes, each with several caseless ".ck" hits.
    const std::string firstWrite("hackhackHACKhackHACkhAcK");
    const std::string secondWrite(
        "KHACKhackHACKhaCkhAcKhaaaaaaaaaaaaaaaaaaaackk");

    // compile the pattern as a streaming, single-match database
    hs_database_t *database = nullptr;
    hs_compile_error_t *compileError = nullptr;
    hs_error_t rc = hs_compile(".ck", HS_FLAG_CASELESS | HS_FLAG_SINGLEMATCH,
                               HS_MODE_STREAM, nullptr, &database,
                               &compileError);
    ASSERT_EQ(HS_SUCCESS, rc);

    // scratch space for the scans
    hs_scratch_t *scratchSpace = nullptr;
    rc = hs_alloc_scratch(database, &scratchSpace);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_TRUE(scratchSpace != nullptr);

    hs_stream_t *streamHandle = nullptr;
    rc = hs_open_stream(database, 0, &streamHandle);
    ASSERT_EQ(HS_SUCCESS, rc);
    ASSERT_TRUE(streamHandle != nullptr);

    matchCount = 0;
    rc = hs_scan_stream(streamHandle, firstWrite.c_str(), firstWrite.size(),
                        0, scratchSpace, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_EQ(1U, matchCount); // only one match

    rc = hs_scan_stream(streamHandle, secondWrite.c_str(),
                        secondWrite.size(), 0, scratchSpace, countHandler,
                        nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_EQ(1U, matchCount); // just the one from the first call above

    rc = hs_close_stream(streamHandle, scratchSpace, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);

    // teardown
    rc = hs_free_scratch(scratchSpace);
    ASSERT_EQ(HS_SUCCESS, rc);
    hs_free_database(database);
}

// Do we actually only get a single match back from HS_FLAG_SINGLEMATCH?
TEST(HyperscanTestBehaviour, BlockThereCanBeOnlyOne) { hs_error_t err; const std::string data1("hackhackHACKhackHACkhAcKzofunvuynlslijlnikshvb ar yhtkubq45ytvb iuyh " "ackeruniou viytdfjhg nvldkrjgnal"); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; err = hs_compile(".ck", HS_FLAG_CASELESS | HS_FLAG_SINGLEMATCH, HS_MODE_BLOCK, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data1.c_str(), data1.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(1U, matchCount); // only one match // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } class HyperscanLiteralLengthTest : public TestWithParam { protected: virtual void SetUp() { literal_len = GetParam(); } size_t literal_len; }; TEST_P(HyperscanLiteralLengthTest, FloatingBlock) { SCOPED_TRACE(literal_len); const string pattern(literal_len, 'a'); const string data(literal_len + 4, 'a'); SCOPED_TRACE(pattern); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; err = hs_compile(pattern.c_str(), 0, HS_MODE_BLOCK, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(5U, matchCount); // five matches // Should see no match from five bytes in matchCount = 0; err = hs_scan(db, data.c_str() + 5, data.size() - 5, 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(0U, matchCount); // no matches // teardown err = hs_free_scratch(scratch); 
ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST_P(HyperscanLiteralLengthTest, AnchoredBlock) { SCOPED_TRACE(literal_len); const string pattern = "^" + string(literal_len, 'a'); const string data(literal_len + 4, 'a'); SCOPED_TRACE(pattern); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; err = hs_compile(pattern.c_str(), 0, HS_MODE_BLOCK, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(1U, matchCount); // one match // Should see no match from five bytes in matchCount = 0; err = hs_scan(db, data.c_str() + 5, data.size() - 5, 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(0U, matchCount); // no matches // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } INSTANTIATE_TEST_CASE_P(HyperscanLiteralLength, HyperscanLiteralLengthTest, Values(1, 2, 3, 4, 8, 16, 17, 32, 100, 200, 400, 1000, 4096, 8192, 15000, 15999)); struct CallbackStopData { const char *pattern; unsigned int flags; const char *corpus; }; class CallbackReturnStop : public TestWithParam { }; TEST_P(CallbackReturnStop, Block) { const CallbackStopData ¶ms = GetParam(); SCOPED_TRACE(params.pattern); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; err = hs_compile(params.pattern, params.flags, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, params.corpus, strlen(params.corpus), 0, scratch, stopHandler, nullptr); 
ASSERT_EQ(1U, matchCount) << "One match exactly should be recorded."; ASSERT_EQ(HS_SCAN_TERMINATED, err); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST_P(CallbackReturnStop, Streaming) { const CallbackStopData ¶ms = GetParam(); SCOPED_TRACE(params.pattern); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; err = hs_compile(params.pattern, params.flags, HS_MODE_STREAM, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); hs_stream_t *stream = nullptr; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(stream != nullptr); matchCount = 0; err = hs_scan_stream(stream, params.corpus, strlen(params.corpus), 0, scratch, stopHandler, nullptr); ASSERT_EQ(1U, matchCount) << "One match exactly should be recorded."; ASSERT_EQ(HS_SCAN_TERMINATED, err); err = hs_close_stream(stream, scratch, stopHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST_P(CallbackReturnStop, Vectored) { const CallbackStopData ¶ms = GetParam(); SCOPED_TRACE(params.pattern); // build a database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; err = hs_compile(params.pattern, params.flags, HS_MODE_VECTORED, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; const char *data[] = { params.corpus }; unsigned int len[] = { (unsigned int)strlen(params.corpus) }; err = hs_scan_vector(db, data, len, 1, 0, scratch, stopHandler, nullptr); ASSERT_EQ(1U, matchCount) << "One match exactly should be recorded."; 
ASSERT_EQ(HS_SCAN_TERMINATED, err); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } static const CallbackStopData callbackStopTests[] = { { "foobar", 0, "xxxfoobarxxxfoobarxxxfoobar" }, { "a", 0, "xxxaaaaaaaaaaaaaaaaaaa" }, { "a", HS_FLAG_CASELESS, "xxxAaAaAaAa" }, { ".", 0, "acbdef" }, { ".", HS_FLAG_DOTALL, "abcdef" }, { "foo.*bar.*foo", HS_FLAG_DOTALL, "foobarfoobarfoobarfoobarfoo" }, { "(badger.*){3,}", HS_FLAG_DOTALL, "badger badger badger badger badger badger badger badger" }, { "foobar.*", 0, "foobarxxx" }, { "[012]{3,7}.*abba", 0, "0120120120abbaabba" }, }; INSTANTIATE_TEST_CASE_P(HyperscanTestBehaviour, CallbackReturnStop, ValuesIn(callbackStopTests)); // Serialization, then deserialization: A simple case, with just a single // trivial pattern. TEST(HyperscanTestBehaviour, SerializedDogfood1) { // build database hs_database_t *db = buildDB("puppy treats!", 0, 0, HS_MODE_BLOCK); ASSERT_TRUE(db != nullptr); hs_error_t err; size_t origSize; err = hs_database_size(db, &origSize); ASSERT_EQ(HS_SUCCESS, err); // serialise to buffer char *bytes = nullptr; size_t length = 0; err = hs_serialize_database(db, &bytes, &length); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(bytes != nullptr); ASSERT_LT(0U, length); // free original database hs_free_database(db); db = nullptr; // deserialize hs_database_t *db2 = nullptr; err = hs_deserialize_database(bytes, length, &db2); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db2 != nullptr); free(bytes); // tests size_t newSize; err = hs_database_size(db2, &newSize); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(newSize, origSize) << "Database lengths should match."; // check that we can get a match out of it lastMatchTo = 0; const char *data = "delicious puppy treats!"; unsigned int len = strlen(data); hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db2, &scratch); ASSERT_EQ(HS_SUCCESS, err); err = hs_scan(db2, data, len, 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); 
ASSERT_EQ(len, lastMatchTo); err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db2); } // A more complicated case, with several patterns TEST(HyperscanTestBehaviour, SerializedDogfood2) { // build database hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; static const vector expressions { "puppy treats!", "foobar", "foo.*bar.*foo", "(badger.*){3,}", "[012]{3,7}.*abba" }; static const vector flags { 0, 0, HS_FLAG_DOTALL, HS_FLAG_DOTALL, 0 }; static const vector ids { 1000, 1001, 1002, 1003, 1004 }; size_t num = expressions.size(); ASSERT_EQ(num, flags.size()); ASSERT_EQ(num, ids.size()); err = hs_compile_multi(expressions.data(), flags.data(), ids.data(), num, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); ASSERT_EQ(HS_SUCCESS, err); size_t origSize; err = hs_database_size(db, &origSize); ASSERT_EQ(HS_SUCCESS, err); // serialise to buffer char *bytes = nullptr; size_t length = 0; err = hs_serialize_database(db, &bytes, &length); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(bytes != nullptr); ASSERT_LT(0U, length); // free original database hs_free_database(db); db = nullptr; // deserialize hs_database_t *db2 = nullptr; err = hs_deserialize_database(bytes, length, &db2); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db2 != nullptr); free(bytes); // tests size_t newSize; err = hs_database_size(db2, &newSize); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(newSize, origSize) << "Database lengths should match."; // check that we can get a match out of it lastMatchTo = 0; lastMatchId = 0; const char *data = "delicious puppy treats!"; unsigned int len = strlen(data); hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db2, &scratch); ASSERT_EQ(HS_SUCCESS, err); err = hs_scan(db2, data, len, 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(len, lastMatchTo); ASSERT_EQ(ids[0], lastMatchId); // check another pattern const char *data2 = "badger badger badger"; unsigned int len2 = strlen(data2); err = 
hs_scan(db2, data2, len2, 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(len2, lastMatchTo); ASSERT_EQ(ids[3], lastMatchId); err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db2); } // Serialization, then deserialization into a preallocated memory region. A // simple case, with just a single trivial pattern. TEST(HyperscanTestBehaviour, SerializedDogfood3) { // build database hs_database_t *db = buildDB("puppy treats!", 0, 0, HS_MODE_BLOCK); ASSERT_TRUE(db != nullptr); hs_error_t err; size_t origSize; err = hs_database_size(db, &origSize); ASSERT_EQ(HS_SUCCESS, err); // serialise to buffer char *bytes = nullptr; size_t length = 0; err = hs_serialize_database(db, &bytes, &length); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(bytes != nullptr); ASSERT_LT(0U, length); // free original database hs_free_database(db); db = nullptr; // find out how much space we need size_t slength; err = hs_serialized_database_size(bytes, length, &slength); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(slength, origSize) << "Database lengths should match."; // fail tests: try to deserialize into an incorrectly aligned region. char *mem = (char *)malloc(slength + 16); ASSERT_TRUE(mem != nullptr); ASSERT_EQ(0U, (ptrdiff_t)mem % 8); // add one for epic fail err = hs_deserialize_database_at(bytes, length, (hs_database_t *)(mem + 1)); ASSERT_EQ(HS_BAD_ALIGN, err); // add four, which should give us 4-byte alignment which isn't good enough. 
err = hs_deserialize_database_at(bytes, length, (hs_database_t *)(mem + 4)); ASSERT_EQ(HS_BAD_ALIGN, err); // deserialize for real err = hs_deserialize_database_at(bytes, length, (hs_database_t *)mem); ASSERT_EQ(HS_SUCCESS, err); free(bytes); const hs_database_t *db2 = (const hs_database_t *)mem; // tests size_t newSize; err = hs_database_size(db2, &newSize); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(newSize, origSize) << "Database lengths should match."; // check that we can get a match out of it lastMatchTo = 0; const char *data = "delicious puppy treats!"; unsigned int len = strlen(data); hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db2, &scratch); ASSERT_EQ(HS_SUCCESS, err); err = hs_scan(db2, data, len, 0, scratch, singleHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(len, lastMatchTo); err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); free(mem); } // Test that we do get a pending match on stream close. TEST(HyperscanTestBehaviour, CloseStreamMatch) { hs_error_t err; // build a database hs_database_t *db = buildDB("foo.*bar$", 0, 0, HS_MODE_STREAM); ASSERT_TRUE(db != nullptr); // alloc some scratch hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); hs_stream_t *stream = nullptr; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(stream != nullptr); matchCount = 0; const string data("foo bar"); err = hs_scan_stream(stream, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(0U, matchCount); // hasn't matched until stream end err = hs_close_stream(stream, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(1U, matchCount); // our match was returned // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } /* Test that if we scan without a callback during main phase that we are still * tracking stuff internally */ 
TEST(HyperscanTestBehaviour, NoMainCB) {
    // compile the streaming database under test
    hs_database_t *database = buildDB("foo|foo.*bar$", 0, 0, HS_MODE_STREAM);
    ASSERT_TRUE(database != nullptr);

    // scratch space for the scan and the close
    hs_scratch_t *scratchSpace = nullptr;
    hs_error_t rc = hs_alloc_scratch(database, &scratchSpace);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_TRUE(scratchSpace != nullptr);

    hs_stream_t *streamHandle = nullptr;
    rc = hs_open_stream(database, 0, &streamHandle);
    ASSERT_EQ(HS_SUCCESS, rc);
    ASSERT_TRUE(streamHandle != nullptr);

    // Scan with no callback at all: nothing can be counted here.
    const string corpus("foo bar");
    matchCount = 0;
    rc = hs_scan_stream(streamHandle, corpus.c_str(), corpus.size(), 0,
                        scratchSpace, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_EQ(0U, matchCount); // hasn't matched until stream end

    // Closing with a callback must deliver the match.
    rc = hs_close_stream(streamHandle, scratchSpace, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_EQ(1U, matchCount); // our match was returned

    // teardown
    rc = hs_free_scratch(scratchSpace);
    ASSERT_EQ(HS_SUCCESS, rc);
    hs_free_database(database);
}

// Test that we don't get a match that's pending on stream close when we use
// hs_close_stream with no event handler.
TEST(HyperscanTestBehaviour, CloseStreamNoMatch) {
    // compile the streaming database under test
    hs_database_t *database = buildDB("foo.*bar$", 0, 0, HS_MODE_STREAM);
    ASSERT_TRUE(database != nullptr);

    // scratch space for the scan and the close
    hs_scratch_t *scratchSpace = nullptr;
    hs_error_t rc = hs_alloc_scratch(database, &scratchSpace);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_TRUE(scratchSpace != nullptr);

    hs_stream_t *streamHandle = nullptr;
    rc = hs_open_stream(database, 0, &streamHandle);
    ASSERT_EQ(HS_SUCCESS, rc);
    ASSERT_TRUE(streamHandle != nullptr);

    const string corpus("foo bar");
    matchCount = 0;
    rc = hs_scan_stream(streamHandle, corpus.c_str(), corpus.size(), 0,
                        scratchSpace, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_EQ(0U, matchCount); // hasn't matched until stream end

    // Close without a callback: the counter must stay untouched.
    rc = hs_close_stream(streamHandle, scratchSpace, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, rc);
    EXPECT_EQ(0U, matchCount); // no match was returned

    // teardown
    rc = hs_free_scratch(scratchSpace);
    ASSERT_EQ(HS_SUCCESS, rc);
    hs_free_database(database);
}

// Test that we can close a stream correctly after we've terminated matching.
TEST(HyperscanTestBehaviour, CloseStreamAfterTermination) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("foo.*(bar|baz\\z)", 0, 0, HS_MODE_STREAM);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    hs_stream_t *stream = nullptr;
    err = hs_open_stream(db, 0, &stream);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream != nullptr);

    matchCount = 0;
    const string data("foo bar baz");
    err = hs_scan_stream(stream, data.c_str(), data.size(), 0, scratch,
                         stopHandler, nullptr);
    ASSERT_EQ(HS_SCAN_TERMINATED, err);
    EXPECT_EQ(1U, matchCount); // after "bar"

    err = hs_close_stream(stream, scratch, stopHandler, nullptr);
    EXPECT_EQ(1U, matchCount); // no match for baz
    ASSERT_EQ(HS_SUCCESS, err);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// The Vectored tests below scan "foo", a three-character separator and "bar"
// as separate buffers. The separator literal must really hold three
// characters: hs_scan_vector is told each non-empty buffer is 3 bytes long, so
// a shorter literal would be read past its end.
TEST(HyperscanTestBehaviour, Vectored1) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("^foo.*bar", HS_FLAG_DOTALL, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "foo", "   ", "bar" };
    unsigned int len[] = { 3, 3, 3};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 3, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// As Vectored1, with an empty buffer at the front of the vector.
TEST(HyperscanTestBehaviour, Vectored2) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("^foo.*bar", HS_FLAG_DOTALL, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "", "foo", "   ", "bar" };
    unsigned int len[] = { 0, 3, 3, 3};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 4, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// As Vectored1, with an empty buffer in the middle of the vector.
TEST(HyperscanTestBehaviour, Vectored3) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("^foo.*bar", HS_FLAG_DOTALL, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "foo", "   ", "", "bar" };
    unsigned int len[] = { 3, 3, 0, 3};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 4, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// As Vectored1, with an empty buffer at the end of the vector.
TEST(HyperscanTestBehaviour, Vectored4) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("^foo.*bar", HS_FLAG_DOTALL, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "foo", "   ", "bar", "" };
    unsigned int len[] = { 3, 3, 3, 0};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 4, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// A buffer count of zero is passed, so no data is scanned and no match is
// expected.
TEST(HyperscanTestBehaviour, Vectored5) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("^foo.*bar", HS_FLAG_DOTALL, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "", "foo", "   ", "bar" };
    unsigned int len[] = { 0, 3, 3, 3};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 0, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(0U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// Zero buffers again, but "^" with HS_FLAG_ALLOWEMPTY is expected to report
// one match.
TEST(HyperscanTestBehaviour, Vectored6) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("^", HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "", "foo", "   ", "bar" };
    unsigned int len[] = { 0, 3, 3, 3};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 0, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// Zero buffers with "$" and HS_FLAG_ALLOWEMPTY: one match is expected.
TEST(HyperscanTestBehaviour, Vectored7) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("$", HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, 0,
                                HS_MODE_VECTORED);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    const char *data[] = { "", "foo", "   ", "bar" };
    unsigned int len[] = { 0, 3, 3, 3};

    matchCount = 0;
    err = hs_scan_vector(db, data, len, 0, 0, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// Two streams on one database and one scratch: only the first stream is
// expected to report a match when it is closed.
TEST(HyperscanTestBehaviour, MultiStream1) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("foo.*bar.*\\b", 0, 0, HS_MODE_STREAM);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    hs_stream_t *stream = nullptr;
    err = hs_open_stream(db, 0, &stream);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream != nullptr);

    hs_stream_t *stream2 = nullptr;
    err = hs_open_stream(db, 0, &stream2);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream2 != nullptr);

    matchCount = 0;
    const string data("foo bara");
    err = hs_scan_stream(stream, data.c_str(), data.size(), 0, scratch,
                         countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(0U, matchCount); // hasn't matched until stream end

    const string data2("foo bar ");
    err = hs_scan_stream(stream2, data2.c_str(), data2.size(), 0, scratch,
                         nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(0U, matchCount);

    err = hs_close_stream(stream, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    err = hs_close_stream(stream2, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// Same as MultiStream1 with the scan and close order of the two streams
// swapped.
TEST(HyperscanTestBehaviour, MultiStream2) {
    hs_error_t err;

    // build a database
    hs_database_t *db = buildDB("foo.*bar.*\\b", 0, 0, HS_MODE_STREAM);
    ASSERT_TRUE(db != nullptr);

    // alloc some scratch
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_TRUE(scratch != nullptr);

    hs_stream_t *stream = nullptr;
    err = hs_open_stream(db, 0, &stream);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream != nullptr);

    hs_stream_t *stream2 = nullptr;
    err = hs_open_stream(db, 0, &stream2);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream2 != nullptr);

    matchCount = 0;
    const string data2("foo bar ");
    err = hs_scan_stream(stream2, data2.c_str(), data2.size(), 0, scratch,
                         nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(0U, matchCount);

    const string data("foo bara");
    err = hs_scan_stream(stream, data.c_str(), data.size(), 0, scratch,
                         countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(0U, matchCount); // hasn't matched until stream end

    err = hs_close_stream(stream2, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(0U, matchCount);

    err = hs_close_stream(stream, scratch, countHandler, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    EXPECT_EQ(1U, matchCount);

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

// Three single-match patterns scanned over "match" in one stream: each is
// expected to be recorded exactly once, at offset 5.
TEST(regression, UE_1005) {
    hs_error_t err;
    vector<pattern> patterns;
    patterns.push_back(pattern("match[^Z]*", HS_FLAG_DOTALL|HS_FLAG_SINGLEMATCH, 1));
    patterns.push_back(pattern("[^X]+\\z", HS_FLAG_DOTALL|HS_FLAG_SINGLEMATCH, 2));
    patterns.push_back(pattern("[^Y]+\\z", HS_FLAG_DOTALL|HS_FLAG_SINGLEMATCH, 3));

    hs_database_t *db = buildDB(patterns, HS_MODE_STREAM);
    ASSERT_NE(nullptr, db);

    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);

    hs_stream_t *stream = nullptr;

    CallBackContext c;

    const char dataA[] = "match";

    err = hs_open_stream(db, 0, &stream);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream != nullptr);

    err = hs_scan_stream(stream, dataA, strlen(dataA), 0, scratch, record_cb,
                         (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_close_stream(stream, scratch, record_cb, (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);

    ASSERT_EQ(3U, c.matches.size());
    ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(5, 1)) != c.matches.end());
    ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(5, 2)) != c.matches.end());
    ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(5, 3)) != c.matches.end());

    // teardown
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    hs_free_database(db);
}

TEST(regression, UE_2425) {
    const char regex[] = "(b|[cd](\\B|a){14}|[ba]cd.[^ece]b.[da]cbe|d[cad]cb.[da](cd|[abedc])|\\ba.edbac){18}";
    unsigned flags = HS_FLAG_DOTALL | HS_FLAG_CASELESS | HS_FLAG_SINGLEMATCH |
                     HS_FLAG_UTF8 | HS_FLAG_PREFILTER;

    vector<pattern> patterns;
    patterns.push_back(pattern(regex, flags, 1));

    // This bug was an assertion failure at Puff construction time (Puff
    // generated by prefilter reductions.)
hs_database_t *db = buildDB(patterns, HS_MODE_STREAM); ASSERT_NE(nullptr, db); hs_free_database(db); } #ifdef NDEBUG TEST(regression, UE_2485) { const char regex[] = "(?:(.EeEa|((a{2}BD[bc]Bd[eae]|[DCd]|c|ebCa|d)){7,21})(E{5,}A{4,}[Cc].cc{3,6}|eCec|e+CaBEd|[Bb])){10}DB(a|[AAda])..A?DE?E"; unsigned flags = HS_FLAG_DOTALL | HS_FLAG_CASELESS | HS_FLAG_UTF8 | HS_FLAG_PREFILTER; vector patterns; patterns.push_back(pattern(regex, flags, 1)); // This bug was an assertion failure at compile time due to // removeRedundancy rendering a graph unimplementable after a merge, for // the prefiltered pattern. hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK); ASSERT_NE(nullptr, db); hs_free_database(db); } #endif TEST(regression, UE_2452) { const char regex[] = "/ab.b[bca]{2,}ca((?:c|(abc(?sxmi-xm)){10,14}|c|b|[abcb])){4,23}acbcbb*ba((?:(a|.{4,}|.|[acba])){3,16}a)+"; unsigned flags = HS_FLAG_MULTILINE | HS_FLAG_CASELESS | HS_FLAG_UTF8 | HS_FLAG_UCP | HS_FLAG_PREFILTER; vector patterns; patterns.push_back(pattern(regex, flags, 1)); // This bug was an interaction between bounded repeat analysis, // prefiltering and edge redundancy. hs_database_t *db = buildDB(patterns, HS_MODE_STREAM); ASSERT_NE(nullptr, db); hs_free_database(db); } #ifdef NDEBUG TEST(regression, UE_2595) { const char regex[] = "(?:(?:acAa|c[EAA]aEb|((?:CC[bdd].cE((?x-msix)BE){32}(?:\\B)){16,19}CdD.E(E|E|B)){3,6}|E(a|d|.)(?:(?xs-isxm)|b|.|C))){17,}"; unsigned flags = HS_FLAG_MULTILINE | HS_FLAG_CASELESS | HS_FLAG_SINGLEMATCH | HS_FLAG_PREFILTER; vector patterns; patterns.push_back(pattern(regex, flags, 1)); // This pattern has some very large depth values and isn't compilable by // PCRE at all. 
hs_database_t *db = buildDB(patterns, HS_MODE_STREAM); ASSERT_NE(nullptr, db); hs_free_database(db); } #endif TEST(regression, UE_2762) { const vector patterns = { pattern("\\Aa\\z", HS_FLAG_MULTILINE, 1), pattern("^a", HS_FLAG_MULTILINE, 2), pattern("a|^a", HS_FLAG_MULTILINE | HS_FLAG_SOM_LEFTMOST, 3)}; hs_database_t *db = buildDB(patterns, HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; hs_error_t err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); hs_stream_t *stream = nullptr; CallBackContext c; const char dataA[] = "a"; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_NE(nullptr, stream); err = hs_scan_stream(stream, dataA, strlen(dataA), 0, scratch, record_cb, (void *)&c); ASSERT_EQ(HS_SUCCESS, err); err = hs_close_stream(stream, scratch, record_cb, (void *)&c); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(3U, c.matches.size()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(1, 1)) != c.matches.end()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(1, 2)) != c.matches.end()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(1, 3)) != c.matches.end()); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(regression, UE_2763) { const vector patterns = { pattern("aaa.a+$", HS_FLAG_CASELESS, 1), pattern("aaa.a", HS_FLAG_CASELESS | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8, 2)}; hs_database_t *db = buildDB(patterns, HS_MODE_STREAM); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; hs_error_t err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); hs_stream_t *stream = nullptr; CallBackContext c; const char dataA[] = "aAasaA"; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_NE(nullptr, stream); err = hs_scan_stream(stream, dataA, strlen(dataA), 0, scratch, record_cb, (void *)&c); ASSERT_EQ(HS_SUCCESS, err); err = hs_close_stream(stream, scratch, 
record_cb, (void *)&c); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(2U, c.matches.size()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(6, 1)) != c.matches.end()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(5, 2)) != c.matches.end()); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(regression, UE_2798) { const vector patterns = { pattern("([ab]b|aab+)$", HS_FLAG_DOTALL, 1), pattern("ab+", HS_FLAG_SOM_LEFTMOST, 2), pattern("a(b.)?ba+b", 0, 3)}; hs_database_t *db = buildDB(patterns, HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; hs_error_t err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); hs_stream_t *stream = nullptr; CallBackContext c; const char dataA[] = "ab_baab\n"; err = hs_open_stream(db, 0, &stream); ASSERT_EQ(HS_SUCCESS, err); ASSERT_NE(nullptr, stream); err = hs_scan_stream(stream, dataA, strlen(dataA), 0, scratch, record_cb, (void *)&c); ASSERT_EQ(HS_SUCCESS, err); err = hs_close_stream(stream, scratch, record_cb, (void *)&c); ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(4U, c.matches.size()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(7, 1)) != c.matches.end()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(2, 2)) != c.matches.end()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(7, 2)) != c.matches.end()); ASSERT_TRUE(find(c.matches.begin(), c.matches.end(), MatchRecord(7, 3)) != c.matches.end()); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(PcreSpace, NewPcre) { const char regex[] = "\\s"; const string data = "\x09\x0a\x0b\x0c\x0d\x20"; /* aka "\t\n\v\f\r " */ unsigned flags = 0; vector patterns; patterns.push_back(pattern(regex, flags, 1)); hs_error_t err; hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; err = 
hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(data.size(), matchCount); // all are spaces // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(PcreSpace, NewPcreClass) { const char regex[] = "[\\s]"; const string data = "\x09\x0a\x0b\x0c\x0d\x20"; /* aka "\t\n\v\f\r " */ unsigned flags = 0; vector patterns; patterns.push_back(pattern(regex, flags, 1)); hs_error_t err; hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(data.size(), matchCount); // all are spaces // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(PcreSpace, NewPcreNeg) { const char regex[] = "\\S"; const string data = "\x09\x0a\x0b\x0c\x0d\x20"; /* aka "\t\n\v\f\r " */ unsigned flags = 0; vector patterns; patterns.push_back(pattern(regex, flags, 1)); hs_error_t err; hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(0, matchCount); // no matches, all are spaces // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(PcreSpace, NewPcreClassNeg) { const char regex[] = "[\\S]"; const string data = "\x09\x0a\x0b\x0c\x0d\x20"; /* aka "\t\n\v\f\r " */ unsigned flags = 0; vector patterns; 
patterns.push_back(pattern(regex, flags, 1)); hs_error_t err; hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(0, matchCount); // no matches, all are spaces // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } TEST(Parser, NewlineTerminatedComment) { // Extended mode comments and whitespace should be stripped. const vector patterns = { pattern("(?x)foo # initial comment \n bar # second comment \n baz", 0, 1)}; const string data = "foobarbaz"; hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK); ASSERT_NE(nullptr, db); hs_scratch_t *scratch = nullptr; hs_error_t err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(scratch != nullptr); matchCount = 0; err = hs_scan(db, data.c_str(), data.size(), 0, scratch, countHandler, nullptr); ASSERT_EQ(HS_SUCCESS, err); EXPECT_EQ(1, matchCount); // teardown err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } } // namespace