/* Copyright (c) 2020 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define SIMDE_TESTS_CURRENT_ISAX clmul #include #include static int test_simde_x_clmul_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a; const uint64_t b; const uint64_t r; } test_vec[] = { { UINT64_C( 3172393302982392208), UINT64_C(13735374816641287390), UINT64_C( 3846784924301700320) }, { UINT64_C( 2044753197648351232), UINT64_C(17171186745849913133), UINT64_C( 9324690142177808384) }, { UINT64_C(15878177146981999432), UINT64_C(17725848129279761057), UINT64_C(14704261962833067592) }, { UINT64_C(14248111140186106732), UINT64_C(11051947085071581716), UINT64_C( 2350367993186272112) }, { UINT64_C( 3864970927616292810), UINT64_C( 3037243358930395708), UINT64_C( 4601354589070078104) }, { UINT64_C(14194084542956518303), UINT64_C( 5148061316303689350), UINT64_C(18148658662839280834) }, { UINT64_C(10905099652190648717), UINT64_C(16104009427115953300), UINT64_C( 6274968844062237796) }, { UINT64_C( 5809100127999444266), UINT64_C(17936880103521309735), UINT64_C( 9053755385840400022) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_x_clmul_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); uint64_t b = simde_test_codegen_random_u64(); uint64_t r = simde_x_clmul_u64(a, b); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_clmulepi64_epi128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 1223702322309085129), -INT64_C( 8879567886794638539) }, { INT64_C( 7036755598991405826), -INT64_C( 7046740992093250117) }, { INT64_C( 2473170614696794478), INT64_C( 2784948867630424147) } }, { { -INT64_C( 1917888373892700613), INT64_C( 8617134813468493061) }, { INT64_C( 304687083854804713), -INT64_C( 9179921235781725292) }, { -INT64_C( 3162169552144031667), INT64_C( 131895041431953025) } }, { { INT64_C( 3393838533477392516), -INT64_C( 6248506160320443813) }, { INT64_C( 2728261705044924814), -INT64_C( 1597447023443835378) }, { -INT64_C( 3677024356011843784), INT64_C( 1792451366425607224) } }, { { INT64_C( 8787708278673885338), -INT64_C( 7121999645218556224) }, { INT64_C( 8458129975209667500), -INT64_C( 2907745100206865833) }, { INT64_C( 4051687636460756032), INT64_C( 7169855552534900382) } }, { { -INT64_C( 4972660111729507483), -INT64_C( 8759108909232666683) }, { -INT64_C( 8220718353488812785), -INT64_C( 5279645577665465434) }, { INT64_C( 4586024153164530195), INT64_C( 6527978219423536898) } }, { { -INT64_C( 6168462655054260815), INT64_C( 1360068410154590078) }, { INT64_C( 7897188145811727061), -INT64_C( 9138636182688571098) }, { INT64_C( 8359736955429931494), INT64_C( 444600807865829376) } }, { { INT64_C( 2157102855485155168), INT64_C( 3622676809066638896) }, { INT64_C( 7434551563239935736), -INT64_C( 1799456849400005194) }, { INT64_C( 7717179827338582336), INT64_C( 747598803828444825) } }, { { -INT64_C( 851711651099167885), -INT64_C( 1001364843725806454) }, { -INT64_C( 2024969269964351683), INT64_C( 4812501714981208922) }, { INT64_C( 3859857334761625956), INT64_C( 4404639048340928077) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r; switch(i & 3) { case 0: r = simde_mm_clmulepi64_si128(a, b, 0); break; case 1: r = simde_mm_clmulepi64_si128(a, b, 1); break; case 2: r = simde_mm_clmulepi64_si128(a, b, 16); break; case 3: r = simde_mm_clmulepi64_si128(a, b, 17); break; default: HEDLEY_UNREACHABLE(); } simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r; switch(i & 3) { case 0: r = simde_mm_clmulepi64_si128(a, b, 0); break; case 1: r = simde_mm_clmulepi64_si128(a, b, 1); break; case 2: r = simde_mm_clmulepi64_si128(a, b, 16); break; case 3: r = simde_mm_clmulepi64_si128(a, b, 17); break; } simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_clmulepi64_epi128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 8861067646028006915), INT64_C( 8317570772367584624), -INT64_C( 8479226455593734885), INT64_C( 4655311630250670693) }, { -INT64_C( 24828561109850826), -INT64_C( 2740442853538738953), INT64_C( 6245039060616885418), -INT64_C( 5596787430870122544) }, { -INT64_C( 3605505260195947138), INT64_C( 8993753863940455899), -INT64_C( 4734815501727457266), INT64_C( 2980667195098018359) } }, { { -INT64_C( 2798286907617568867), INT64_C( 7052730161524750920), INT64_C( 562435620224459400), INT64_C( 4673170309893549227) }, { INT64_C( 8305365776004329350), INT64_C( 8893019314550579025), -INT64_C( 4324931084003054661), INT64_C( 797917258246198574) }, { INT64_C( 7257084772150149552), INT64_C( 1360287739278401518), -INT64_C( 6290854813130530827), INT64_C( 3504988492777302678) } }, { { INT64_C( 7283082647590633076), -INT64_C( 6790626808863749036), -INT64_C( 6440553767362054461), -INT64_C( 4381007043721983580) }, { -INT64_C( 5656110618526963961), -INT64_C( 8392532913333670143), INT64_C( 5660748388734737265), -INT64_C( 7521386788528191859) }, { INT64_C( 1518845827335983732), INT64_C( 3548116382125449697), -INT64_C( 3695683971048001449), INT64_C( 6487700039540703498) } }, { { INT64_C( 8601371122160828753), -INT64_C( 8973451863074711772), INT64_C( 7996418085041790208), INT64_C( 7649900638574833409) }, { INT64_C( 3951475540046109929), INT64_C( 1829192730767668173), -INT64_C( 3835451934689210762), -INT64_C( 2328776664548635093) }, { INT64_C( 2331280469638464916), INT64_C( 909250411302452898), -INT64_C( 6456576299602598101), INT64_C( 3116995424408104456) } }, { { INT64_C( 624052268936643937), -INT64_C( 2704049405787911352), INT64_C( 2642477159451540759), -INT64_C( 1307707947028576508) }, { INT64_C( 8439474315985951195), INT64_C( 7499143535494825940), INT64_C( 4515407529790423948), INT64_C( 248844939888759290) }, { INT64_C( 9077383510885573499), INT64_C( 260944091473821904), -INT64_C( 1092040103143313820), INT64_C( 519807910688705558) } }, { { -INT64_C( 5562163387622042580), -INT64_C( 9080947942458940096), INT64_C( 3438692059224424769), INT64_C( 3597296407246467522) }, { -INT64_C( 9183020781049018626), -INT64_C( 5154639738173294080), -INT64_C( 1103123323737113012), -INT64_C( 8136941776144800035) }, { -INT64_C( 7404742307908519552), INT64_C( 4664504263945087872), INT64_C( 4990989065081341336), INT64_C( 1275537234475318560) } }, { { -INT64_C( 392381640637440051), -INT64_C( 746357658714333324), -INT64_C( 6916359408935577627), INT64_C( 8191367829375868557) }, { -INT64_C( 8707415128020215756), INT64_C( 7298673085559019947), INT64_C( 8453956785101390063), -INT64_C( 7030048190026754982) }, { INT64_C( 41632217925301279), INT64_C( 2511585804835601581), -INT64_C( 3040775301231111182), INT64_C( 6698050587281837184) } }, { { -INT64_C( 6416513484366942376), INT64_C( 7995090548861593103), INT64_C( 4855289321808227333), -INT64_C( 8263908277331233721) }, { INT64_C( 2469075490216353186), -INT64_C( 8701335448767611566), INT64_C( 1819126401625972497), -INT64_C( 3059321611736088375) }, { -INT64_C( 7112732884917849042), INT64_C( 3926000826017084906), INT64_C( 606846234052846655), INT64_C( 8057559887502386125) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r; switch(i & 3) { case 0: r = simde_mm256_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm256_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm256_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm256_clmulepi64_epi128(a, b, 17); break; default: HEDLEY_UNREACHABLE(); } simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r; switch(i & 3) { case 0: r = simde_mm256_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm256_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm256_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm256_clmulepi64_epi128(a, b, 17); break; } simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_clmulepi64_epi128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 4508748162316205256), -INT64_C( 3099372905628098829), INT64_C( 1016139251664777007), INT64_C( 4077612542125204877), -INT64_C( 5671456079578199782), -INT64_C( 2619368007630660594), INT64_C( 1145311247069902226), -INT64_C( 6767389031831375652) }, { -INT64_C( 7502866647642040208), -INT64_C( 4910877245634443694), -INT64_C( 8712927427156658812), -INT64_C( 4787193028275804570), -INT64_C( 8992902091992483055), INT64_C( 3365977461916539858), INT64_C( 2612731813066721039), INT64_C( 3547044437640259119) }, { INT64_C( 7408876882598746752), INT64_C( 7975566437154259906), -INT64_C( 7050998394792826820), INT64_C( 515545422847956525), INT64_C( 480365564844410554), INT64_C( 6443112411602834649), INT64_C( 8842248389601172078), INT64_C( 126356363133991767) } }, { { -INT64_C( 4462225071124306595), -INT64_C( 112462523525131112), INT64_C( 2133857325112992955), -INT64_C( 5840373422947127646), INT64_C( 6101057788694085368), -INT64_C( 7507491933819344413), INT64_C( 2789888179682970027), INT64_C( 2114961351064965467) }, { -INT64_C( 3915684273188333928), -INT64_C( 1040776515624944102), -INT64_C( 6440357149699039500), -INT64_C( 372484428694190190), -INT64_C( 6417076916505879999), -INT64_C( 1410985712041489355), -INT64_C( 3838468523262871862), -INT64_C( 438725240223101812) }, { -INT64_C( 4547041686557986496), INT64_C( 5174280718535161140), -INT64_C( 4052619716788453016), INT64_C( 4691236899805083089), INT64_C( 2621818318605369123), INT64_C( 6491826105943833570), INT64_C( 6742861852404586798), INT64_C( 659489253161718155) } }, { { INT64_C( 7160742299907929903), -INT64_C( 900076532547060322), INT64_C( 8737198701753722943), INT64_C( 5008091942838911439), INT64_C( 5771750718125940872), INT64_C( 9185769016342804318), INT64_C( 1492721472516751952), -INT64_C( 553068161141104649) }, { INT64_C( 1350737615534379964), INT64_C( 6736843039141635263), -INT64_C( 6778390594892172139), INT64_C( 7050670702599309936), INT64_C( 1160958178873018454), INT64_C( 6745141857603269937), -INT64_C( 626802408982891262), INT64_C( 6248657622998252976) }, { -INT64_C( 1487980720625885147), INT64_C( 2020763090915505301), -INT64_C( 3819958634599510576), INT64_C( 1236167621747222011), -INT64_C( 2121305814586519288), INT64_C( 1348880127149281018), INT64_C( 5474740827809109760), INT64_C( 294120293277450556) } }, { { INT64_C( 5015177187211445817), INT64_C( 4132367358880885043), INT64_C( 7409040097932578900), -INT64_C( 7744375673736877521), -INT64_C( 7542483310570706951), INT64_C( 5319011699478746720), INT64_C( 1700661811459962236), -INT64_C( 4426530695618800223) }, { -INT64_C( 4057705442362544204), -INT64_C( 6109362486595664903), -INT64_C( 944497729116152860), -INT64_C( 6027926296062907443), INT64_C( 4853882362087614255), -INT64_C( 3561668997202986057), -INT64_C( 5330150999214821181), -INT64_C( 6535105179831961942) }, { INT64_C( 6360605561114465723), INT64_C( 1874229146988567910), -INT64_C( 7830980124631947605), INT64_C( 6777172427082462138), INT64_C( 6513423505254783264), INT64_C( 3838992877777853549), INT64_C( 3476875994488591594), INT64_C( 8855849316839725126) } }, { { INT64_C( 4492507234734576331), -INT64_C( 5473360150391322286), -INT64_C( 3546918803188636377), -INT64_C( 9087772883149647162), INT64_C( 330715171927825361), -INT64_C( 7341065912809636999), INT64_C( 9143275604432054512), -INT64_C( 2037426853844986750) }, { -INT64_C( 5085177230146187082), -INT64_C( 1613903879242273139), INT64_C( 3918380942342636586), INT64_C( 7794648714629225014), -INT64_C( 4075403372112025736), INT64_C( 131556394091910384), -INT64_C( 1616998204104715867), -INT64_C( 7169627097723110049) }, { -INT64_C( 3555569070185312534), INT64_C( 1929572009916099035), -INT64_C( 6395368968046142570), INT64_C( 1709120932256690080), INT64_C( 2523358017318337272), INT64_C( 247934416460539433), INT64_C( 244155990319815984), INT64_C( 3215872428597678272) } }, { { -INT64_C( 8590961978253224507), -INT64_C( 7840127111964488358), -INT64_C( 4993511927647037930), INT64_C( 664649571826066808), -INT64_C( 5925102877851218524), -INT64_C( 7864392723294439589), -INT64_C( 4955067607878416345), -INT64_C( 125379126724457739) }, { INT64_C( 8295348044106975774), INT64_C( 3694526249633119286), -INT64_C( 6386401476306689696), INT64_C( 7229159512343623164), -INT64_C( 4253663533883617684), -INT64_C( 6111460234302671889), -INT64_C( 3263512319597026211), INT64_C( 2525248044784915809) }, { INT64_C( 8089940058166097068), INT64_C( 4477169705139521861), -INT64_C( 1959136724352530176), INT64_C( 398152220925199002), -INT64_C( 2261768737661003756), INT64_C( 8063510882445894936), INT64_C( 8314094987896017049), INT64_C( 5675994356149893427) } }, { { -INT64_C( 4442063450844700663), -INT64_C( 1637078751344930479), -INT64_C( 6667107275842742831), INT64_C( 2674669911162921283), -INT64_C( 5346874038663002826), INT64_C( 5286550275917777639), INT64_C( 4977438571891807535), -INT64_C( 1563770842758750709) }, { INT64_C( 305612682310190883), INT64_C( 6272622752075124555), INT64_C( 8174837505515387946), INT64_C( 3735986652260643385), -INT64_C( 2806032233167860500), -INT64_C( 8860928630086400377), -INT64_C( 1982670522702245980), -INT64_C( 1938331273838651947) }, { -INT64_C( 5520935947472743149), INT64_C( 4537885097490984960), INT64_C( 8936024623814528617), INT64_C( 2290930245258479633), -INT64_C( 3134810519185361790), INT64_C( 6379269477255759439), INT64_C( 7072358411685850147), INT64_C( 4195183069244768317) } }, { { INT64_C( 6042983852705567862), -INT64_C( 4223521988775085949), INT64_C( 965824450707592408), INT64_C( 6359096840191948897), -INT64_C( 5764827233489571872), -INT64_C( 2047185047042326096), INT64_C( 1462207194453140997), INT64_C( 2972568486031604998) }, { -INT64_C( 8882271837156434977), INT64_C( 4522998559612946646), -INT64_C( 137423594610545839), INT64_C( 1546465536708609539), -INT64_C( 713298132974154814), -INT64_C( 6153153657742856668), -INT64_C( 2396541879086868794), -INT64_C( 8316000295776967604) }, { INT64_C( 6427478446298047098), INT64_C( 1161562776606732481), INT64_C( 4521385688827072163), INT64_C( 334400680930988147), INT64_C( 2179120516870574272), INT64_C( 7717920184672985687), -INT64_C( 7275182691465067096), INT64_C( 1538449054100258674) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r; switch(i & 3) { case 0: r = simde_mm512_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm512_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm512_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm512_clmulepi64_epi128(a, b, 17); break; default: HEDLEY_UNREACHABLE(); } simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r; switch(i & 3) { case 0: r = simde_mm512_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm512_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm512_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm512_clmulepi64_epi128(a, b, 17); break; } simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_clmul_u64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_clmulepi64_epi128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_clmulepi64_epi128) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_clmulepi64_epi128) SIMDE_TEST_FUNC_LIST_END #include