/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using pcmpistri to drive it. Does not check the e-vs-i or i-vs-m aspect. */ #include #include #include typedef unsigned int UInt; typedef signed int Int; typedef unsigned char UChar; typedef signed char Char; typedef unsigned long long int ULong; typedef UChar Bool; #define False ((Bool)0) #define True ((Bool)1) //typedef unsigned char V128[16]; typedef union { UChar uChar[16]; UInt uInt[4]; } V128; #define SHIFT_O 11 #define SHIFT_S 7 #define SHIFT_Z 6 #define SHIFT_A 4 #define SHIFT_C 0 #define SHIFT_P 2 #define MASK_O (1ULL << SHIFT_O) #define MASK_S (1ULL << SHIFT_S) #define MASK_Z (1ULL << SHIFT_Z) #define MASK_A (1ULL << SHIFT_A) #define MASK_C (1ULL << SHIFT_C) #define MASK_P (1ULL << SHIFT_P) UInt clz32 ( UInt x ) { Int y, m, n; y = -(x >> 16); m = (y >> 16) & 16; n = 16 - m; x = x >> m; y = x - 0x100; m = (y >> 16) & 8; n = n + m; x = x << m; y = x - 0x1000; m = (y >> 16) & 4; n = n + m; x = x << m; y = x - 0x4000; m = (y >> 16) & 2; n = n + m; x = x << m; y = x >> 14; m = y & ~(y >> 1); return n + 2 - m; } UInt ctz32 ( UInt x ) { return 32 - clz32((~x) & (x-1)); } void expand ( V128* dst, char* summary ) { Int i; assert( strlen(summary) == 16 ); for (i = 0; i < 16; i++) { UChar xx = 0; UChar x = summary[15-i]; if (x >= '0' && x <= '9') { xx = x - '0'; } else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; } else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; } else assert(0); assert(xx < 16); xx = (xx << 4) | xx; assert(xx < 256); dst->uChar[i] = xx; } } void try_istri ( char* which, UInt(*h_fn)(V128*,V128*), UInt(*s_fn)(V128*,V128*), char* summL, char* summR ) { assert(strlen(which) == 2); V128 argL, argR; expand(&argL, summL); expand(&argR, summR); UInt h_res = h_fn(&argL, &argR); UInt s_res = s_fn(&argL, &argR); printf("istri %s %s %s -> %08x %08x %s\n", which, summL, summR, h_res, s_res, h_res == s_res ? 
"" : "!!!!"); } UInt zmask_from_V128 ( V128* arg ) { UInt i, res = 0; for (i = 0; i < 16; i++) { res |= ((arg->uChar[i] == 0) ? 1 : 0) << i; } return res; } ////////////////////////////////////////////////////////// // // // GENERAL // // // ////////////////////////////////////////////////////////// /* Given partial results from a pcmpXstrX operation (intRes1, basically), generate an I format (index value for ECX) output, and also the new OSZACP flags. */ static void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV, /*OUT*/UInt* resOSZACP, UInt intRes1, UInt zmaskL, UInt zmaskR, UInt validL, UInt pol, UInt idx ) { assert((pol >> 2) == 0); assert((idx >> 1) == 0); UInt intRes2 = 0; switch (pol) { case 0: intRes2 = intRes1; break; // pol + case 1: intRes2 = ~intRes1; break; // pol - case 2: intRes2 = intRes1; break; // pol m+ case 3: intRes2 = intRes1 ^ validL; break; // pol m- } intRes2 &= 0xFFFF; // generate ecx value UInt newECX = 0; if (idx) { // index of ms-1-bit newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2)); } else { // index of ls-1-bit newECX = intRes2 == 0 ? 16 : ctz32(intRes2); } *(UInt*)(&resV[0]) = newECX; // generate new flags, common to all ISTRI and ISTRM cases *resOSZACP // A, P are zero = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0] } /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M} variants. For xSTRI variants, the new ECX value is placed in the 32 bits pointed to by *resV. For xSTRM variants, the result is a 128 bit value and is placed at *resV in the obvious way. For all variants, the new OSZACP value is placed at *resOSZACP. argLV and argRV are the vector args. The caller must prepare a 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this must be 1 for each zero byte of of the respective arg. 
For ESTRx variants this is derived from the explicit length indication, and must be 0 in all places except at the bit index corresponding to the valid length (0 .. 16). If the valid length is 16 then the mask must be all zeroes. In all cases, bits 31:16 must be zero. imm8 is the original immediate from the instruction. isSTRM indicates whether this is a xSTRM or xSTRI variant, which controls how much of *res is written. If the given imm8 case can be handled, the return value is True. If not, False is returned, and neither *res not *resOSZACP are altered. */ Bool pcmpXstrX_WRK ( /*OUT*/V128* resV, /*OUT*/UInt* resOSZACP, V128* argLV, V128* argRV, UInt zmaskL, UInt zmaskR, UInt imm8, Bool isSTRM ) { assert(imm8 < 0x80); assert((zmaskL >> 16) == 0); assert((zmaskR >> 16) == 0); /* Explicitly reject any imm8 values that haven't been validated, even if they would probably work. Life is too short to have unvalidated cases in the code base. */ switch (imm8) { case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E: case 0x10: case 0x12: case 0x14: case 0x18: case 0x1A: case 0x30: case 0x34: case 0x38: case 0x3A: case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A: case 0x62: case 0x70: case 0x72: break; default: return False; } UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask /*----------------------------------------*/ /*-- strcmp on byte data --*/ /*----------------------------------------*/ if (agg == 2/*equal each, aka strcmp*/ && (fmt == 0/*ub*/ || fmt == 2/*sb*/) && !isSTRM) { Int i; UChar* argL = (UChar*)argLV; UChar* argR = (UChar*)argRV; UInt boolResII = 0; for (i = 15; i >= 0; i--) { UChar cL = argL[i]; UChar cR = argR[i]; boolResII = (boolResII << 1) | (cL == cR ? 
1 : 0); } UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) // do invalidation, common to all equal-each cases UInt intRes1 = (boolResII & validL & validR) // if both valid, use cmpres | (~ (validL | validR)); // if both invalid, force 1 // else force 0 intRes1 &= 0xFFFF; // generate I-format output pcmpXstrX_WRK_gen_output_fmt_I( resV, resOSZACP, intRes1, zmaskL, zmaskR, validL, pol, idx ); return True; } /*----------------------------------------*/ /*-- set membership on byte data --*/ /*----------------------------------------*/ if (agg == 0/*equal any, aka find chars in a set*/ && (fmt == 0/*ub*/ || fmt == 2/*sb*/) && !isSTRM) { /* argL: the string, argR: charset */ UInt si, ci; UChar* argL = (UChar*)argLV; UChar* argR = (UChar*)argRV; UInt boolRes = 0; UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) for (si = 0; si < 16; si++) { if ((validL & (1 << si)) == 0) // run off the end of the string. 
break; UInt m = 0; for (ci = 0; ci < 16; ci++) { if ((validR & (1 << ci)) == 0) break; if (argR[ci] == argL[si]) { m = 1; break; } } boolRes |= (m << si); } // boolRes is "pre-invalidated" UInt intRes1 = boolRes & 0xFFFF; // generate I-format output pcmpXstrX_WRK_gen_output_fmt_I( resV, resOSZACP, intRes1, zmaskL, zmaskR, validL, pol, idx ); return True; } /*----------------------------------------*/ /*-- substring search on byte data --*/ /*----------------------------------------*/ if (agg == 3/*equal ordered, aka substring search*/ && (fmt == 0/*ub*/ || fmt == 2/*sb*/) && !isSTRM) { /* argL: haystack, argR: needle */ UInt ni, hi; UChar* argL = (UChar*)argLV; UChar* argR = (UChar*)argRV; UInt boolRes = 0; UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) for (hi = 0; hi < 16; hi++) { UInt m = 1; for (ni = 0; ni < 16; ni++) { if ((validR & (1 << ni)) == 0) break; UInt i = ni + hi; if (i >= 16) break; if (argL[i] != argR[ni]) { m = 0; break; } } boolRes |= (m << hi); if ((validL & (1 << hi)) == 0) // run off the end of the haystack break; } // boolRes is "pre-invalidated" UInt intRes1 = boolRes & 0xFFFF; // generate I-format output pcmpXstrX_WRK_gen_output_fmt_I( resV, resOSZACP, intRes1, zmaskL, zmaskR, validL, pol, idx ); return True; } /*----------------------------------------*/ /*-- ranges, unsigned byte data --*/ /*----------------------------------------*/ if (agg == 1/*ranges*/ && fmt == 0/*ub*/ && !isSTRM) { /* argL: string, argR: range-pairs */ UInt ri, si; UChar* argL = (UChar*)argLV; UChar* argR = (UChar*)argRV; UInt boolRes = 0; UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) for (si = 0; si < 16; si++) { if ((validL & (1 << si)) == 0) // run off the end of the string break; UInt m = 0; for (ri = 0; ri < 16; ri += 2) { if ((validR & (3 << ri)) != (3 << ri)) break; if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { m = 1; 
break; } } boolRes |= (m << si); } // boolRes is "pre-invalidated" UInt intRes1 = boolRes & 0xFFFF; // generate I-format output pcmpXstrX_WRK_gen_output_fmt_I( resV, resOSZACP, intRes1, zmaskL, zmaskR, validL, pol, idx ); return True; } /*----------------------------------------*/ /*-- ranges, signed byte data --*/ /*----------------------------------------*/ if (agg == 1/*ranges*/ && fmt == 2/*sb*/ && !isSTRM) { /* argL: string, argR: range-pairs */ UInt ri, si; Char* argL = (Char*)argLV; Char* argR = (Char*)argRV; UInt boolRes = 0; UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) for (si = 0; si < 16; si++) { if ((validL & (1 << si)) == 0) // run off the end of the string break; UInt m = 0; for (ri = 0; ri < 16; ri += 2) { if ((validR & (3 << ri)) != (3 << ri)) break; if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { m = 1; break; } } boolRes |= (m << si); } // boolRes is "pre-invalidated" UInt intRes1 = boolRes & 0xFFFF; // generate I-format output pcmpXstrX_WRK_gen_output_fmt_I( resV, resOSZACP, intRes1, zmaskL, zmaskR, validL, pol, idx ); return True; } return False; } ////////////////////////////////////////////////////////// // // // ISTRI_4A // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_4A ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_4A ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( 
&resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x4A, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_4A ( void ) { char* wot = "4A"; UInt(*h)(V128*,V128*) = h_pcmpistri_4A; UInt(*s)(V128*,V128*) = s_pcmpistri_4A; try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); } ////////////////////////////////////////////////////////// // // // ISTRI_3A // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_3A ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_3A ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x3A, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_3A ( void ) { char* wot = "3A"; UInt(*h)(V128*,V128*) = h_pcmpistri_3A; UInt(*s)(V128*,V128*) = s_pcmpistri_3A; try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); } ////////////////////////////////////////////////////////// // // // ISTRI_0C // // // ////////////////////////////////////////////////////////// __attribute__((noinline)) UInt h_pcmpistri_0C ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res = 0, flags = 0; __asm__ __volatile__( "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t" //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t" //"movd %%xmm0, %%ecx" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_0C ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, 
argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x0C, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_0C ( void ) { char* wot = "0C"; UInt(*h)(V128*,V128*) = h_pcmpistri_0C; UInt(*s)(V128*,V128*) = s_pcmpistri_0C; try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); try_istri(wot,h,s, "1111111111111234", "0000000000000000"); try_istri(wot,h,s, "1111111111111234", "0000000000000001"); try_istri(wot,h,s, "1111111111111234", "0000000000000011"); try_istri(wot,h,s, "1111111111111234", "1111111111111234"); try_istri(wot,h,s, "a111111111111111", "000000000000000a"); try_istri(wot,h,s, "b111111111111111", "000000000000000a"); try_istri(wot,h,s, "b111111111111111", "0000000000000000"); try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); } ////////////////////////////////////////////////////////// // // // ISTRI_08 // // // ////////////////////////////////////////////////////////// UInt 
h_pcmpistri_08 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_08 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x08, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_08 ( void ) { char* wot = "08"; UInt(*h)(V128*,V128*) = h_pcmpistri_08; UInt(*s)(V128*,V128*) = s_pcmpistri_08; try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", 
"aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); } ////////////////////////////////////////////////////////// // // // ISTRI_18 // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_18 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x18, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_18 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x18, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_18 ( void ) { char* wot = "18"; UInt(*h)(V128*,V128*) = h_pcmpistri_18; UInt(*s)(V128*,V128*) = s_pcmpistri_18; try_istri(wot,h,s, "0000000000000000", 
"0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); } ////////////////////////////////////////////////////////// // // // ISTRI_1A // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_1A ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, 
sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x1A, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_1A ( void ) { char* wot = "1A"; UInt(*h)(V128*,V128*) = h_pcmpistri_1A; UInt(*s)(V128*,V128*) = s_pcmpistri_1A; try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); } ////////////////////////////////////////////////////////// // // // ISTRI_02 // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_02 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t" //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" //"movd %%xmm0, %%ecx" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x02, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_02 ( void ) { char* wot = "02"; UInt(*h)(V128*,V128*) = h_pcmpistri_02; UInt(*s)(V128*,V128*) = s_pcmpistri_02; try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); }

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_12                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x12 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_12 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* Stage both vectors back to back so the asm needs one base
      pointer: argL at offset 0, argR at offset 16. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x12, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x12, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x12, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_12 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },
      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },
      { "0000abcdabcdabcd", "00000000000baba0" },
      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "12";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_12, s_pcmpistri_12, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_44                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x44 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_44 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x44, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x44, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x44, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_44 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "aaaabbbbccccdddd", "00000000000000bc" },
      { "aaaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddc", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbb0bbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbb0b", "00000000000000cb" },
      { "bbbbbbbbbbbbbbb0", "00000000000000cb" },
      { "0000000000000000", "00000000000000cb" },
      { "0000000000000000", "0000000000000000" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "000000000000000b" },
      { "b4b4b4b4b4b4b4b4", "00000000000062cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000002cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000000cb" },
      { "b4b4b4b4b4b4b4b4", "000000000000000b" },
      { "0123456789abcdef", "000000fecb975421" },
      { "123456789abcdef1", "000000fecb975421" },
      { "0123456789abcdef", "00000000dca86532" },
      { "123456789abcdef1", "00000000dca86532" },
      { "163887ec041a9b72", "fcd75adb9b3e895a" },
      { "fc937cbfbf53f8e2", "0d136bcb024d3fb7" },
      { "2ca34182c29a82ab", "302ebd646775ab54" },
      { "3f2987608c11be6f", "a9ecb661f8e0a8cb" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "44";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_44, s_pcmpistri_44, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_00                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x00 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_00 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x00, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x00, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x00, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_00 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },
      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },
      { "0000abcdabcdabcd", "00000000000baba0" },
      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "00";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_00, s_pcmpistri_00, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_38                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x38 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_38 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x38, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x38, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x38, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_38 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
      { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa" },
      { "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa" },
      { "0000000000000000", "aaaaaaaa0aaaaaaa" },
      { "8000000000000000", "aaaaaaaa0aaaaaaa" },
      { "0000000000000001", "aaaaaaaa0aaaaaaa" },
      { "0000000000000000", "aaaaaaaaaaaaaaaa" },
      { "aaaaaaaaaaaaaaaa", "0000000000000000" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "38";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_38, s_pcmpistri_38, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_46                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x46 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_46 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x46, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x46, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x46, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_46 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "aaaabbbbccccdddd", "00000000000000bc" },
      { "aaaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddc", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbb0bbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbb0b", "00000000000000cb" },
      { "bbbbbbbbbbbbbbb0", "00000000000000cb" },
      { "0000000000000000", "00000000000000cb" },
      { "0000000000000000", "0000000000000000" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "000000000000000b" },
      { "b4b4b4b4b4b4b4b4", "00000000000062cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000002cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000000cb" },
      { "b4b4b4b4b4b4b4b4", "000000000000000b" },
      { "0123456789abcdef", "000000fecb975421" },
      { "123456789abcdef1", "000000fecb975421" },
      { "0123456789abcdef", "00000000dca86532" },
      { "123456789abcdef1", "00000000dca86532" },
      { "163887ec041a9b72", "fcd75adb9b3e895a" },
      { "fc937cbfbf53f8e2", "0d136bcb024d3fb7" },
      { "2ca34182c29a82ab", "302ebd646775ab54" },
      { "3f2987608c11be6f", "a9ecb661f8e0a8cb" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "46";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_46, s_pcmpistri_46, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_30                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x30 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_30 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x30, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x30, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x30, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_30 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },
      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },
      { "0000abcdabcdabcd", "00000000000baba0" },
      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "30";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_30, s_pcmpistri_30, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_40                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x40 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_40 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x40, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x40, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x40, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_40 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },
      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },
      { "0000abcdabcdabcd", "00000000000baba0" },
      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "40";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_40, s_pcmpistri_40, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_42                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x42 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_42 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x42, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x42, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x42, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_42 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "abcdacbdabcdabcd", "000000000000000a" },
      { "abcdabcdabcdabcd", "000000000000000b" },
      { "abcdabcdabcdabcd", "00000000000000ab" },
      { "abcdabc0abcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "0bcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcda0cd", "000000000000abcd" },
      { "abcdabcdabcdab0d", "000000000000abcd" },
      { "abcdabcdabcdabc0", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000abcd" },
      { "abcdabcdabcdabcd", "000000000000a0cd" },
      { "abcdabcdabcdabcd", "000000000000ab0d" },
      { "abcdabcdabcdabcd", "000000000000abc0" },
      { "0000000000000000", "0000000000000000" },
      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
      { "0000abcdabcdabcd", "000000000000abcd" },
      { "0000abcdabcdabcd", "000000000000dcba" },
      { "0000abcdabcdabcd", "000000000000bbbb" },
      { "0000abcdabcdabcd", "000000000000baba" },
      { "0000abcdabcdabcd", "00000000000baba0" },
      { "0ddc0ffeebadf00d", "00000000cafebabe" },
      { "0ddc0ffeebadfeed", "00000000cafebabe" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "42";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_42, s_pcmpistri_42, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0E                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x0E with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits.  Kept noinline and with
   zero-initialised outputs, as before. */
__attribute__((noinline))
UInt h_pcmpistri_0E ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut = 0, rflagsOut = 0;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* pushfq stores 8 bytes just below rsp.  Under the x86-64 SysV ABI
      that is red-zone memory which the compiler is free to use for
      this function's locals, so rsp is moved well below it first and
      restored afterwards, matching the other h_pcmpistri_* helpers.
      The subq precedes pcmpistri (which rewrites the flags) and the
      addq follows the pushfq/popq capture, so neither affects the
      flags value that is returned. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x0E, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x0E, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x0E, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_0E ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "111111111abcde11", "00000000000abcde" },
      { "111111111abcde11", "0000abcde00abcde" },
      { "1111111111abcde1", "00000000000abcde" },
      { "11111111111abcde", "00000000000abcde" },
      { "111111111111abcd", "00000000000abcde" },
      { "111abcde1abcde11", "00000000000abcde" },
      { "11abcde11abcde11", "00000000000abcde" },
      { "1abcde111abcde11", "00000000000abcde" },
      { "abcde1111abcde11", "00000000000abcde" },
      { "bcde11111abcde11", "00000000000abcde" },
      { "cde111111abcde11", "00000000000abcde" },
      { "01abcde11abcde11", "00000000000abcde" },
      { "00abcde11abcde11", "00000000000abcde" },
      { "000bcde11abcde11", "00000000000abcde" },
      { "00abcde10abcde11", "00000000000abcde" },
      { "00abcde100bcde11", "00000000000abcde" },
      { "1111111111111234", "0000000000000000" },
      { "1111111111111234", "0000000000000001" },
      { "1111111111111234", "0000000000000011" },
      { "1111111111111234", "1111111111111234" },
      { "a111111111111111", "000000000000000a" },
      { "b111111111111111", "000000000000000a" },
      { "b111111111111111", "0000000000000000" },
      { "0000000000000000", "0000000000000000" },
      { "123456789abcdef1", "0000000000000000" },
      { "0000000000000000", "123456789abcdef1" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "0E";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_0E, s_pcmpistri_0E, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_34                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x34 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_34 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x34, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x34, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x34, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_34 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "aaaabbbbccccdddd", "00000000000000bc" },
      { "aaaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddc", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbb0bbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbb0b", "00000000000000cb" },
      { "bbbbbbbbbbbbbbb0", "00000000000000cb" },
      { "0000000000000000", "00000000000000cb" },
      { "0000000000000000", "0000000000000000" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "000000000000000b" },
      { "b4b4b4b4b4b4b4b4", "00000000000062cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000002cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000000cb" },
      { "b4b4b4b4b4b4b4b4", "000000000000000b" },
      { "0123456789abcdef", "000000fecb975421" },
      { "123456789abcdef1", "000000fecb975421" },
      { "0123456789abcdef", "00000000dca86532" },
      { "123456789abcdef1", "00000000dca86532" },
      { "163887ec041a9b72", "fcd75adb9b3e895a" },
      { "fc937cbfbf53f8e2", "0d136bcb024d3fb7" },
      { "2ca34182c29a82ab", "302ebd646775ab54" },
      { "3f2987608c11be6f", "a9ecb661f8e0a8cb" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "34";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_34, s_pcmpistri_34, pairs[k][0], pairs[k][1]);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_14                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware side: runs PCMPISTRI $0x14 with *argL in xmm2 and *argR in
   xmm11.  Returns ((RFLAGS & 0x8D5) << 16) | (RCX & 0xFFFF); 0x8D5
   selects the O, S, Z, A, P and C flag bits. */
UInt h_pcmpistri_14 ( V128* argL, V128* argR )
{
   V128  operands[2];
   ULong rcxOut, rflagsOut;

   /* argL at offset 0, argR at offset 16 of one contiguous buffer. */
   memcpy(&operands[0], argL, sizeof(V128));
   memcpy(&operands[1], argR, sizeof(V128));

   /* rsp is lowered by 1024 around the sequence, presumably so that
      pushfq cannot overwrite red-zone data below the incoming rsp. */
   __asm__ __volatile__(
      "subq $1024, %%rsp"                 "\n\t"
      "movdqu 0(%2), %%xmm2"              "\n\t"
      "movdqu 16(%2), %%xmm11"            "\n\t"
      "pcmpistri $0x14, %%xmm2, %%xmm11"  "\n\t"
      "pushfq"                            "\n\t"
      "popq %%rdx"                        "\n\t"
      "movq %%rcx, %0"                    "\n\t"
      "movq %%rdx, %1"                    "\n\t"
      "addq $1024, %%rsp"                 "\n\t"
      : /*out*/ "=r"(rcxOut), "=r"(rflagsOut)
      : "r"/*in*/(&operands[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((rflagsOut & 0x8D5) << 16) | (rcxOut & 0xFFFF);
}

/* Software side: the same operation through pcmpXstrX_WRK (immediate
   0x14, not the STRM form).  Returns (OSZACP << 16) | ECX result and
   asserts that the worker reported success. */
UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP;
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);
   Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                            zmaskL, zmaskR, 0x14, False/*!isSTRM*/ );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Feeds a fixed list of operand pairs to try_istri, which prints the
   hardware and software results side by side and marks mismatches. */
void istri_14 ( void )
{
   /* { argL summary, argR summary }, 16 hex digits each. */
   static char* const pairs[][2] = {
      { "aaaabbbbccccdddd", "00000000000000bc" },
      { "aaaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddd", "00000000000000cb" },
      { "baaabbbbccccdddc", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbb0bbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbb0b", "00000000000000cb" },
      { "bbbbbbbbbbbbbbb0", "00000000000000cb" },
      { "0000000000000000", "00000000000000cb" },
      { "0000000000000000", "0000000000000000" },
      { "bbbbbbbbbbbbbbbb", "00000000000000cb" },
      { "bbbbbbbbbbbbbbbb", "000000000000000b" },
      { "b4b4b4b4b4b4b4b4", "00000000000062cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000002cb" },
      { "b4b4b4b4b4b4b4b4", "00000000000000cb" },
      { "b4b4b4b4b4b4b4b4", "000000000000000b" },
      { "0123456789abcdef", "000000fecb975421" },
      { "123456789abcdef1", "000000fecb975421" },
      { "0123456789abcdef", "00000000dca86532" },
      { "123456789abcdef1", "00000000dca86532" },
      { "163887ec041a9b72", "fcd75adb9b3e895a" },
      { "fc937cbfbf53f8e2", "0d136bcb024d3fb7" },
      { "2ca34182c29a82ab", "302ebd646775ab54" },
      { "3f2987608c11be6f", "a9ecb661f8e0a8cb" },
   };
   const UInt nPairs = sizeof(pairs) / sizeof(pairs[0]);
   char* wot = "14";
   UInt  k;

   for (k = 0; k < nPairs; k++) {
      try_istri(wot, h_pcmpistri_14, s_pcmpistri_14, pairs[k][0], pairs[k][1]);
   }
}
////////////////////////////////////////////////////////// // // // ISTRI_70 // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_70 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x70, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_70 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x70, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_70 ( void ) { char* wot = "70"; UInt(*h)(V128*,V128*) = h_pcmpistri_70; UInt(*s)(V128*,V128*) = s_pcmpistri_70; try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); } ////////////////////////////////////////////////////////// // // // ISTRI_62 // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_62 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x62, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_62 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x62, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_62 ( void ) { char* wot = "62"; UInt(*h)(V128*,V128*) = h_pcmpistri_62; UInt(*s)(V128*,V128*) = s_pcmpistri_62; try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); } ////////////////////////////////////////////////////////// // // // ISTRI_72 // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_72 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x72, %%xmm2, %%xmm11" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" "movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_72 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x72, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_72 ( 
void ) { char* wot = "72"; UInt(*h)(V128*,V128*) = h_pcmpistri_72; UInt(*s)(V128*,V128*) = s_pcmpistri_72; try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); } ////////////////////////////////////////////////////////// // // // ISTRI_10 // // // ////////////////////////////////////////////////////////// UInt h_pcmpistri_10 ( V128* argL, V128* argR ) { V128 block[2]; memcpy(&block[0], argL, sizeof(V128)); memcpy(&block[1], argR, sizeof(V128)); ULong res, flags; __asm__ __volatile__( "subq $1024, %%rsp" "\n\t" "movdqu 0(%2), %%xmm2" "\n\t" "movdqu 16(%2), %%xmm11" "\n\t" "pcmpistri $0x10, %%xmm2, %%xmm11" "\n\t" //"pcmpistrm $0x10, %%xmm2, %%xmm11" "\n\t" //"movd %%xmm0, %%ecx" "\n\t" "pushfq" "\n\t" "popq %%rdx" "\n\t" 
"movq %%rcx, %0" "\n\t" "movq %%rdx, %1" "\n\t" "addq $1024, %%rsp" "\n\t" : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" ); return ((flags & 0x8D5) << 16) | (res & 0xFFFF); } UInt s_pcmpistri_10 ( V128* argLU, V128* argRU ) { V128 resV; UInt resOSZACP, resECX; Bool ok = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, zmask_from_V128(argLU), zmask_from_V128(argRU), 0x10, False/*!isSTRM*/ ); assert(ok); resECX = resV.uInt[0]; return (resOSZACP << 16) | resECX; } void istri_10 ( void ) { char* wot = "10"; UInt(*h)(V128*,V128*) = h_pcmpistri_10; UInt(*s)(V128*,V128*) = s_pcmpistri_10; try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); try_istri(wot,h,s, "0000000000000000", "0000000000000000"); try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); } 
////////////////////////////////////////////////////////// // // // main // // // ////////////////////////////////////////////////////////// int main ( void ) { istri_4A(); istri_3A(); istri_08(); istri_18(); istri_1A(); istri_02(); istri_0C(); istri_12(); istri_44(); istri_00(); istri_38(); istri_46(); istri_30(); istri_40(); istri_42(); istri_0E(); istri_14(); istri_34(); istri_70(); istri_62(); istri_72(); istri_10(); return 0; }