1 
2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3    pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
4    aspect. */
5 
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9 
/* Short aliases for the basic integer types used by this test. */
typedef  unsigned            UInt;
typedef  int                 Int;
typedef  unsigned char       UChar;
typedef  signed char         Char;
typedef  unsigned long long  ULong;

/* Boolean held in a single unsigned byte, and its two values. */
typedef  UChar  Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
18 
/* A 16-byte vector value.  The same storage can be viewed either as
   sixteen bytes (uChar) or as four UInt words (uInt). */
typedef union { UChar uChar[16]; UInt uInt[4]; } V128;
26 
/* Bit positions of the O, S, Z, A, C and P flags within a flags
   word, listed from the lowest bit upwards. */
#define SHIFT_C   0
#define SHIFT_P   2
#define SHIFT_A   4
#define SHIFT_Z   6
#define SHIFT_S   7
#define SHIFT_O   11

/* The corresponding single-bit masks. */
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_O    (1ULL << SHIFT_O)
40 
41 
/* Count the leading (most significant) zero bits of a 32-bit value.
   Returns 32 when x is zero.

   Implemented as a binary search using unsigned arithmetic only, so
   the result does not depend on how the compiler converts
   out-of-range values to signed int or shifts negative values right
   (both implementation-defined in C). */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   /* At each step: if the top half of the remaining window is empty,
      account for it and slide the lower half up into its place. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
65 
/* Count the trailing (least significant) zero bits of x, by way of
   clz32.  (x - 1) & ~x has ones in exactly the positions below the
   lowest set bit of x, so 32 minus its leading-zero count is the
   number of those positions.  For x == 0 the mask is all ones and
   the result is 32. */
UInt ctz32 ( UInt x )
{
   UInt below_lowest_one = (x - 1) & ~x;
   UInt leading_zeroes   = clz32(below_lowest_one);
   return 32 - leading_zeroes;
}
70 
/* Build a vector from a 16-character hex "summary" string.  Every hex
   digit (upper or lower case) is widened to a byte carrying that
   digit in both nibbles, e.g. 'a' becomes 0xAA.  The string is read
   from its last character towards its first, so summary[15] ends up
   in dst->uChar[0] and summary[0] in dst->uChar[15].  Asserts if the
   string is not exactly 16 characters long or contains a non-hex
   character. */
void expand ( V128* dst, char* summary )
{
   Int pos;
   assert( strlen(summary) == 16 );
   for (pos = 15; pos >= 0; pos--) {
      UChar ch     = summary[pos];
      UChar nibble = 0;

      if (ch >= '0' && ch <= '9')
         nibble = ch - '0';
      else if (ch >= 'A' && ch <= 'F')
         nibble = ch - 'A' + 10;
      else if (ch >= 'a' && ch <= 'f')
         nibble = ch - 'a' + 10;
      else
         assert(0);

      assert(nibble < 16);
      // replicate the digit into both halves of the byte
      dst->uChar[15 - pos] = (nibble << 4) | nibble;
   }
}
89 
/* Run one test vector through two implementations and print both
   results on a single line.

   which  is a 2-character label for the case being tested.
   h_fn and s_fn are the two implementations to compare; each gets
   pointers to the same pair of expanded operands.
   summL and summR are 16-character hex summaries (see expand) of the
   two operands.

   The line ends in "!!!!" when the two results differ. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128        vecL, vecR;
   UInt        from_h, from_s;
   const char* verdict;

   assert(strlen(which) == 2);

   expand(&vecL, summL);
   expand(&vecR, summR);

   from_h  = h_fn(&vecL, &vecR);
   from_s  = s_fn(&vecL, &vecR);
   verdict = (from_h == from_s) ? "" : "!!!!";

   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, from_h, from_s, verdict);
}
104 
/* Compute the 16-bit "zero mask" of a vector: bit i of the result is
   set exactly when byte i of *arg is zero.  Bits 16 and above are
   always clear. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt mask = 0;
   UInt lane;
   for (lane = 0; lane < 16; lane++) {
      if (arg->uChar[lane] == 0)
         mask |= 1u << lane;
   }
   return mask;
}
113 
114 //////////////////////////////////////////////////////////
115 //                                                      //
116 //                       GENERAL                        //
117 //                                                      //
118 //////////////////////////////////////////////////////////
119 
120 
121 /* Given partial results from a pcmpXstrX operation (intRes1,
122    basically), generate an I format (index value for ECX) output, and
123    also the new OSZACP flags.
124 */
125 static
pcmpXstrX_WRK_gen_output_fmt_I(V128 * resV,UInt * resOSZACP,UInt intRes1,UInt zmaskL,UInt zmaskR,UInt validL,UInt pol,UInt idx)126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127                                     /*OUT*/UInt* resOSZACP,
128                                     UInt intRes1,
129                                     UInt zmaskL, UInt zmaskR,
130                                     UInt validL,
131                                     UInt pol, UInt idx )
132 {
133    assert((pol >> 2) == 0);
134    assert((idx >> 1) == 0);
135 
136    UInt intRes2 = 0;
137    switch (pol) {
138       case 0: intRes2 = intRes1;          break; // pol +
139       case 1: intRes2 = ~intRes1;         break; // pol -
140       case 2: intRes2 = intRes1;          break; // pol m+
141       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142    }
143    intRes2 &= 0xFFFF;
144 
145    // generate ecx value
146    UInt newECX = 0;
147    if (idx) {
148      // index of ms-1-bit
149      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150    } else {
151      // index of ls-1-bit
152      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153    }
154 
155    *(UInt*)(&resV[0]) = newECX;
156 
157    // generate new flags, common to all ISTRI and ISTRM cases
158    *resOSZACP    // A, P are zero
159      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
162      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
163 }
164 
165 
166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167    variants.
168 
169    For xSTRI variants, the new ECX value is placed in the 32 bits
170    pointed to by *resV.  For xSTRM variants, the result is a 128 bit
171    value and is placed at *resV in the obvious way.
172 
173    For all variants, the new OSZACP value is placed at *resOSZACP.
174 
175    argLV and argRV are the vector args.  The caller must prepare a
176    16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
177    must be 1 for each zero byte of of the respective arg.  For ESTRx
178    variants this is derived from the explicit length indication, and
179    must be 0 in all places except at the bit index corresponding to
180    the valid length (0 .. 16).  If the valid length is 16 then the
181    mask must be all zeroes.  In all cases, bits 31:16 must be zero.
182 
183    imm8 is the original immediate from the instruction.  isSTRM
184    indicates whether this is a xSTRM or xSTRI variant, which controls
185    how much of *res is written.
186 
187    If the given imm8 case can be handled, the return value is True.
188    If not, False is returned, and neither *res not *resOSZACP are
189    altered.
190 */
191 
pcmpXstrX_WRK(V128 * resV,UInt * resOSZACP,V128 * argLV,V128 * argRV,UInt zmaskL,UInt zmaskR,UInt imm8,Bool isSTRM)192 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193                      /*OUT*/UInt* resOSZACP,
194                      V128* argLV,  V128* argRV,
195                      UInt zmaskL, UInt zmaskR,
196                      UInt imm8,   Bool isSTRM )
197 {
198    assert(imm8 < 0x80);
199    assert((zmaskL >> 16) == 0);
200    assert((zmaskR >> 16) == 0);
201 
202    /* Explicitly reject any imm8 values that haven't been validated,
203       even if they would probably work.  Life is too short to have
204       unvalidated cases in the code base. */
205    switch (imm8) {
206       case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
207       case 0x12: case 0x14: case 0x18: case 0x1A:
208       case 0x30: case 0x34: case 0x38: case 0x3A:
209       case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
210          break;
211       default:
212          return False;
213    }
214 
215    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
216    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
217    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
218    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
219 
220    /*----------------------------------------*/
221    /*-- strcmp on byte data                --*/
222    /*----------------------------------------*/
223 
224    if (agg == 2/*equal each, aka strcmp*/
225        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
226        && !isSTRM) {
227       Int    i;
228       UChar* argL = (UChar*)argLV;
229       UChar* argR = (UChar*)argRV;
230       UInt boolResII = 0;
231       for (i = 15; i >= 0; i--) {
232          UChar cL  = argL[i];
233          UChar cR  = argR[i];
234          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
235       }
236       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
237       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
238 
239       // do invalidation, common to all equal-each cases
240       UInt intRes1
241          = (boolResII & validL & validR)  // if both valid, use cmpres
242            | (~ (validL | validR));       // if both invalid, force 1
243                                           // else force 0
244       intRes1 &= 0xFFFF;
245 
246       // generate I-format output
247       pcmpXstrX_WRK_gen_output_fmt_I(
248          resV, resOSZACP,
249          intRes1, zmaskL, zmaskR, validL, pol, idx
250       );
251 
252       return True;
253    }
254 
255    /*----------------------------------------*/
256    /*-- set membership on byte data        --*/
257    /*----------------------------------------*/
258 
259    if (agg == 0/*equal any, aka find chars in a set*/
260        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
261        && !isSTRM) {
262       /* argL: the string,  argR: charset */
263       UInt   si, ci;
264       UChar* argL    = (UChar*)argLV;
265       UChar* argR    = (UChar*)argRV;
266       UInt   boolRes = 0;
267       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
268       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
269 
270       for (si = 0; si < 16; si++) {
271          if ((validL & (1 << si)) == 0)
272             // run off the end of the string.
273             break;
274          UInt m = 0;
275          for (ci = 0; ci < 16; ci++) {
276             if ((validR & (1 << ci)) == 0) break;
277             if (argR[ci] == argL[si]) { m = 1; break; }
278          }
279          boolRes |= (m << si);
280       }
281 
282       // boolRes is "pre-invalidated"
283       UInt intRes1 = boolRes & 0xFFFF;
284 
285       // generate I-format output
286       pcmpXstrX_WRK_gen_output_fmt_I(
287          resV, resOSZACP,
288          intRes1, zmaskL, zmaskR, validL, pol, idx
289       );
290 
291       return True;
292    }
293 
294    /*----------------------------------------*/
295    /*-- substring search on byte data      --*/
296    /*----------------------------------------*/
297 
298    if (agg == 3/*equal ordered, aka substring search*/
299        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
300        && !isSTRM) {
301 
302       /* argL: haystack,  argR: needle */
303       UInt   ni, hi;
304       UChar* argL    = (UChar*)argLV;
305       UChar* argR    = (UChar*)argRV;
306       UInt   boolRes = 0;
307       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
308       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
309       for (hi = 0; hi < 16; hi++) {
310          UInt m = 1;
311          for (ni = 0; ni < 16; ni++) {
312             if ((validR & (1 << ni)) == 0) break;
313             UInt i = ni + hi;
314             if (i >= 16) break;
315             if (argL[i] != argR[ni]) { m = 0; break; }
316          }
317          boolRes |= (m << hi);
318          if ((validL & (1 << hi)) == 0)
319             // run off the end of the haystack
320             break;
321       }
322 
323       // boolRes is "pre-invalidated"
324       UInt intRes1 = boolRes & 0xFFFF;
325 
326       // generate I-format output
327       pcmpXstrX_WRK_gen_output_fmt_I(
328          resV, resOSZACP,
329          intRes1, zmaskL, zmaskR, validL, pol, idx
330       );
331 
332       return True;
333    }
334 
335    /*----------------------------------------*/
336    /*-- ranges, unsigned byte data         --*/
337    /*----------------------------------------*/
338 
339    if (agg == 1/*ranges*/
340        && fmt == 0/*ub*/
341        && !isSTRM) {
342 
343       /* argL: string,  argR: range-pairs */
344       UInt   ri, si;
345       UChar* argL    = (UChar*)argLV;
346       UChar* argR    = (UChar*)argRV;
347       UInt   boolRes = 0;
348       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
349       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
350       for (si = 0; si < 16; si++) {
351          if ((validL & (1 << si)) == 0)
352             // run off the end of the string
353             break;
354          UInt m = 0;
355          for (ri = 0; ri < 16; ri += 2) {
356             if ((validR & (3 << ri)) != (3 << ri)) break;
357             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
358                m = 1; break;
359             }
360          }
361          boolRes |= (m << si);
362       }
363 
364       // boolRes is "pre-invalidated"
365       UInt intRes1 = boolRes & 0xFFFF;
366 
367       // generate I-format output
368       pcmpXstrX_WRK_gen_output_fmt_I(
369          resV, resOSZACP,
370          intRes1, zmaskL, zmaskR, validL, pol, idx
371       );
372 
373       return True;
374    }
375 
376    /*----------------------------------------*/
377    /*-- ranges, signed byte data           --*/
378    /*----------------------------------------*/
379 
380    if (agg == 1/*ranges*/
381        && fmt == 2/*sb*/
382        && !isSTRM) {
383 
384       /* argL: string,  argR: range-pairs */
385       UInt   ri, si;
386       Char*  argL    = (Char*)argLV;
387       Char*  argR    = (Char*)argRV;
388       UInt   boolRes = 0;
389       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
390       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
391       for (si = 0; si < 16; si++) {
392          if ((validL & (1 << si)) == 0)
393             // run off the end of the string
394             break;
395          UInt m = 0;
396          for (ri = 0; ri < 16; ri += 2) {
397             if ((validR & (3 << ri)) != (3 << ri)) break;
398             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
399                m = 1; break;
400             }
401          }
402          boolRes |= (m << si);
403       }
404 
405       // boolRes is "pre-invalidated"
406       UInt intRes1 = boolRes & 0xFFFF;
407 
408       // generate I-format output
409       pcmpXstrX_WRK_gen_output_fmt_I(
410          resV, resOSZACP,
411          intRes1, zmaskL, zmaskR, validL, pol, idx
412       );
413 
414       return True;
415    }
416 
417    return False;
418 }
419 
420 
421 //////////////////////////////////////////////////////////
422 //                                                      //
423 //                       ISTRI_4A                       //
424 //                                                      //
425 //////////////////////////////////////////////////////////
426 
h_pcmpistri_4A(V128 * argL,V128 * argR)427 UInt h_pcmpistri_4A ( V128* argL, V128* argR )
428 {
429    V128 block[2];
430    memcpy(&block[0], argL, sizeof(V128));
431    memcpy(&block[1], argR, sizeof(V128));
432    ULong res, flags;
433    __asm__ __volatile__(
434       "subq      $1024,  %%rsp"             "\n\t"
435       "movdqu    0(%2),  %%xmm2"            "\n\t"
436       "movdqu    16(%2), %%xmm11"           "\n\t"
437       "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
438       "pushfq"                              "\n\t"
439       "popq      %%rdx"                     "\n\t"
440       "movq      %%rcx,  %0"                "\n\t"
441       "movq      %%rdx,  %1"                "\n\t"
442       "addq      $1024,  %%rsp"             "\n\t"
443       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
444       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
445    );
446    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
447 }
448 
s_pcmpistri_4A(V128 * argLU,V128 * argRU)449 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
450 {
451    V128 resV;
452    UInt resOSZACP, resECX;
453    Bool ok
454       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
455                        zmask_from_V128(argLU),
456                        zmask_from_V128(argRU),
457                        0x4A, False/*!isSTRM*/
458         );
459    assert(ok);
460    resECX = resV.uInt[0];
461    return (resOSZACP << 16) | resECX;
462 }
463 
istri_4A(void)464 void istri_4A ( void )
465 {
466    char* wot = "4A";
467    UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
468    UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
469 
470    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
471 
472    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
473    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
474    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
475    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
476 
477    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
478    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
479    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
480 
481    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
482    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
483    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
484    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
485 
486    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
487    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
488    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
489 
490    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491 
492    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
493    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
494    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
495 
496    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
497    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
498    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
499 
500    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
501    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
502    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
503 
504    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
505    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
506    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
507 
508    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
509    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
510 }
511 
512 //////////////////////////////////////////////////////////
513 //                                                      //
514 //                       ISTRI_3A                       //
515 //                                                      //
516 //////////////////////////////////////////////////////////
517 
h_pcmpistri_3A(V128 * argL,V128 * argR)518 UInt h_pcmpistri_3A ( V128* argL, V128* argR )
519 {
520    V128 block[2];
521    memcpy(&block[0], argL, sizeof(V128));
522    memcpy(&block[1], argR, sizeof(V128));
523    ULong res, flags;
524    __asm__ __volatile__(
525       "subq      $1024,  %%rsp"             "\n\t"
526       "movdqu    0(%2),  %%xmm2"            "\n\t"
527       "movdqu    16(%2), %%xmm11"           "\n\t"
528       "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
529       "pushfq"                              "\n\t"
530       "popq      %%rdx"                     "\n\t"
531       "movq      %%rcx,  %0"                "\n\t"
532       "movq      %%rdx,  %1"                "\n\t"
533       "addq      $1024,  %%rsp"             "\n\t"
534       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
535       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
536    );
537    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
538 }
539 
s_pcmpistri_3A(V128 * argLU,V128 * argRU)540 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
541 {
542    V128 resV;
543    UInt resOSZACP, resECX;
544    Bool ok
545       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
546                        zmask_from_V128(argLU),
547                        zmask_from_V128(argRU),
548                        0x3A, False/*!isSTRM*/
549         );
550    assert(ok);
551    resECX = resV.uInt[0];
552    return (resOSZACP << 16) | resECX;
553 }
554 
istri_3A(void)555 void istri_3A ( void )
556 {
557    char* wot = "3A";
558    UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
559    UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
560 
561    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
562 
563    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
564    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
565    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
566    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
567 
568    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
569    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
570    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
571 
572    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
573    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
574    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
575    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
576 
577    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
578    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
579    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
580 
581    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582 
583    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
584    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
585    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
586 
587    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
588    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
589    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
590 
591    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
592    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
593    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
594 
595    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
596    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
597    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
598 
599    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
600    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
601 }
602 
603 
604 
605 //////////////////////////////////////////////////////////
606 //                                                      //
607 //                       ISTRI_0C                       //
608 //                                                      //
609 //////////////////////////////////////////////////////////
610 
611 __attribute__((noinline))
h_pcmpistri_0C(V128 * argL,V128 * argR)612 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
613 {
614    V128 block[2];
615    memcpy(&block[0], argL, sizeof(V128));
616    memcpy(&block[1], argR, sizeof(V128));
617    ULong res = 0, flags = 0;
618    __asm__ __volatile__(
619       "movdqu    0(%2),  %%xmm2"            "\n\t"
620       "movdqu    16(%2), %%xmm11"           "\n\t"
621       "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
622       //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
623       //"movd %%xmm0, %%ecx" "\n\t"
624       "pushfq"                              "\n\t"
625       "popq      %%rdx"                     "\n\t"
626       "movq      %%rcx,  %0"                "\n\t"
627       "movq      %%rdx,  %1"                "\n\t"
628       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
629       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
630    );
631    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
632 }
633 
s_pcmpistri_0C(V128 * argLU,V128 * argRU)634 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
635 {
636    V128 resV;
637    UInt resOSZACP, resECX;
638    Bool ok
639       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
640                        zmask_from_V128(argLU),
641                        zmask_from_V128(argRU),
642                        0x0C, False/*!isSTRM*/
643         );
644    assert(ok);
645    resECX = resV.uInt[0];
646    return (resOSZACP << 16) | resECX;
647 }
648 
istri_0C(void)649 void istri_0C ( void )
650 {
651    char* wot = "0C";
652    UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
653    UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
654 
655    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
656 
657    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
658 
659    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
660    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
661    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
662 
663    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
664 
665    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
666    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
667    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
668    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
669    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
670 
671    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
672    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
673    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
674 
675    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
676    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
677 
678    try_istri(wot,h,s, "1111111111111234", "0000000000000000");
679    try_istri(wot,h,s, "1111111111111234", "0000000000000001");
680    try_istri(wot,h,s, "1111111111111234", "0000000000000011");
681 
682    try_istri(wot,h,s, "1111111111111234", "1111111111111234");
683    try_istri(wot,h,s, "a111111111111111", "000000000000000a");
684    try_istri(wot,h,s, "b111111111111111", "000000000000000a");
685 
686    try_istri(wot,h,s, "b111111111111111", "0000000000000000");
687    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
688    try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
689    try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
690 }
691 
692 
693 //////////////////////////////////////////////////////////
694 //                                                      //
695 //                       ISTRI_08                       //
696 //                                                      //
697 //////////////////////////////////////////////////////////
698 
h_pcmpistri_08(V128 * argL,V128 * argR)699 UInt h_pcmpistri_08 ( V128* argL, V128* argR )
700 {
701    V128 block[2];
702    memcpy(&block[0], argL, sizeof(V128));
703    memcpy(&block[1], argR, sizeof(V128));
704    ULong res, flags;
705    __asm__ __volatile__(
706       "subq      $1024,  %%rsp"             "\n\t"
707       "movdqu    0(%2),  %%xmm2"            "\n\t"
708       "movdqu    16(%2), %%xmm11"           "\n\t"
709       "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
710       "pushfq"                              "\n\t"
711       "popq      %%rdx"                     "\n\t"
712       "movq      %%rcx,  %0"                "\n\t"
713       "movq      %%rdx,  %1"                "\n\t"
714       "addq      $1024,  %%rsp"             "\n\t"
715       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
716       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
717    );
718    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
719 }
720 
s_pcmpistri_08(V128 * argLU,V128 * argRU)721 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
722 {
723    V128 resV;
724    UInt resOSZACP, resECX;
725    Bool ok
726       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
727                        zmask_from_V128(argLU),
728                        zmask_from_V128(argRU),
729                        0x08, False/*!isSTRM*/
730         );
731    assert(ok);
732    resECX = resV.uInt[0];
733    return (resOSZACP << 16) | resECX;
734 }
735 
istri_08(void)736 void istri_08 ( void )
737 {
738    char* wot = "08";
739    UInt(*h)(V128*,V128*) = h_pcmpistri_08;
740    UInt(*s)(V128*,V128*) = s_pcmpistri_08;
741 
742    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
743 
744    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
745    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
746    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
747    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
748 
749    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
750    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
751    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
752 
753    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
754    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
755    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
756    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
757 
758    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
759    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
760    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
761 
762    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763 
764    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
765    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
766    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
767 
768    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
769    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
770    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
771 
772    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
773    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
774    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
775 
776    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
777    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
778    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
779 
780    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
781    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
782 }
783 
784 
785 
786 //////////////////////////////////////////////////////////
787 //                                                      //
788 //                       ISTRI_18                       //
789 //                                                      //
790 //////////////////////////////////////////////////////////
791 
h_pcmpistri_18(V128 * argL,V128 * argR)792 UInt h_pcmpistri_18 ( V128* argL, V128* argR )
793 {
794    V128 block[2];
795    memcpy(&block[0], argL, sizeof(V128));
796    memcpy(&block[1], argR, sizeof(V128));
797    ULong res, flags;
798    __asm__ __volatile__(
799       "subq      $1024,  %%rsp"             "\n\t"
800       "movdqu    0(%2),  %%xmm2"            "\n\t"
801       "movdqu    16(%2), %%xmm11"           "\n\t"
802       "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
803       "pushfq"                              "\n\t"
804       "popq      %%rdx"                     "\n\t"
805       "movq      %%rcx,  %0"                "\n\t"
806       "movq      %%rdx,  %1"                "\n\t"
807       "addq      $1024,  %%rsp"             "\n\t"
808       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
809       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
810    );
811    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
812 }
813 
s_pcmpistri_18(V128 * argLU,V128 * argRU)814 UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
815 {
816    V128 resV;
817    UInt resOSZACP, resECX;
818    Bool ok
819       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
820                        zmask_from_V128(argLU),
821                        zmask_from_V128(argRU),
822                        0x18, False/*!isSTRM*/
823         );
824    assert(ok);
825    resECX = resV.uInt[0];
826    return (resOSZACP << 16) | resECX;
827 }
828 
istri_18(void)829 void istri_18 ( void )
830 {
831    char* wot = "18";
832    UInt(*h)(V128*,V128*) = h_pcmpistri_18;
833    UInt(*s)(V128*,V128*) = s_pcmpistri_18;
834 
835    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
836 
837    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
838    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
839    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
840    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
841 
842    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
843    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
844    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
845 
846    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
847    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
848    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
849    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
850 
851    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
852    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
853    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
854 
855    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856 
857    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
858    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
859    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
860 
861    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
862    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
863    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
864 
865    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
866    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
867    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
868 
869    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
870    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
871    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
872 
873    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
874    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
875 }
876 
877 
878 
879 //////////////////////////////////////////////////////////
880 //                                                      //
881 //                       ISTRI_1A                       //
882 //                                                      //
883 //////////////////////////////////////////////////////////
884 
h_pcmpistri_1A(V128 * argL,V128 * argR)885 UInt h_pcmpistri_1A ( V128* argL, V128* argR )
886 {
887    V128 block[2];
888    memcpy(&block[0], argL, sizeof(V128));
889    memcpy(&block[1], argR, sizeof(V128));
890    ULong res, flags;
891    __asm__ __volatile__(
892       "subq      $1024,  %%rsp"             "\n\t"
893       "movdqu    0(%2),  %%xmm2"            "\n\t"
894       "movdqu    16(%2), %%xmm11"           "\n\t"
895       "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
896       "pushfq"                              "\n\t"
897       "popq      %%rdx"                     "\n\t"
898       "movq      %%rcx,  %0"                "\n\t"
899       "movq      %%rdx,  %1"                "\n\t"
900       "addq      $1024,  %%rsp"             "\n\t"
901       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
902       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
903    );
904    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
905 }
906 
s_pcmpistri_1A(V128 * argLU,V128 * argRU)907 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
908 {
909    V128 resV;
910    UInt resOSZACP, resECX;
911    Bool ok
912       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
913                        zmask_from_V128(argLU),
914                        zmask_from_V128(argRU),
915                        0x1A, False/*!isSTRM*/
916         );
917    assert(ok);
918    resECX = resV.uInt[0];
919    return (resOSZACP << 16) | resECX;
920 }
921 
istri_1A(void)922 void istri_1A ( void )
923 {
924    char* wot = "1A";
925    UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
926    UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
927 
928    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
929 
930    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
931    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
932    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
933    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
934 
935    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
936    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
937    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
938 
939    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
940    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
941    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
942    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
943 
944    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
945    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
946    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
947 
948    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
949 
950    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
951    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
952    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
953 
954    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
955    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
956    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
957 
958    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
959    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
960    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
961 
962    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
963    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
964    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
965 
966    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
967    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
968 }
969 
970 
971 
972 //////////////////////////////////////////////////////////
973 //                                                      //
974 //                       ISTRI_02                       //
975 //                                                      //
976 //////////////////////////////////////////////////////////
977 
h_pcmpistri_02(V128 * argL,V128 * argR)978 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
979 {
980    V128 block[2];
981    memcpy(&block[0], argL, sizeof(V128));
982    memcpy(&block[1], argR, sizeof(V128));
983    ULong res, flags;
984    __asm__ __volatile__(
985       "subq      $1024,  %%rsp"             "\n\t"
986       "movdqu    0(%2),  %%xmm2"            "\n\t"
987       "movdqu    16(%2), %%xmm11"           "\n\t"
988       "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
989 //"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
990 //"movd %%xmm0, %%ecx" "\n\t"
991       "pushfq"                              "\n\t"
992       "popq      %%rdx"                     "\n\t"
993       "movq      %%rcx,  %0"                "\n\t"
994       "movq      %%rdx,  %1"                "\n\t"
995       "addq      $1024,  %%rsp"             "\n\t"
996       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
997       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
998    );
999    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1000 }
1001 
s_pcmpistri_02(V128 * argLU,V128 * argRU)1002 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
1003 {
1004    V128 resV;
1005    UInt resOSZACP, resECX;
1006    Bool ok
1007       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1008                        zmask_from_V128(argLU),
1009                        zmask_from_V128(argRU),
1010                        0x02, False/*!isSTRM*/
1011         );
1012    assert(ok);
1013    resECX = resV.uInt[0];
1014    return (resOSZACP << 16) | resECX;
1015 }
1016 
istri_02(void)1017 void istri_02 ( void )
1018 {
1019    char* wot = "02";
1020    UInt(*h)(V128*,V128*) = h_pcmpistri_02;
1021    UInt(*s)(V128*,V128*) = s_pcmpistri_02;
1022 
1023    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1024    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1025    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1026    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1027 
1028    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1029    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1030    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1031    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1032    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1033 
1034    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1035    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1036    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1037    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1038 
1039    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1040    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1041 
1042    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1043    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1044    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1045    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1046 
1047    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1048 
1049    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1050    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1051 }
1052 
1053 
1054 //////////////////////////////////////////////////////////
1055 //                                                      //
1056 //                       ISTRI_12                       //
1057 //                                                      //
1058 //////////////////////////////////////////////////////////
1059 
h_pcmpistri_12(V128 * argL,V128 * argR)1060 UInt h_pcmpistri_12 ( V128* argL, V128* argR )
1061 {
1062    V128 block[2];
1063    memcpy(&block[0], argL, sizeof(V128));
1064    memcpy(&block[1], argR, sizeof(V128));
1065    ULong res, flags;
1066    __asm__ __volatile__(
1067       "subq      $1024,  %%rsp"             "\n\t"
1068       "movdqu    0(%2),  %%xmm2"            "\n\t"
1069       "movdqu    16(%2), %%xmm11"           "\n\t"
1070       "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
1071 //"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
1072 //"movd %%xmm0, %%ecx" "\n\t"
1073       "pushfq"                              "\n\t"
1074       "popq      %%rdx"                     "\n\t"
1075       "movq      %%rcx,  %0"                "\n\t"
1076       "movq      %%rdx,  %1"                "\n\t"
1077       "addq      $1024,  %%rsp"             "\n\t"
1078       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1079       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1080    );
1081    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1082 }
1083 
s_pcmpistri_12(V128 * argLU,V128 * argRU)1084 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
1085 {
1086    V128 resV;
1087    UInt resOSZACP, resECX;
1088    Bool ok
1089       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1090                        zmask_from_V128(argLU),
1091                        zmask_from_V128(argRU),
1092                        0x12, False/*!isSTRM*/
1093         );
1094    assert(ok);
1095    resECX = resV.uInt[0];
1096    return (resOSZACP << 16) | resECX;
1097 }
1098 
istri_12(void)1099 void istri_12 ( void )
1100 {
1101    char* wot = "12";
1102    UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1103    UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1104 
1105    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1106    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1107    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1108    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1109 
1110    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1111    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1112    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1113    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1114    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1115 
1116    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1117    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1118    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1119    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1120 
1121    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1122    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1123 
1124    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1125    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1126    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1127    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1128 
1129    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1130 
1131    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1132    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1133 }
1134 
1135 
1136 
1137 //////////////////////////////////////////////////////////
1138 //                                                      //
1139 //                       ISTRI_44                       //
1140 //                                                      //
1141 //////////////////////////////////////////////////////////
1142 
h_pcmpistri_44(V128 * argL,V128 * argR)1143 UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1144 {
1145    V128 block[2];
1146    memcpy(&block[0], argL, sizeof(V128));
1147    memcpy(&block[1], argR, sizeof(V128));
1148    ULong res, flags;
1149    __asm__ __volatile__(
1150       "subq      $1024,  %%rsp"             "\n\t"
1151       "movdqu    0(%2),  %%xmm2"            "\n\t"
1152       "movdqu    16(%2), %%xmm11"           "\n\t"
1153       "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
1154 //"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
1155 //"movd %%xmm0, %%ecx" "\n\t"
1156       "pushfq"                              "\n\t"
1157       "popq      %%rdx"                     "\n\t"
1158       "movq      %%rcx,  %0"                "\n\t"
1159       "movq      %%rdx,  %1"                "\n\t"
1160       "addq      $1024,  %%rsp"             "\n\t"
1161       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1162       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1163    );
1164    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1165 }
1166 
s_pcmpistri_44(V128 * argLU,V128 * argRU)1167 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1168 {
1169    V128 resV;
1170    UInt resOSZACP, resECX;
1171    Bool ok
1172       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1173                        zmask_from_V128(argLU),
1174                        zmask_from_V128(argRU),
1175                        0x44, False/*!isSTRM*/
1176         );
1177    assert(ok);
1178    resECX = resV.uInt[0];
1179    return (resOSZACP << 16) | resECX;
1180 }
1181 
istri_44(void)1182 void istri_44 ( void )
1183 {
1184    char* wot = "44";
1185    UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1186    UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1187 
1188    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1189    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1190    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1191    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1192 
1193    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1194    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1195    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1196    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1197    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1198 
1199    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1200 
1201    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1202    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1203    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1204 
1205    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1206    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1207    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1208 
1209    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1210    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1211 
1212    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1213    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1214 
1215    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1216    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1217    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1218    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1219 }
1220 
1221 
1222 //////////////////////////////////////////////////////////
1223 //                                                      //
1224 //                       ISTRI_00                       //
1225 //                                                      //
1226 //////////////////////////////////////////////////////////
1227 
h_pcmpistri_00(V128 * argL,V128 * argR)1228 UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1229 {
1230    V128 block[2];
1231    memcpy(&block[0], argL, sizeof(V128));
1232    memcpy(&block[1], argR, sizeof(V128));
1233    ULong res, flags;
1234    __asm__ __volatile__(
1235       "subq      $1024,  %%rsp"             "\n\t"
1236       "movdqu    0(%2),  %%xmm2"            "\n\t"
1237       "movdqu    16(%2), %%xmm11"           "\n\t"
1238       "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
1239 //"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
1240 //"movd %%xmm0, %%ecx" "\n\t"
1241       "pushfq"                              "\n\t"
1242       "popq      %%rdx"                     "\n\t"
1243       "movq      %%rcx,  %0"                "\n\t"
1244       "movq      %%rdx,  %1"                "\n\t"
1245       "addq      $1024,  %%rsp"             "\n\t"
1246       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1247       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1248    );
1249    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1250 }
1251 
s_pcmpistri_00(V128 * argLU,V128 * argRU)1252 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1253 {
1254    V128 resV;
1255    UInt resOSZACP, resECX;
1256    Bool ok
1257       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1258                        zmask_from_V128(argLU),
1259                        zmask_from_V128(argRU),
1260                        0x00, False/*!isSTRM*/
1261         );
1262    assert(ok);
1263    resECX = resV.uInt[0];
1264    return (resOSZACP << 16) | resECX;
1265 }
1266 
istri_00(void)1267 void istri_00 ( void )
1268 {
1269    char* wot = "00";
1270    UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1271    UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1272 
1273    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1274    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1275    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1276    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1277 
1278    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1279    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1280    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1281    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1282    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1283 
1284    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1285    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1286    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1287    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1288 
1289    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1290    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1291 
1292    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1293    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1294    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1295    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1296 
1297    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1298 
1299    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1300    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1301 }
1302 
1303 
1304 //////////////////////////////////////////////////////////
1305 //                                                      //
1306 //                       ISTRI_38                       //
1307 //                                                      //
1308 //////////////////////////////////////////////////////////
1309 
h_pcmpistri_38(V128 * argL,V128 * argR)1310 UInt h_pcmpistri_38 ( V128* argL, V128* argR )
1311 {
1312    V128 block[2];
1313    memcpy(&block[0], argL, sizeof(V128));
1314    memcpy(&block[1], argR, sizeof(V128));
1315    ULong res, flags;
1316    __asm__ __volatile__(
1317       "subq      $1024,  %%rsp"             "\n\t"
1318       "movdqu    0(%2),  %%xmm2"            "\n\t"
1319       "movdqu    16(%2), %%xmm11"           "\n\t"
1320       "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
1321       "pushfq"                              "\n\t"
1322       "popq      %%rdx"                     "\n\t"
1323       "movq      %%rcx,  %0"                "\n\t"
1324       "movq      %%rdx,  %1"                "\n\t"
1325       "addq      $1024,  %%rsp"             "\n\t"
1326       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1327       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1328    );
1329    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1330 }
1331 
s_pcmpistri_38(V128 * argLU,V128 * argRU)1332 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1333 {
1334    V128 resV;
1335    UInt resOSZACP, resECX;
1336    Bool ok
1337       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1338                        zmask_from_V128(argLU),
1339                        zmask_from_V128(argRU),
1340                        0x38, False/*!isSTRM*/
1341         );
1342    assert(ok);
1343    resECX = resV.uInt[0];
1344    return (resOSZACP << 16) | resECX;
1345 }
1346 
istri_38(void)1347 void istri_38 ( void )
1348 {
1349    char* wot = "38";
1350    UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1351    UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1352 
1353    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1354 
1355    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1356    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1357    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1358    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1359 
1360    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1361    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1362    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1363 
1364    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1365    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1366    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1367    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1368 
1369    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1370    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1371    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1372 
1373    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1374 
1375    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1376    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1377    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1378 
1379    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1380    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1381    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1382 
1383    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1384    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1385    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1386 
1387    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1388    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1389    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1390 
1391    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1392    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1393 }
1394 
1395 
1396 
1397 //////////////////////////////////////////////////////////
1398 //                                                      //
1399 //                       ISTRI_46                       //
1400 //                                                      //
1401 //////////////////////////////////////////////////////////
1402 
h_pcmpistri_46(V128 * argL,V128 * argR)1403 UInt h_pcmpistri_46 ( V128* argL, V128* argR )
1404 {
1405    V128 block[2];
1406    memcpy(&block[0], argL, sizeof(V128));
1407    memcpy(&block[1], argR, sizeof(V128));
1408    ULong res, flags;
1409    __asm__ __volatile__(
1410       "subq      $1024,  %%rsp"             "\n\t"
1411       "movdqu    0(%2),  %%xmm2"            "\n\t"
1412       "movdqu    16(%2), %%xmm11"           "\n\t"
1413       "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
1414       "pushfq"                              "\n\t"
1415       "popq      %%rdx"                     "\n\t"
1416       "movq      %%rcx,  %0"                "\n\t"
1417       "movq      %%rdx,  %1"                "\n\t"
1418       "addq      $1024,  %%rsp"             "\n\t"
1419       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1420       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1421    );
1422    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1423 }
1424 
s_pcmpistri_46(V128 * argLU,V128 * argRU)1425 UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1426 {
1427    V128 resV;
1428    UInt resOSZACP, resECX;
1429    Bool ok
1430       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1431                        zmask_from_V128(argLU),
1432                        zmask_from_V128(argRU),
1433                        0x46, False/*!isSTRM*/
1434         );
1435    assert(ok);
1436    resECX = resV.uInt[0];
1437    return (resOSZACP << 16) | resECX;
1438 }
1439 
istri_46(void)1440 void istri_46 ( void )
1441 {
1442    char* wot = "46";
1443    UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1444    UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1445 
1446    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1447    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1448    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1449    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1450 
1451    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1452    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1453    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1454    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1455    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1456 
1457    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1458 
1459    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1460    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1461    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1462 
1463    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1464    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1465    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1466 
1467    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1468    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1469 
1470    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1471    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1472 
1473    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1474    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1475    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1476    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1477 }
1478 
1479 
1480 //////////////////////////////////////////////////////////
1481 //                                                      //
1482 //                       ISTRI_30                       //
1483 //                                                      //
1484 //////////////////////////////////////////////////////////
1485 
h_pcmpistri_30(V128 * argL,V128 * argR)1486 UInt h_pcmpistri_30 ( V128* argL, V128* argR )
1487 {
1488    V128 block[2];
1489    memcpy(&block[0], argL, sizeof(V128));
1490    memcpy(&block[1], argR, sizeof(V128));
1491    ULong res, flags;
1492    __asm__ __volatile__(
1493       "subq      $1024,  %%rsp"             "\n\t"
1494       "movdqu    0(%2),  %%xmm2"            "\n\t"
1495       "movdqu    16(%2), %%xmm11"           "\n\t"
1496       "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
1497       "pushfq"                              "\n\t"
1498       "popq      %%rdx"                     "\n\t"
1499       "movq      %%rcx,  %0"                "\n\t"
1500       "movq      %%rdx,  %1"                "\n\t"
1501       "addq      $1024,  %%rsp"             "\n\t"
1502       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1503       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1504    );
1505    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1506 }
1507 
s_pcmpistri_30(V128 * argLU,V128 * argRU)1508 UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1509 {
1510    V128 resV;
1511    UInt resOSZACP, resECX;
1512    Bool ok
1513       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1514                        zmask_from_V128(argLU),
1515                        zmask_from_V128(argRU),
1516                        0x30, False/*!isSTRM*/
1517         );
1518    assert(ok);
1519    resECX = resV.uInt[0];
1520    return (resOSZACP << 16) | resECX;
1521 }
1522 
istri_30(void)1523 void istri_30 ( void )
1524 {
1525    char* wot = "30";
1526    UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1527    UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1528 
1529    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1530    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1531    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1532    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1533 
1534    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1535    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1536    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1537    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1538    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1539 
1540    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1542    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1543    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1544 
1545    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1546    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1547 
1548    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1549    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1550    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1551    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1552 
1553    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1554 
1555    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1556    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1557 }
1558 
1559 
1560 //////////////////////////////////////////////////////////
1561 //                                                      //
1562 //                       ISTRI_40                       //
1563 //                                                      //
1564 //////////////////////////////////////////////////////////
1565 
h_pcmpistri_40(V128 * argL,V128 * argR)1566 UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1567 {
1568    V128 block[2];
1569    memcpy(&block[0], argL, sizeof(V128));
1570    memcpy(&block[1], argR, sizeof(V128));
1571    ULong res, flags;
1572    __asm__ __volatile__(
1573       "subq      $1024,  %%rsp"             "\n\t"
1574       "movdqu    0(%2),  %%xmm2"            "\n\t"
1575       "movdqu    16(%2), %%xmm11"           "\n\t"
1576       "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
1577       "pushfq"                              "\n\t"
1578       "popq      %%rdx"                     "\n\t"
1579       "movq      %%rcx,  %0"                "\n\t"
1580       "movq      %%rdx,  %1"                "\n\t"
1581       "addq      $1024,  %%rsp"             "\n\t"
1582       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1583       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1584    );
1585    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1586 }
1587 
s_pcmpistri_40(V128 * argLU,V128 * argRU)1588 UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1589 {
1590    V128 resV;
1591    UInt resOSZACP, resECX;
1592    Bool ok
1593       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1594                        zmask_from_V128(argLU),
1595                        zmask_from_V128(argRU),
1596                        0x40, False/*!isSTRM*/
1597         );
1598    assert(ok);
1599    resECX = resV.uInt[0];
1600    return (resOSZACP << 16) | resECX;
1601 }
1602 
istri_40(void)1603 void istri_40 ( void )
1604 {
1605    char* wot = "40";
1606    UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1607    UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1608 
1609    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1610    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1611    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1612    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1613 
1614    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1615    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1616    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1617    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1618    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1619 
1620    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1622    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1623    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1624 
1625    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1626    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1627 
1628    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1629    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1630    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1631    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1632 
1633    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1634 
1635    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1636    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1637 }
1638 
1639 
1640 //////////////////////////////////////////////////////////
1641 //                                                      //
1642 //                       ISTRI_42                       //
1643 //                                                      //
1644 //////////////////////////////////////////////////////////
1645 
h_pcmpistri_42(V128 * argL,V128 * argR)1646 UInt h_pcmpistri_42 ( V128* argL, V128* argR )
1647 {
1648    V128 block[2];
1649    memcpy(&block[0], argL, sizeof(V128));
1650    memcpy(&block[1], argR, sizeof(V128));
1651    ULong res, flags;
1652    __asm__ __volatile__(
1653       "subq      $1024,  %%rsp"             "\n\t"
1654       "movdqu    0(%2),  %%xmm2"            "\n\t"
1655       "movdqu    16(%2), %%xmm11"           "\n\t"
1656       "pcmpistri $0x42,  %%xmm2, %%xmm11"   "\n\t"
1657       "pushfq"                              "\n\t"
1658       "popq      %%rdx"                     "\n\t"
1659       "movq      %%rcx,  %0"                "\n\t"
1660       "movq      %%rdx,  %1"                "\n\t"
1661       "addq      $1024,  %%rsp"             "\n\t"
1662       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1663       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1664    );
1665    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1666 }
1667 
s_pcmpistri_42(V128 * argLU,V128 * argRU)1668 UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
1669 {
1670    V128 resV;
1671    UInt resOSZACP, resECX;
1672    Bool ok
1673       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1674                        zmask_from_V128(argLU),
1675                        zmask_from_V128(argRU),
1676                        0x42, False/*!isSTRM*/
1677         );
1678    assert(ok);
1679    resECX = resV.uInt[0];
1680    return (resOSZACP << 16) | resECX;
1681 }
1682 
istri_42(void)1683 void istri_42 ( void )
1684 {
1685    char* wot = "42";
1686    UInt(*h)(V128*,V128*) = h_pcmpistri_42;
1687    UInt(*s)(V128*,V128*) = s_pcmpistri_42;
1688 
1689    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1690    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1691    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1692    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1693 
1694    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1695    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1696    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1697    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1698    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1699 
1700    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1701    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1702    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1703    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1704 
1705    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1706    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1707 
1708    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1709    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1710    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1711    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1712 
1713    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1714 
1715    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1716    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1717 }
1718 
1719 
1720 //////////////////////////////////////////////////////////
1721 //                                                      //
1722 //                       ISTRI_0E                       //
1723 //                                                      //
1724 //////////////////////////////////////////////////////////
1725 
1726 __attribute__((noinline))
h_pcmpistri_0E(V128 * argL,V128 * argR)1727 UInt h_pcmpistri_0E ( V128* argL, V128* argR )
1728 {
1729    V128 block[2];
1730    memcpy(&block[0], argL, sizeof(V128));
1731    memcpy(&block[1], argR, sizeof(V128));
1732    ULong res = 0, flags = 0;
1733    __asm__ __volatile__(
1734       "movdqu    0(%2),  %%xmm2"            "\n\t"
1735       "movdqu    16(%2), %%xmm11"           "\n\t"
1736       "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
1737       "pushfq"                              "\n\t"
1738       "popq      %%rdx"                     "\n\t"
1739       "movq      %%rcx,  %0"                "\n\t"
1740       "movq      %%rdx,  %1"                "\n\t"
1741       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1742       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1743    );
1744    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1745 }
1746 
s_pcmpistri_0E(V128 * argLU,V128 * argRU)1747 UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1748 {
1749    V128 resV;
1750    UInt resOSZACP, resECX;
1751    Bool ok
1752       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1753                        zmask_from_V128(argLU),
1754                        zmask_from_V128(argRU),
1755                        0x0E, False/*!isSTRM*/
1756         );
1757    assert(ok);
1758    resECX = resV.uInt[0];
1759    return (resOSZACP << 16) | resECX;
1760 }
1761 
istri_0E(void)1762 void istri_0E ( void )
1763 {
1764    char* wot = "0E";
1765    UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1766    UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1767 
1768    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1769 
1770    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1771 
1772    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1773    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1774    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1775 
1776    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1777 
1778    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1779    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1780    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1781    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1782    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1783 
1784    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1785    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1786    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1787 
1788    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1789    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1790 
1791    try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1792    try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1793    try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1794 
1795    try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1796    try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1797    try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1798 
1799    try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1800    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1801    try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1802    try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1803 }
1804 
1805 
1806 //////////////////////////////////////////////////////////
1807 //                                                      //
1808 //                       ISTRI_34                       //
1809 //                                                      //
1810 //////////////////////////////////////////////////////////
1811 
h_pcmpistri_34(V128 * argL,V128 * argR)1812 UInt h_pcmpistri_34 ( V128* argL, V128* argR )
1813 {
1814    V128 block[2];
1815    memcpy(&block[0], argL, sizeof(V128));
1816    memcpy(&block[1], argR, sizeof(V128));
1817    ULong res, flags;
1818    __asm__ __volatile__(
1819       "subq      $1024,  %%rsp"             "\n\t"
1820       "movdqu    0(%2),  %%xmm2"            "\n\t"
1821       "movdqu    16(%2), %%xmm11"           "\n\t"
1822       "pcmpistri $0x34,  %%xmm2, %%xmm11"   "\n\t"
1823       "pushfq"                              "\n\t"
1824       "popq      %%rdx"                     "\n\t"
1825       "movq      %%rcx,  %0"                "\n\t"
1826       "movq      %%rdx,  %1"                "\n\t"
1827       "addq      $1024,  %%rsp"             "\n\t"
1828       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1829       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1830    );
1831    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1832 }
1833 
s_pcmpistri_34(V128 * argLU,V128 * argRU)1834 UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1835 {
1836    V128 resV;
1837    UInt resOSZACP, resECX;
1838    Bool ok
1839       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1840                        zmask_from_V128(argLU),
1841                        zmask_from_V128(argRU),
1842                        0x34, False/*!isSTRM*/
1843         );
1844    assert(ok);
1845    resECX = resV.uInt[0];
1846    return (resOSZACP << 16) | resECX;
1847 }
1848 
istri_34(void)1849 void istri_34 ( void )
1850 {
1851    char* wot = "34";
1852    UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1853    UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1854 
1855    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1856    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1857    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1858    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1859 
1860    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1861    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1862    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1863    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1864    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1865 
1866    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1867 
1868    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1869    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1870    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1871 
1872    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1873    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1874    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1875 
1876    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1877    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1878 
1879    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1880    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1881 
1882    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1883    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1884    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1885    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1886 }
1887 
1888 
1889 //////////////////////////////////////////////////////////
1890 //                                                      //
1891 //                       ISTRI_14                       //
1892 //                                                      //
1893 //////////////////////////////////////////////////////////
1894 
h_pcmpistri_14(V128 * argL,V128 * argR)1895 UInt h_pcmpistri_14 ( V128* argL, V128* argR )
1896 {
1897    V128 block[2];
1898    memcpy(&block[0], argL, sizeof(V128));
1899    memcpy(&block[1], argR, sizeof(V128));
1900    ULong res, flags;
1901    __asm__ __volatile__(
1902       "subq      $1024,  %%rsp"             "\n\t"
1903       "movdqu    0(%2),  %%xmm2"            "\n\t"
1904       "movdqu    16(%2), %%xmm11"           "\n\t"
1905       "pcmpistri $0x14,  %%xmm2, %%xmm11"   "\n\t"
1906       "pushfq"                              "\n\t"
1907       "popq      %%rdx"                     "\n\t"
1908       "movq      %%rcx,  %0"                "\n\t"
1909       "movq      %%rdx,  %1"                "\n\t"
1910       "addq      $1024,  %%rsp"             "\n\t"
1911       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1912       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1913    );
1914    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1915 }
1916 
s_pcmpistri_14(V128 * argLU,V128 * argRU)1917 UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1918 {
1919    V128 resV;
1920    UInt resOSZACP, resECX;
1921    Bool ok
1922       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1923                        zmask_from_V128(argLU),
1924                        zmask_from_V128(argRU),
1925                        0x14, False/*!isSTRM*/
1926         );
1927    assert(ok);
1928    resECX = resV.uInt[0];
1929    return (resOSZACP << 16) | resECX;
1930 }
1931 
istri_14(void)1932 void istri_14 ( void )
1933 {
1934    char* wot = "14";
1935    UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1936    UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1937 
1938    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1939    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1940    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1941    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1942 
1943    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1944    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1945    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1946    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1947    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1948 
1949    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1950 
1951    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1952    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1953    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1954 
1955    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1956    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1957    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1958 
1959    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1960    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1961 
1962    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1963    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1964 
1965    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1966    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1967    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1968    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1969 }
1970 
1971 
1972 //////////////////////////////////////////////////////////
1973 //                                                      //
1974 //                         main                         //
1975 //                                                      //
1976 //////////////////////////////////////////////////////////
1977 
main(void)1978 int main ( void )
1979 {
1980    istri_4A();
1981    istri_3A();
1982    istri_08();
1983    istri_18();
1984    istri_1A();
1985    istri_02();
1986    istri_0C();
1987    istri_12();
1988    istri_44();
1989    istri_00();
1990    istri_38();
1991    istri_46();
1992    istri_30();
1993    istri_40();
1994    istri_42();
1995    istri_0E();
1996    istri_14();
1997    istri_34();
1998    return 0;
1999 }
2000