1
2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3 pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
4 aspect. */
5
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9
/* Short aliases for the scalar types used throughout this test. */
typedef unsigned int        UInt;
typedef signed int          Int;
typedef unsigned char       UChar;
typedef signed char         Char;
typedef unsigned long long  ULong;

/* Boolean stored in a single unsigned byte. */
typedef UChar Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 16-byte vector value, viewable either as 16 bytes or as 4 UInt
   words. */
typedef union {
   UChar uChar[16];
   UInt  uInt[4];
} V128;
26
/* Bit positions of the O, S, Z, A, P and C flags within a flags
   word, listed from the highest bit down. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_P 2
#define SHIFT_C 0

/* Single-bit masks (of type unsigned long long) for the same flags. */
#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_P (1ULL << SHIFT_P)
#define MASK_C (1ULL << SHIFT_C)
40
41
/* Count the leading zero bits of a 32-bit value.

   Returns the number of zero bits above the most significant 1 bit
   of x (0 .. 31), or 32 when x is zero.

   This is a binary search done entirely in unsigned arithmetic, so
   it does not depend on how the compiler right-shifts negative
   signed values.  Only the low 32 bits of x are considered. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   /* At each step: if the top half of the remaining window is empty,
      account for it and slide the lower half up into its place. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
65
/* Count the trailing zero bits of a 32-bit value.

   Returns the index of the least significant 1 bit of x, or 32 when
   x is zero. */
UInt ctz32 ( UInt x )
{
   /* Has a 1 in exactly those positions that lie below the lowest
      set bit of x (all positions, if x is zero). */
   UInt belowLowestOne = (~x) & (x - 1);

   /* Those 1s form a block at the bottom of the word, so their count
      is 32 minus the number of leading zeroes. */
   UInt leadingZeroes = clz32(belowLowestOne);
   return 32 - leadingZeroes;
}
70
/* Build a 16-byte vector from a 16-character hex summary string.

   Every character of summary must be a hex digit (0-9, A-F or a-f).
   Each digit is replicated into both nibbles of one byte, so 'a'
   becomes 0xAA and '0' becomes 0x00.  The string is consumed from
   its end: the last character fills dst->uChar[0] and the first
   character fills dst->uChar[15].

   Asserts if summary is not exactly 16 characters long or contains a
   non-hex character. */
void expand ( V128* dst, char* summary )
{
   Int pos;

   assert( strlen(summary) == 16 );

   for (pos = 0; pos < 16; pos++) {
      UChar digit  = summary[15 - pos];
      UChar nibble = 0;

      if ('0' <= digit && digit <= '9')
         nibble = digit - '0';
      else if ('A' <= digit && digit <= 'F')
         nibble = digit - 'A' + 10;
      else if ('a' <= digit && digit <= 'f')
         nibble = digit - 'a' + 10;
      else
         assert(0);

      assert(nibble < 16);

      /* Same digit in the high and the low nibble. */
      UChar byte = (nibble << 4) | nibble;
      assert(byte < 256);
      dst->uChar[pos] = byte;
   }
}
89
/* Run one test case through two implementations and print both
   results.

   which  - two-character label for the immediate being tested; it is
            only printed.
   h_fn   - first implementation to call (callers in this file pass
            the inline-assembly h_pcmpistri_* routines).
   s_fn   - second implementation to call (callers in this file pass
            the s_pcmpistri_* routines built on pcmpXstrX_WRK).
   summL, summR - 16-character hex summaries, turned into vectors by
            expand().

   One line is printed per call; it ends in "!!!!" when the two
   results differ. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128        vecL, vecR;
   UInt        resultH, resultS;
   const char* marker;

   assert(strlen(which) == 2);

   expand(&vecL, summL);
   expand(&vecR, summR);

   resultH = h_fn(&vecL, &vecR);
   resultS = s_fn(&vecL, &vecR);
   marker  = (resultH == resultS) ? "" : "!!!!";

   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, resultH, resultS, marker);
}
104
zmask_from_V128(V128 * arg)105 UInt zmask_from_V128 ( V128* arg )
106 {
107 UInt i, res = 0;
108 for (i = 0; i < 16; i++) {
109 res |= ((arg->uChar[i] == 0) ? 1 : 0) << i;
110 }
111 return res;
112 }
113
114 //////////////////////////////////////////////////////////
115 // //
116 // GENERAL //
117 // //
118 //////////////////////////////////////////////////////////
119
120
121 /* Given partial results from a pcmpXstrX operation (intRes1,
122 basically), generate an I format (index value for ECX) output, and
123 also the new OSZACP flags.
124 */
125 static
pcmpXstrX_WRK_gen_output_fmt_I(V128 * resV,UInt * resOSZACP,UInt intRes1,UInt zmaskL,UInt zmaskR,UInt validL,UInt pol,UInt idx)126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127 /*OUT*/UInt* resOSZACP,
128 UInt intRes1,
129 UInt zmaskL, UInt zmaskR,
130 UInt validL,
131 UInt pol, UInt idx )
132 {
133 assert((pol >> 2) == 0);
134 assert((idx >> 1) == 0);
135
136 UInt intRes2 = 0;
137 switch (pol) {
138 case 0: intRes2 = intRes1; break; // pol +
139 case 1: intRes2 = ~intRes1; break; // pol -
140 case 2: intRes2 = intRes1; break; // pol m+
141 case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142 }
143 intRes2 &= 0xFFFF;
144
145 // generate ecx value
146 UInt newECX = 0;
147 if (idx) {
148 // index of ms-1-bit
149 newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150 } else {
151 // index of ls-1-bit
152 newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153 }
154
155 *(UInt*)(&resV[0]) = newECX;
156
157 // generate new flags, common to all ISTRI and ISTRM cases
158 *resOSZACP // A, P are zero
159 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
162 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
163 }
164
165
166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167 variants.
168
169 For xSTRI variants, the new ECX value is placed in the 32 bits
170 pointed to by *resV. For xSTRM variants, the result is a 128 bit
171 value and is placed at *resV in the obvious way.
172
173 For all variants, the new OSZACP value is placed at *resOSZACP.
174
175 argLV and argRV are the vector args. The caller must prepare a
176 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
177 must be 1 for each zero byte of of the respective arg. For ESTRx
178 variants this is derived from the explicit length indication, and
179 must be 0 in all places except at the bit index corresponding to
180 the valid length (0 .. 16). If the valid length is 16 then the
181 mask must be all zeroes. In all cases, bits 31:16 must be zero.
182
183 imm8 is the original immediate from the instruction. isSTRM
184 indicates whether this is a xSTRM or xSTRI variant, which controls
185 how much of *res is written.
186
187 If the given imm8 case can be handled, the return value is True.
188 If not, False is returned, and neither *res not *resOSZACP are
189 altered.
190 */
191
pcmpXstrX_WRK(V128 * resV,UInt * resOSZACP,V128 * argLV,V128 * argRV,UInt zmaskL,UInt zmaskR,UInt imm8,Bool isSTRM)192 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193 /*OUT*/UInt* resOSZACP,
194 V128* argLV, V128* argRV,
195 UInt zmaskL, UInt zmaskR,
196 UInt imm8, Bool isSTRM )
197 {
198 assert(imm8 < 0x80);
199 assert((zmaskL >> 16) == 0);
200 assert((zmaskR >> 16) == 0);
201
202 /* Explicitly reject any imm8 values that haven't been validated,
203 even if they would probably work. Life is too short to have
204 unvalidated cases in the code base. */
205 switch (imm8) {
206 case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
207 case 0x12: case 0x14: case 0x18: case 0x1A:
208 case 0x30: case 0x34: case 0x38: case 0x3A:
209 case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
210 break;
211 default:
212 return False;
213 }
214
215 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
216 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
217 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
218 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
219
220 /*----------------------------------------*/
221 /*-- strcmp on byte data --*/
222 /*----------------------------------------*/
223
224 if (agg == 2/*equal each, aka strcmp*/
225 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
226 && !isSTRM) {
227 Int i;
228 UChar* argL = (UChar*)argLV;
229 UChar* argR = (UChar*)argRV;
230 UInt boolResII = 0;
231 for (i = 15; i >= 0; i--) {
232 UChar cL = argL[i];
233 UChar cR = argR[i];
234 boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
235 }
236 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
237 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
238
239 // do invalidation, common to all equal-each cases
240 UInt intRes1
241 = (boolResII & validL & validR) // if both valid, use cmpres
242 | (~ (validL | validR)); // if both invalid, force 1
243 // else force 0
244 intRes1 &= 0xFFFF;
245
246 // generate I-format output
247 pcmpXstrX_WRK_gen_output_fmt_I(
248 resV, resOSZACP,
249 intRes1, zmaskL, zmaskR, validL, pol, idx
250 );
251
252 return True;
253 }
254
255 /*----------------------------------------*/
256 /*-- set membership on byte data --*/
257 /*----------------------------------------*/
258
259 if (agg == 0/*equal any, aka find chars in a set*/
260 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
261 && !isSTRM) {
262 /* argL: the string, argR: charset */
263 UInt si, ci;
264 UChar* argL = (UChar*)argLV;
265 UChar* argR = (UChar*)argRV;
266 UInt boolRes = 0;
267 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
268 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
269
270 for (si = 0; si < 16; si++) {
271 if ((validL & (1 << si)) == 0)
272 // run off the end of the string.
273 break;
274 UInt m = 0;
275 for (ci = 0; ci < 16; ci++) {
276 if ((validR & (1 << ci)) == 0) break;
277 if (argR[ci] == argL[si]) { m = 1; break; }
278 }
279 boolRes |= (m << si);
280 }
281
282 // boolRes is "pre-invalidated"
283 UInt intRes1 = boolRes & 0xFFFF;
284
285 // generate I-format output
286 pcmpXstrX_WRK_gen_output_fmt_I(
287 resV, resOSZACP,
288 intRes1, zmaskL, zmaskR, validL, pol, idx
289 );
290
291 return True;
292 }
293
294 /*----------------------------------------*/
295 /*-- substring search on byte data --*/
296 /*----------------------------------------*/
297
298 if (agg == 3/*equal ordered, aka substring search*/
299 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
300 && !isSTRM) {
301
302 /* argL: haystack, argR: needle */
303 UInt ni, hi;
304 UChar* argL = (UChar*)argLV;
305 UChar* argR = (UChar*)argRV;
306 UInt boolRes = 0;
307 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
308 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
309 for (hi = 0; hi < 16; hi++) {
310 UInt m = 1;
311 for (ni = 0; ni < 16; ni++) {
312 if ((validR & (1 << ni)) == 0) break;
313 UInt i = ni + hi;
314 if (i >= 16) break;
315 if (argL[i] != argR[ni]) { m = 0; break; }
316 }
317 boolRes |= (m << hi);
318 if ((validL & (1 << hi)) == 0)
319 // run off the end of the haystack
320 break;
321 }
322
323 // boolRes is "pre-invalidated"
324 UInt intRes1 = boolRes & 0xFFFF;
325
326 // generate I-format output
327 pcmpXstrX_WRK_gen_output_fmt_I(
328 resV, resOSZACP,
329 intRes1, zmaskL, zmaskR, validL, pol, idx
330 );
331
332 return True;
333 }
334
335 /*----------------------------------------*/
336 /*-- ranges, unsigned byte data --*/
337 /*----------------------------------------*/
338
339 if (agg == 1/*ranges*/
340 && fmt == 0/*ub*/
341 && !isSTRM) {
342
343 /* argL: string, argR: range-pairs */
344 UInt ri, si;
345 UChar* argL = (UChar*)argLV;
346 UChar* argR = (UChar*)argRV;
347 UInt boolRes = 0;
348 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
349 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
350 for (si = 0; si < 16; si++) {
351 if ((validL & (1 << si)) == 0)
352 // run off the end of the string
353 break;
354 UInt m = 0;
355 for (ri = 0; ri < 16; ri += 2) {
356 if ((validR & (3 << ri)) != (3 << ri)) break;
357 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
358 m = 1; break;
359 }
360 }
361 boolRes |= (m << si);
362 }
363
364 // boolRes is "pre-invalidated"
365 UInt intRes1 = boolRes & 0xFFFF;
366
367 // generate I-format output
368 pcmpXstrX_WRK_gen_output_fmt_I(
369 resV, resOSZACP,
370 intRes1, zmaskL, zmaskR, validL, pol, idx
371 );
372
373 return True;
374 }
375
376 /*----------------------------------------*/
377 /*-- ranges, signed byte data --*/
378 /*----------------------------------------*/
379
380 if (agg == 1/*ranges*/
381 && fmt == 2/*sb*/
382 && !isSTRM) {
383
384 /* argL: string, argR: range-pairs */
385 UInt ri, si;
386 Char* argL = (Char*)argLV;
387 Char* argR = (Char*)argRV;
388 UInt boolRes = 0;
389 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
390 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
391 for (si = 0; si < 16; si++) {
392 if ((validL & (1 << si)) == 0)
393 // run off the end of the string
394 break;
395 UInt m = 0;
396 for (ri = 0; ri < 16; ri += 2) {
397 if ((validR & (3 << ri)) != (3 << ri)) break;
398 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
399 m = 1; break;
400 }
401 }
402 boolRes |= (m << si);
403 }
404
405 // boolRes is "pre-invalidated"
406 UInt intRes1 = boolRes & 0xFFFF;
407
408 // generate I-format output
409 pcmpXstrX_WRK_gen_output_fmt_I(
410 resV, resOSZACP,
411 intRes1, zmaskL, zmaskR, validL, pol, idx
412 );
413
414 return True;
415 }
416
417 return False;
418 }
419
420
421 //////////////////////////////////////////////////////////
422 // //
423 // ISTRI_4A //
424 // //
425 //////////////////////////////////////////////////////////
426
h_pcmpistri_4A(V128 * argL,V128 * argR)427 UInt h_pcmpistri_4A ( V128* argL, V128* argR )
428 {
429 V128 block[2];
430 memcpy(&block[0], argL, sizeof(V128));
431 memcpy(&block[1], argR, sizeof(V128));
432 ULong res, flags;
433 __asm__ __volatile__(
434 "subq $1024, %%rsp" "\n\t"
435 "movdqu 0(%2), %%xmm2" "\n\t"
436 "movdqu 16(%2), %%xmm11" "\n\t"
437 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t"
438 "pushfq" "\n\t"
439 "popq %%rdx" "\n\t"
440 "movq %%rcx, %0" "\n\t"
441 "movq %%rdx, %1" "\n\t"
442 "addq $1024, %%rsp" "\n\t"
443 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
444 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
445 );
446 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
447 }
448
s_pcmpistri_4A(V128 * argLU,V128 * argRU)449 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
450 {
451 V128 resV;
452 UInt resOSZACP, resECX;
453 Bool ok
454 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
455 zmask_from_V128(argLU),
456 zmask_from_V128(argRU),
457 0x4A, False/*!isSTRM*/
458 );
459 assert(ok);
460 resECX = resV.uInt[0];
461 return (resOSZACP << 16) | resECX;
462 }
463
istri_4A(void)464 void istri_4A ( void )
465 {
466 char* wot = "4A";
467 UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
468 UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
469
470 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
471
472 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
473 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
474 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
475 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
476
477 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
478 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
479 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
480
481 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
482 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
483 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
484 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
485
486 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
487 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
488 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
489
490 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491
492 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
493 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
494 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
495
496 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
497 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
498 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
499
500 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
501 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
502 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
503
504 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
505 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
506 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
507
508 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
509 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
510 }
511
512 //////////////////////////////////////////////////////////
513 // //
514 // ISTRI_3A //
515 // //
516 //////////////////////////////////////////////////////////
517
h_pcmpistri_3A(V128 * argL,V128 * argR)518 UInt h_pcmpistri_3A ( V128* argL, V128* argR )
519 {
520 V128 block[2];
521 memcpy(&block[0], argL, sizeof(V128));
522 memcpy(&block[1], argR, sizeof(V128));
523 ULong res, flags;
524 __asm__ __volatile__(
525 "subq $1024, %%rsp" "\n\t"
526 "movdqu 0(%2), %%xmm2" "\n\t"
527 "movdqu 16(%2), %%xmm11" "\n\t"
528 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t"
529 "pushfq" "\n\t"
530 "popq %%rdx" "\n\t"
531 "movq %%rcx, %0" "\n\t"
532 "movq %%rdx, %1" "\n\t"
533 "addq $1024, %%rsp" "\n\t"
534 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
535 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
536 );
537 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
538 }
539
s_pcmpistri_3A(V128 * argLU,V128 * argRU)540 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
541 {
542 V128 resV;
543 UInt resOSZACP, resECX;
544 Bool ok
545 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
546 zmask_from_V128(argLU),
547 zmask_from_V128(argRU),
548 0x3A, False/*!isSTRM*/
549 );
550 assert(ok);
551 resECX = resV.uInt[0];
552 return (resOSZACP << 16) | resECX;
553 }
554
istri_3A(void)555 void istri_3A ( void )
556 {
557 char* wot = "3A";
558 UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
559 UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
560
561 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
562
563 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
564 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
565 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
566 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
567
568 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
569 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
570 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
571
572 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
573 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
574 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
575 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
576
577 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
578 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
579 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
580
581 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582
583 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
584 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
585 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
586
587 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
588 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
589 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
590
591 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
592 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
593 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
594
595 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
596 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
597 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
598
599 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
600 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
601 }
602
603
604
605 //////////////////////////////////////////////////////////
606 // //
607 // ISTRI_0C //
608 // //
609 //////////////////////////////////////////////////////////
610
611 __attribute__((noinline))
h_pcmpistri_0C(V128 * argL,V128 * argR)612 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
613 {
614 V128 block[2];
615 memcpy(&block[0], argL, sizeof(V128));
616 memcpy(&block[1], argR, sizeof(V128));
617 ULong res = 0, flags = 0;
618 __asm__ __volatile__(
619 "movdqu 0(%2), %%xmm2" "\n\t"
620 "movdqu 16(%2), %%xmm11" "\n\t"
621 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t"
622 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t"
623 //"movd %%xmm0, %%ecx" "\n\t"
624 "pushfq" "\n\t"
625 "popq %%rdx" "\n\t"
626 "movq %%rcx, %0" "\n\t"
627 "movq %%rdx, %1" "\n\t"
628 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
629 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
630 );
631 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
632 }
633
s_pcmpistri_0C(V128 * argLU,V128 * argRU)634 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
635 {
636 V128 resV;
637 UInt resOSZACP, resECX;
638 Bool ok
639 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
640 zmask_from_V128(argLU),
641 zmask_from_V128(argRU),
642 0x0C, False/*!isSTRM*/
643 );
644 assert(ok);
645 resECX = resV.uInt[0];
646 return (resOSZACP << 16) | resECX;
647 }
648
istri_0C(void)649 void istri_0C ( void )
650 {
651 char* wot = "0C";
652 UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
653 UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
654
655 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
656
657 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
658
659 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
660 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
661 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
662
663 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
664
665 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
666 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
667 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
668 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
669 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
670
671 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
672 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
673 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
674
675 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
676 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
677
678 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
679 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
680 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
681
682 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
683 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
684 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
685
686 try_istri(wot,h,s, "b111111111111111", "0000000000000000");
687 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
688 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
689 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
690 }
691
692
693 //////////////////////////////////////////////////////////
694 // //
695 // ISTRI_08 //
696 // //
697 //////////////////////////////////////////////////////////
698
h_pcmpistri_08(V128 * argL,V128 * argR)699 UInt h_pcmpistri_08 ( V128* argL, V128* argR )
700 {
701 V128 block[2];
702 memcpy(&block[0], argL, sizeof(V128));
703 memcpy(&block[1], argR, sizeof(V128));
704 ULong res, flags;
705 __asm__ __volatile__(
706 "subq $1024, %%rsp" "\n\t"
707 "movdqu 0(%2), %%xmm2" "\n\t"
708 "movdqu 16(%2), %%xmm11" "\n\t"
709 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t"
710 "pushfq" "\n\t"
711 "popq %%rdx" "\n\t"
712 "movq %%rcx, %0" "\n\t"
713 "movq %%rdx, %1" "\n\t"
714 "addq $1024, %%rsp" "\n\t"
715 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
716 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
717 );
718 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
719 }
720
s_pcmpistri_08(V128 * argLU,V128 * argRU)721 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
722 {
723 V128 resV;
724 UInt resOSZACP, resECX;
725 Bool ok
726 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
727 zmask_from_V128(argLU),
728 zmask_from_V128(argRU),
729 0x08, False/*!isSTRM*/
730 );
731 assert(ok);
732 resECX = resV.uInt[0];
733 return (resOSZACP << 16) | resECX;
734 }
735
istri_08(void)736 void istri_08 ( void )
737 {
738 char* wot = "08";
739 UInt(*h)(V128*,V128*) = h_pcmpistri_08;
740 UInt(*s)(V128*,V128*) = s_pcmpistri_08;
741
742 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
743
744 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
745 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
746 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
747 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
748
749 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
750 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
751 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
752
753 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
754 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
755 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
756 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
757
758 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
759 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
760 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
761
762 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763
764 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
765 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
766 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
767
768 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
769 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
770 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
771
772 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
773 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
774 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
775
776 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
777 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
778 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
779
780 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
781 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
782 }
783
784
785
786 //////////////////////////////////////////////////////////
787 // //
788 // ISTRI_18 //
789 // //
790 //////////////////////////////////////////////////////////
791
h_pcmpistri_18(V128 * argL,V128 * argR)792 UInt h_pcmpistri_18 ( V128* argL, V128* argR )
793 {
794 V128 block[2];
795 memcpy(&block[0], argL, sizeof(V128));
796 memcpy(&block[1], argR, sizeof(V128));
797 ULong res, flags;
798 __asm__ __volatile__(
799 "subq $1024, %%rsp" "\n\t"
800 "movdqu 0(%2), %%xmm2" "\n\t"
801 "movdqu 16(%2), %%xmm11" "\n\t"
802 "pcmpistri $0x18, %%xmm2, %%xmm11" "\n\t"
803 "pushfq" "\n\t"
804 "popq %%rdx" "\n\t"
805 "movq %%rcx, %0" "\n\t"
806 "movq %%rdx, %1" "\n\t"
807 "addq $1024, %%rsp" "\n\t"
808 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
809 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
810 );
811 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
812 }
813
s_pcmpistri_18(V128 * argLU,V128 * argRU)814 UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
815 {
816 V128 resV;
817 UInt resOSZACP, resECX;
818 Bool ok
819 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
820 zmask_from_V128(argLU),
821 zmask_from_V128(argRU),
822 0x18, False/*!isSTRM*/
823 );
824 assert(ok);
825 resECX = resV.uInt[0];
826 return (resOSZACP << 16) | resECX;
827 }
828
istri_18(void)829 void istri_18 ( void )
830 {
831 char* wot = "18";
832 UInt(*h)(V128*,V128*) = h_pcmpistri_18;
833 UInt(*s)(V128*,V128*) = s_pcmpistri_18;
834
835 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
836
837 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
838 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
839 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
840 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
841
842 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
843 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
844 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
845
846 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
847 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
848 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
849 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
850
851 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
852 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
853 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
854
855 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856
857 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
858 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
859 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
860
861 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
862 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
863 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
864
865 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
866 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
867 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
868
869 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
870 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
871 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
872
873 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
874 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
875 }
876
877
878
879 //////////////////////////////////////////////////////////
880 // //
881 // ISTRI_1A //
882 // //
883 //////////////////////////////////////////////////////////
884
h_pcmpistri_1A(V128 * argL,V128 * argR)885 UInt h_pcmpistri_1A ( V128* argL, V128* argR )
886 {
887 V128 block[2];
888 memcpy(&block[0], argL, sizeof(V128));
889 memcpy(&block[1], argR, sizeof(V128));
890 ULong res, flags;
891 __asm__ __volatile__(
892 "subq $1024, %%rsp" "\n\t"
893 "movdqu 0(%2), %%xmm2" "\n\t"
894 "movdqu 16(%2), %%xmm11" "\n\t"
895 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t"
896 "pushfq" "\n\t"
897 "popq %%rdx" "\n\t"
898 "movq %%rcx, %0" "\n\t"
899 "movq %%rdx, %1" "\n\t"
900 "addq $1024, %%rsp" "\n\t"
901 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
902 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
903 );
904 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
905 }
906
s_pcmpistri_1A(V128 * argLU,V128 * argRU)907 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
908 {
909 V128 resV;
910 UInt resOSZACP, resECX;
911 Bool ok
912 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
913 zmask_from_V128(argLU),
914 zmask_from_V128(argRU),
915 0x1A, False/*!isSTRM*/
916 );
917 assert(ok);
918 resECX = resV.uInt[0];
919 return (resOSZACP << 16) | resECX;
920 }
921
istri_1A(void)922 void istri_1A ( void )
923 {
924 char* wot = "1A";
925 UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
926 UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
927
928 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
929
930 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
931 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
932 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
933 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
934
935 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
936 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
937 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
938
939 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
940 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
941 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
942 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
943
944 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
945 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
946 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
947
948 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
949
950 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
951 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
952 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
953
954 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
955 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
956 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
957
958 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
959 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
960 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
961
962 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
963 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
964 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
965
966 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
967 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
968 }
969
970
971
972 //////////////////////////////////////////////////////////
973 // //
974 // ISTRI_02 //
975 // //
976 //////////////////////////////////////////////////////////
977
h_pcmpistri_02(V128 * argL,V128 * argR)978 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
979 {
980 V128 block[2];
981 memcpy(&block[0], argL, sizeof(V128));
982 memcpy(&block[1], argR, sizeof(V128));
983 ULong res, flags;
984 __asm__ __volatile__(
985 "subq $1024, %%rsp" "\n\t"
986 "movdqu 0(%2), %%xmm2" "\n\t"
987 "movdqu 16(%2), %%xmm11" "\n\t"
988 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t"
989 //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t"
990 //"movd %%xmm0, %%ecx" "\n\t"
991 "pushfq" "\n\t"
992 "popq %%rdx" "\n\t"
993 "movq %%rcx, %0" "\n\t"
994 "movq %%rdx, %1" "\n\t"
995 "addq $1024, %%rsp" "\n\t"
996 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
997 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
998 );
999 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1000 }
1001
s_pcmpistri_02(V128 * argLU,V128 * argRU)1002 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
1003 {
1004 V128 resV;
1005 UInt resOSZACP, resECX;
1006 Bool ok
1007 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1008 zmask_from_V128(argLU),
1009 zmask_from_V128(argRU),
1010 0x02, False/*!isSTRM*/
1011 );
1012 assert(ok);
1013 resECX = resV.uInt[0];
1014 return (resOSZACP << 16) | resECX;
1015 }
1016
istri_02(void)1017 void istri_02 ( void )
1018 {
1019 char* wot = "02";
1020 UInt(*h)(V128*,V128*) = h_pcmpistri_02;
1021 UInt(*s)(V128*,V128*) = s_pcmpistri_02;
1022
1023 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1024 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1025 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1026 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1027
1028 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1029 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1030 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1031 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1032 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1033
1034 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1035 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1036 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1037 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1038
1039 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1040 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1041
1042 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1043 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1044 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1045 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1046
1047 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1048
1049 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1050 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1051 }
1052
1053
1054 //////////////////////////////////////////////////////////
1055 // //
1056 // ISTRI_12 //
1057 // //
1058 //////////////////////////////////////////////////////////
1059
h_pcmpistri_12(V128 * argL,V128 * argR)1060 UInt h_pcmpistri_12 ( V128* argL, V128* argR )
1061 {
1062 V128 block[2];
1063 memcpy(&block[0], argL, sizeof(V128));
1064 memcpy(&block[1], argR, sizeof(V128));
1065 ULong res, flags;
1066 __asm__ __volatile__(
1067 "subq $1024, %%rsp" "\n\t"
1068 "movdqu 0(%2), %%xmm2" "\n\t"
1069 "movdqu 16(%2), %%xmm11" "\n\t"
1070 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t"
1071 //"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t"
1072 //"movd %%xmm0, %%ecx" "\n\t"
1073 "pushfq" "\n\t"
1074 "popq %%rdx" "\n\t"
1075 "movq %%rcx, %0" "\n\t"
1076 "movq %%rdx, %1" "\n\t"
1077 "addq $1024, %%rsp" "\n\t"
1078 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1079 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1080 );
1081 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1082 }
1083
s_pcmpistri_12(V128 * argLU,V128 * argRU)1084 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
1085 {
1086 V128 resV;
1087 UInt resOSZACP, resECX;
1088 Bool ok
1089 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1090 zmask_from_V128(argLU),
1091 zmask_from_V128(argRU),
1092 0x12, False/*!isSTRM*/
1093 );
1094 assert(ok);
1095 resECX = resV.uInt[0];
1096 return (resOSZACP << 16) | resECX;
1097 }
1098
istri_12(void)1099 void istri_12 ( void )
1100 {
1101 char* wot = "12";
1102 UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1103 UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1104
1105 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1106 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1107 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1108 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1109
1110 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1111 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1112 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1113 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1114 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1115
1116 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1117 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1118 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1119 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1120
1121 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1122 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1123
1124 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1125 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1126 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1127 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1128
1129 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1130
1131 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1132 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1133 }
1134
1135
1136
1137 //////////////////////////////////////////////////////////
1138 // //
1139 // ISTRI_44 //
1140 // //
1141 //////////////////////////////////////////////////////////
1142
h_pcmpistri_44(V128 * argL,V128 * argR)1143 UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1144 {
1145 V128 block[2];
1146 memcpy(&block[0], argL, sizeof(V128));
1147 memcpy(&block[1], argR, sizeof(V128));
1148 ULong res, flags;
1149 __asm__ __volatile__(
1150 "subq $1024, %%rsp" "\n\t"
1151 "movdqu 0(%2), %%xmm2" "\n\t"
1152 "movdqu 16(%2), %%xmm11" "\n\t"
1153 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t"
1154 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
1155 //"movd %%xmm0, %%ecx" "\n\t"
1156 "pushfq" "\n\t"
1157 "popq %%rdx" "\n\t"
1158 "movq %%rcx, %0" "\n\t"
1159 "movq %%rdx, %1" "\n\t"
1160 "addq $1024, %%rsp" "\n\t"
1161 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1162 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1163 );
1164 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1165 }
1166
s_pcmpistri_44(V128 * argLU,V128 * argRU)1167 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1168 {
1169 V128 resV;
1170 UInt resOSZACP, resECX;
1171 Bool ok
1172 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1173 zmask_from_V128(argLU),
1174 zmask_from_V128(argRU),
1175 0x44, False/*!isSTRM*/
1176 );
1177 assert(ok);
1178 resECX = resV.uInt[0];
1179 return (resOSZACP << 16) | resECX;
1180 }
1181
istri_44(void)1182 void istri_44 ( void )
1183 {
1184 char* wot = "44";
1185 UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1186 UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1187
1188 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1189 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1190 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1191 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1192
1193 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1194 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1195 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1196 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1197 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1198
1199 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1200
1201 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1202 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1203 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1204
1205 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1206 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1207 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1208
1209 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1210 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1211
1212 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1213 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1214
1215 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1216 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1217 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1218 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1219 }
1220
1221
1222 //////////////////////////////////////////////////////////
1223 // //
1224 // ISTRI_00 //
1225 // //
1226 //////////////////////////////////////////////////////////
1227
h_pcmpistri_00(V128 * argL,V128 * argR)1228 UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1229 {
1230 V128 block[2];
1231 memcpy(&block[0], argL, sizeof(V128));
1232 memcpy(&block[1], argR, sizeof(V128));
1233 ULong res, flags;
1234 __asm__ __volatile__(
1235 "subq $1024, %%rsp" "\n\t"
1236 "movdqu 0(%2), %%xmm2" "\n\t"
1237 "movdqu 16(%2), %%xmm11" "\n\t"
1238 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
1239 //"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
1240 //"movd %%xmm0, %%ecx" "\n\t"
1241 "pushfq" "\n\t"
1242 "popq %%rdx" "\n\t"
1243 "movq %%rcx, %0" "\n\t"
1244 "movq %%rdx, %1" "\n\t"
1245 "addq $1024, %%rsp" "\n\t"
1246 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1247 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1248 );
1249 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1250 }
1251
s_pcmpistri_00(V128 * argLU,V128 * argRU)1252 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1253 {
1254 V128 resV;
1255 UInt resOSZACP, resECX;
1256 Bool ok
1257 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1258 zmask_from_V128(argLU),
1259 zmask_from_V128(argRU),
1260 0x00, False/*!isSTRM*/
1261 );
1262 assert(ok);
1263 resECX = resV.uInt[0];
1264 return (resOSZACP << 16) | resECX;
1265 }
1266
istri_00(void)1267 void istri_00 ( void )
1268 {
1269 char* wot = "00";
1270 UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1271 UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1272
1273 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1274 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1275 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1276 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1277
1278 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1279 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1280 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1281 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1282 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1283
1284 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1285 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1286 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1287 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1288
1289 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1290 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1291
1292 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1293 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1294 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1295 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1296
1297 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1298
1299 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1300 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1301 }
1302
1303
1304 //////////////////////////////////////////////////////////
1305 // //
1306 // ISTRI_38 //
1307 // //
1308 //////////////////////////////////////////////////////////
1309
h_pcmpistri_38(V128 * argL,V128 * argR)1310 UInt h_pcmpistri_38 ( V128* argL, V128* argR )
1311 {
1312 V128 block[2];
1313 memcpy(&block[0], argL, sizeof(V128));
1314 memcpy(&block[1], argR, sizeof(V128));
1315 ULong res, flags;
1316 __asm__ __volatile__(
1317 "subq $1024, %%rsp" "\n\t"
1318 "movdqu 0(%2), %%xmm2" "\n\t"
1319 "movdqu 16(%2), %%xmm11" "\n\t"
1320 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t"
1321 "pushfq" "\n\t"
1322 "popq %%rdx" "\n\t"
1323 "movq %%rcx, %0" "\n\t"
1324 "movq %%rdx, %1" "\n\t"
1325 "addq $1024, %%rsp" "\n\t"
1326 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1327 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1328 );
1329 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1330 }
1331
s_pcmpistri_38(V128 * argLU,V128 * argRU)1332 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1333 {
1334 V128 resV;
1335 UInt resOSZACP, resECX;
1336 Bool ok
1337 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1338 zmask_from_V128(argLU),
1339 zmask_from_V128(argRU),
1340 0x38, False/*!isSTRM*/
1341 );
1342 assert(ok);
1343 resECX = resV.uInt[0];
1344 return (resOSZACP << 16) | resECX;
1345 }
1346
istri_38(void)1347 void istri_38 ( void )
1348 {
1349 char* wot = "38";
1350 UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1351 UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1352
1353 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1354
1355 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1356 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1357 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1358 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1359
1360 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1361 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1362 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1363
1364 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1365 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1366 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1367 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1368
1369 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1370 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1371 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1372
1373 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1374
1375 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1376 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1377 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1378
1379 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1380 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1381 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1382
1383 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1384 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1385 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1386
1387 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1388 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1389 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1390
1391 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1392 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1393 }
1394
1395
1396
1397 //////////////////////////////////////////////////////////
1398 // //
1399 // ISTRI_46 //
1400 // //
1401 //////////////////////////////////////////////////////////
1402
h_pcmpistri_46(V128 * argL,V128 * argR)1403 UInt h_pcmpistri_46 ( V128* argL, V128* argR )
1404 {
1405 V128 block[2];
1406 memcpy(&block[0], argL, sizeof(V128));
1407 memcpy(&block[1], argR, sizeof(V128));
1408 ULong res, flags;
1409 __asm__ __volatile__(
1410 "subq $1024, %%rsp" "\n\t"
1411 "movdqu 0(%2), %%xmm2" "\n\t"
1412 "movdqu 16(%2), %%xmm11" "\n\t"
1413 "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t"
1414 "pushfq" "\n\t"
1415 "popq %%rdx" "\n\t"
1416 "movq %%rcx, %0" "\n\t"
1417 "movq %%rdx, %1" "\n\t"
1418 "addq $1024, %%rsp" "\n\t"
1419 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1420 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1421 );
1422 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1423 }
1424
s_pcmpistri_46(V128 * argLU,V128 * argRU)1425 UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1426 {
1427 V128 resV;
1428 UInt resOSZACP, resECX;
1429 Bool ok
1430 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1431 zmask_from_V128(argLU),
1432 zmask_from_V128(argRU),
1433 0x46, False/*!isSTRM*/
1434 );
1435 assert(ok);
1436 resECX = resV.uInt[0];
1437 return (resOSZACP << 16) | resECX;
1438 }
1439
istri_46(void)1440 void istri_46 ( void )
1441 {
1442 char* wot = "46";
1443 UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1444 UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1445
1446 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1447 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1448 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1449 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1450
1451 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1452 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1453 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1454 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1455 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1456
1457 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1458
1459 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1460 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1461 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1462
1463 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1464 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1465 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1466
1467 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1468 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1469
1470 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1471 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1472
1473 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1474 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1475 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1476 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1477 }
1478
1479
1480 //////////////////////////////////////////////////////////
1481 // //
1482 // ISTRI_30 //
1483 // //
1484 //////////////////////////////////////////////////////////
1485
h_pcmpistri_30(V128 * argL,V128 * argR)1486 UInt h_pcmpistri_30 ( V128* argL, V128* argR )
1487 {
1488 V128 block[2];
1489 memcpy(&block[0], argL, sizeof(V128));
1490 memcpy(&block[1], argR, sizeof(V128));
1491 ULong res, flags;
1492 __asm__ __volatile__(
1493 "subq $1024, %%rsp" "\n\t"
1494 "movdqu 0(%2), %%xmm2" "\n\t"
1495 "movdqu 16(%2), %%xmm11" "\n\t"
1496 "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t"
1497 "pushfq" "\n\t"
1498 "popq %%rdx" "\n\t"
1499 "movq %%rcx, %0" "\n\t"
1500 "movq %%rdx, %1" "\n\t"
1501 "addq $1024, %%rsp" "\n\t"
1502 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1503 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1504 );
1505 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1506 }
1507
s_pcmpistri_30(V128 * argLU,V128 * argRU)1508 UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1509 {
1510 V128 resV;
1511 UInt resOSZACP, resECX;
1512 Bool ok
1513 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1514 zmask_from_V128(argLU),
1515 zmask_from_V128(argRU),
1516 0x30, False/*!isSTRM*/
1517 );
1518 assert(ok);
1519 resECX = resV.uInt[0];
1520 return (resOSZACP << 16) | resECX;
1521 }
1522
istri_30(void)1523 void istri_30 ( void )
1524 {
1525 char* wot = "30";
1526 UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1527 UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1528
1529 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1530 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1531 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1532 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1533
1534 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1535 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1536 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1537 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1538 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1539
1540 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1542 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1543 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1544
1545 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1546 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1547
1548 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1549 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1550 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1551 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1552
1553 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1554
1555 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1556 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1557 }
1558
1559
1560 //////////////////////////////////////////////////////////
1561 // //
1562 // ISTRI_40 //
1563 // //
1564 //////////////////////////////////////////////////////////
1565
h_pcmpistri_40(V128 * argL,V128 * argR)1566 UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1567 {
1568 V128 block[2];
1569 memcpy(&block[0], argL, sizeof(V128));
1570 memcpy(&block[1], argR, sizeof(V128));
1571 ULong res, flags;
1572 __asm__ __volatile__(
1573 "subq $1024, %%rsp" "\n\t"
1574 "movdqu 0(%2), %%xmm2" "\n\t"
1575 "movdqu 16(%2), %%xmm11" "\n\t"
1576 "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t"
1577 "pushfq" "\n\t"
1578 "popq %%rdx" "\n\t"
1579 "movq %%rcx, %0" "\n\t"
1580 "movq %%rdx, %1" "\n\t"
1581 "addq $1024, %%rsp" "\n\t"
1582 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1583 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1584 );
1585 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1586 }
1587
s_pcmpistri_40(V128 * argLU,V128 * argRU)1588 UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1589 {
1590 V128 resV;
1591 UInt resOSZACP, resECX;
1592 Bool ok
1593 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1594 zmask_from_V128(argLU),
1595 zmask_from_V128(argRU),
1596 0x40, False/*!isSTRM*/
1597 );
1598 assert(ok);
1599 resECX = resV.uInt[0];
1600 return (resOSZACP << 16) | resECX;
1601 }
1602
istri_40(void)1603 void istri_40 ( void )
1604 {
1605 char* wot = "40";
1606 UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1607 UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1608
1609 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1610 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1611 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1612 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1613
1614 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1615 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1616 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1617 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1618 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1619
1620 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1622 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1623 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1624
1625 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1626 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1627
1628 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1629 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1630 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1631 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1632
1633 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1634
1635 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1636 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1637 }
1638
1639
1640 //////////////////////////////////////////////////////////
1641 // //
1642 // ISTRI_42 //
1643 // //
1644 //////////////////////////////////////////////////////////
1645
h_pcmpistri_42(V128 * argL,V128 * argR)1646 UInt h_pcmpistri_42 ( V128* argL, V128* argR )
1647 {
1648 V128 block[2];
1649 memcpy(&block[0], argL, sizeof(V128));
1650 memcpy(&block[1], argR, sizeof(V128));
1651 ULong res, flags;
1652 __asm__ __volatile__(
1653 "subq $1024, %%rsp" "\n\t"
1654 "movdqu 0(%2), %%xmm2" "\n\t"
1655 "movdqu 16(%2), %%xmm11" "\n\t"
1656 "pcmpistri $0x42, %%xmm2, %%xmm11" "\n\t"
1657 "pushfq" "\n\t"
1658 "popq %%rdx" "\n\t"
1659 "movq %%rcx, %0" "\n\t"
1660 "movq %%rdx, %1" "\n\t"
1661 "addq $1024, %%rsp" "\n\t"
1662 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1663 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1664 );
1665 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1666 }
1667
s_pcmpistri_42(V128 * argLU,V128 * argRU)1668 UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
1669 {
1670 V128 resV;
1671 UInt resOSZACP, resECX;
1672 Bool ok
1673 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1674 zmask_from_V128(argLU),
1675 zmask_from_V128(argRU),
1676 0x42, False/*!isSTRM*/
1677 );
1678 assert(ok);
1679 resECX = resV.uInt[0];
1680 return (resOSZACP << 16) | resECX;
1681 }
1682
istri_42(void)1683 void istri_42 ( void )
1684 {
1685 char* wot = "42";
1686 UInt(*h)(V128*,V128*) = h_pcmpistri_42;
1687 UInt(*s)(V128*,V128*) = s_pcmpistri_42;
1688
1689 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1690 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1691 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1692 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1693
1694 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1695 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1696 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1697 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1698 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1699
1700 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1701 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1702 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1703 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1704
1705 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1706 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1707
1708 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1709 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1710 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1711 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1712
1713 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1714
1715 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1716 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1717 }
1718
1719
1720 //////////////////////////////////////////////////////////
1721 // //
1722 // ISTRI_0E //
1723 // //
1724 //////////////////////////////////////////////////////////
1725
1726 __attribute__((noinline))
h_pcmpistri_0E(V128 * argL,V128 * argR)1727 UInt h_pcmpistri_0E ( V128* argL, V128* argR )
1728 {
1729 V128 block[2];
1730 memcpy(&block[0], argL, sizeof(V128));
1731 memcpy(&block[1], argR, sizeof(V128));
1732 ULong res = 0, flags = 0;
1733 __asm__ __volatile__(
1734 "movdqu 0(%2), %%xmm2" "\n\t"
1735 "movdqu 16(%2), %%xmm11" "\n\t"
1736 "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t"
1737 "pushfq" "\n\t"
1738 "popq %%rdx" "\n\t"
1739 "movq %%rcx, %0" "\n\t"
1740 "movq %%rdx, %1" "\n\t"
1741 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1742 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1743 );
1744 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1745 }
1746
s_pcmpistri_0E(V128 * argLU,V128 * argRU)1747 UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1748 {
1749 V128 resV;
1750 UInt resOSZACP, resECX;
1751 Bool ok
1752 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1753 zmask_from_V128(argLU),
1754 zmask_from_V128(argRU),
1755 0x0E, False/*!isSTRM*/
1756 );
1757 assert(ok);
1758 resECX = resV.uInt[0];
1759 return (resOSZACP << 16) | resECX;
1760 }
1761
istri_0E(void)1762 void istri_0E ( void )
1763 {
1764 char* wot = "0E";
1765 UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1766 UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1767
1768 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1769
1770 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1771
1772 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1773 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1774 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1775
1776 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1777
1778 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1779 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1780 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1781 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1782 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1783
1784 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1785 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1786 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1787
1788 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1789 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1790
1791 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1792 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1793 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1794
1795 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1796 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1797 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1798
1799 try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1800 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1801 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1802 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1803 }
1804
1805
1806 //////////////////////////////////////////////////////////
1807 // //
1808 // ISTRI_34 //
1809 // //
1810 //////////////////////////////////////////////////////////
1811
h_pcmpistri_34(V128 * argL,V128 * argR)1812 UInt h_pcmpistri_34 ( V128* argL, V128* argR )
1813 {
1814 V128 block[2];
1815 memcpy(&block[0], argL, sizeof(V128));
1816 memcpy(&block[1], argR, sizeof(V128));
1817 ULong res, flags;
1818 __asm__ __volatile__(
1819 "subq $1024, %%rsp" "\n\t"
1820 "movdqu 0(%2), %%xmm2" "\n\t"
1821 "movdqu 16(%2), %%xmm11" "\n\t"
1822 "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t"
1823 "pushfq" "\n\t"
1824 "popq %%rdx" "\n\t"
1825 "movq %%rcx, %0" "\n\t"
1826 "movq %%rdx, %1" "\n\t"
1827 "addq $1024, %%rsp" "\n\t"
1828 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1829 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1830 );
1831 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1832 }
1833
s_pcmpistri_34(V128 * argLU,V128 * argRU)1834 UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1835 {
1836 V128 resV;
1837 UInt resOSZACP, resECX;
1838 Bool ok
1839 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1840 zmask_from_V128(argLU),
1841 zmask_from_V128(argRU),
1842 0x34, False/*!isSTRM*/
1843 );
1844 assert(ok);
1845 resECX = resV.uInt[0];
1846 return (resOSZACP << 16) | resECX;
1847 }
1848
istri_34(void)1849 void istri_34 ( void )
1850 {
1851 char* wot = "34";
1852 UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1853 UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1854
1855 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1856 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1857 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1858 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1859
1860 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1861 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1862 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1863 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1864 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1865
1866 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1867
1868 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1869 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1870 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1871
1872 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1873 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1874 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1875
1876 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1877 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1878
1879 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1880 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1881
1882 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1883 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1884 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1885 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1886 }
1887
1888
//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_14                       //
//                                                      //
//////////////////////////////////////////////////////////
1894
h_pcmpistri_14(V128 * argL,V128 * argR)1895 UInt h_pcmpistri_14 ( V128* argL, V128* argR )
1896 {
1897 V128 block[2];
1898 memcpy(&block[0], argL, sizeof(V128));
1899 memcpy(&block[1], argR, sizeof(V128));
1900 ULong res, flags;
1901 __asm__ __volatile__(
1902 "subq $1024, %%rsp" "\n\t"
1903 "movdqu 0(%2), %%xmm2" "\n\t"
1904 "movdqu 16(%2), %%xmm11" "\n\t"
1905 "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t"
1906 "pushfq" "\n\t"
1907 "popq %%rdx" "\n\t"
1908 "movq %%rcx, %0" "\n\t"
1909 "movq %%rdx, %1" "\n\t"
1910 "addq $1024, %%rsp" "\n\t"
1911 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1912 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1913 );
1914 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1915 }
1916
s_pcmpistri_14(V128 * argLU,V128 * argRU)1917 UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1918 {
1919 V128 resV;
1920 UInt resOSZACP, resECX;
1921 Bool ok
1922 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1923 zmask_from_V128(argLU),
1924 zmask_from_V128(argRU),
1925 0x14, False/*!isSTRM*/
1926 );
1927 assert(ok);
1928 resECX = resV.uInt[0];
1929 return (resOSZACP << 16) | resECX;
1930 }
1931
istri_14(void)1932 void istri_14 ( void )
1933 {
1934 char* wot = "14";
1935 UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1936 UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1937
1938 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1939 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1940 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1941 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1942
1943 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1944 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1945 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1946 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1947 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1948
1949 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1950
1951 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1952 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1953 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1954
1955 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1956 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1957 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1958
1959 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1960 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1961
1962 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1963 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1964
1965 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1966 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1967 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1968 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1969 }
1970
1971
//////////////////////////////////////////////////////////
//                                                      //
//                         main                         //
//                                                      //
//////////////////////////////////////////////////////////
1977
main(void)1978 int main ( void )
1979 {
1980 istri_4A();
1981 istri_3A();
1982 istri_08();
1983 istri_18();
1984 istri_1A();
1985 istri_02();
1986 istri_0C();
1987 istri_12();
1988 istri_44();
1989 istri_00();
1990 istri_38();
1991 istri_46();
1992 istri_30();
1993 istri_40();
1994 istri_42();
1995 istri_0E();
1996 istri_14();
1997 istri_34();
1998 return 0;
1999 }
2000