1 
2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
3    check the core arithmetic in any detail.  */
4 
5 #include <string.h>
6 #include <stdio.h>
7 #include <assert.h>
8 
/* Short aliases for the scalar types used throughout this file. */
typedef unsigned char          UChar;
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned long long int ULong;

/* Boolean stored in a single byte, with its two constants. */
typedef UChar Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector held as 16 raw bytes. */
typedef unsigned char V128[16];
17 
/* Print the 16 bytes of *vec to stdout as 32 lower-case hex digits,
   starting with byte 15 and ending with byte 0.  No separator and no
   trailing newline are written. */
void show_V128 ( V128* vec )
{
   const UChar* bytes = *vec;
   UInt remaining = (UInt)sizeof(*vec);

   while (remaining > 0) {
      remaining--;
      printf("%02x", (UInt)bytes[remaining]);
   }
}
24 
/* Build a 16-byte vector from a 16-character hex "summary" string.

   Each character is one hex digit (0-9, A-F or a-f) and expands to a
   byte whose two nibbles both equal that digit, e.g. 'a' -> 0xaa.  The
   last character of the string becomes byte 0 of *dst and the first
   character becomes byte 15, so the string reads in the same order in
   which show_V128 prints the vector.

   Asserts that the string is exactly 16 characters long and that every
   character is a valid hex digit. */
void expand ( V128* dst, char* summary )
{
   Int src;
   assert( strlen(summary) == 16 );

   /* Walk the string back to front so byte 0 is filled first. */
   for (src = 15; src >= 0; src--) {
      const UChar ch = (UChar)summary[src];
      UChar nibble = 0;

      if ('0' <= ch && ch <= '9') {
         nibble = (UChar)(ch - '0');
      } else if ('A' <= ch && ch <= 'F') {
         nibble = (UChar)(ch - 'A' + 10);
      } else if ('a' <= ch && ch <= 'f') {
         nibble = (UChar)(ch - 'a' + 10);
      } else {
         assert(0);
      }

      assert(nibble < 16);
      /* Multiplying by 0x11 copies the digit into both nibbles. */
      (*dst)[15 - src] = (UChar)(nibble * 0x11);
   }
}
43 
/* Byte offsets of the fields in the 80-byte scratch block shared between
   the C code and the inline assembly in PCMPX_RUN.  The assembly
   hard-codes the same numbers, so the two must be kept in step. */
enum {
   PCMPX_OFF_ARGL   = 0,   /* in:    16 bytes, loaded into xmm2      */
   PCMPX_OFF_ARGR   = 16,  /* in:    16 bytes, loaded into xmm13     */
   PCMPX_OFF_RDX    = 32,  /* in:    8 bytes, loaded into rdx        */
   PCMPX_OFF_RAX    = 40,  /* in:    8 bytes, loaded into rax        */
   PCMPX_OFF_XMM0   = 48,  /* inout: 16 bytes, xmm0 before and after */
   PCMPX_OFF_RCX    = 64,  /* inout: only cx (low 16 bits) is moved  */
   PCMPX_OFF_RFLAGS = 72,  /* out:   8 bytes, rflags after the insn  */
   PCMPX_BLOCK_SIZE = 80
};

/* Run one instruction variant and print its results.

   `op` is the mnemonic without its "pcmp" prefix ("istri", "estrm", ...)
   and `imm` is the immediate operand; both must be string literals
   because they are pasted into the assembly text and the output label.

   The macro is expanded inside one_test and uses its locals block,
   blockC, argL, argR, rdxIN and raxIN.  It is a macro rather than a
   helper function so that the pushfq/popq pair keeps executing in
   one_test's own stack frame, as it did before the stanzas were merged.

   Steps: fill the block with 0x55, store the inputs at their documented
   offsets, load the registers from the block, execute the instruction,
   store xmm0, cx and rflags back, then print them.  Only cx is loaded
   and stored for rcx, so the upper 48 bits of the printed rcx value are
   always the 0x55 fill pattern.  The flags are masked with 0x8D5, which
   keeps just CF, PF, AF, ZF, SF and OF. */
#define PCMPX_RUN(op, imm)                                                   \
   do {                                                                      \
      memset(blockC, 0x55, PCMPX_BLOCK_SIZE);                                \
      memcpy(blockC + PCMPX_OFF_ARGL, &argL,  16);                           \
      memcpy(blockC + PCMPX_OFF_ARGR, &argR,  16);                           \
      memcpy(blockC + PCMPX_OFF_RDX,  &rdxIN, 8);                            \
      memcpy(blockC + PCMPX_OFF_RAX,  &raxIN, 8);                            \
      __asm__ __volatile__(                                                  \
         "movupd    0(%0), %%xmm2"                "\n\t"                     \
         "movupd    16(%0), %%xmm13"              "\n\t"                     \
         "movq      32(%0), %%rdx"                "\n\t"                     \
         "movq      40(%0), %%rax"                "\n\t"                     \
         "movupd    48(%0), %%xmm0"               "\n\t"                     \
         "movw      64(%0), %%cx"                 "\n\t"                     \
         "pcmp" op " $" imm ", %%xmm2, %%xmm13"   "\n\t"                     \
         "movupd    %%xmm0, 48(%0)"               "\n\t"                     \
         "movw      %%cx, 64(%0)"                 "\n\t"                     \
         "pushfq"                                 "\n\t"                     \
         "popq      %%r15"                        "\n\t"                     \
         "movq      %%r15, 72(%0)"                "\n\t"                     \
         : /*out*/                                                           \
         : /*in*/"r"(blockC)                                                 \
         : /*trash*/"memory","cc","xmm2","xmm13","xmm0",                     \
                    "rdx","rax","rcx","r15"                                  \
      );                                                                     \
      printf("  " op " $" imm ":  ");                                        \
      printf("    xmm0 ");                                                   \
      show_V128( (V128*)(blockC + PCMPX_OFF_XMM0) );                         \
      printf("  rcx %016llx  flags %08llx\n",                                \
             block[PCMPX_OFF_RCX / sizeof(ULong)],                           \
             block[PCMPX_OFF_RFLAGS / sizeof(ULong)] & 0x8D5);               \
   } while (0)

/* Run all eight pcmp{i,e}str{i,m} variants (immediates 0x4A and 0x0A) on
   one pair of operands and print the inputs followed by one result line
   per variant.

   summL, summR: 16-character hex summaries, expanded by expand() into
                 the vectors loaded into xmm2 (argL) and xmm13 (argR).
   rdxIN, raxIN: values loaded into rdx and rax before each instruction.
                 The explicit-length ("e") forms take their operand
                 lengths from these registers; with the operand order
                 used here rdx goes with argL and rax with argR, which is
                 how the header line pairs them.

   The inputs are stored exactly where the block layout says the
   assembly reads them.  Earlier revisions wrote rdxIN to offset 24
   (overwriting the upper half of argR) and exchanged the rdx and rax
   slots, so the instructions did not see the operands that the header
   line reported.
   NOTE(review): this changes the program's output; if an
   expected-output file is recorded for this test it needs regenerating. */
void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
{
   V128 argL, argR;
   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx  argL ", rdxIN);
   show_V128(&argL);
   printf("  rax %016llx  argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   /* Scratch area exchanged with the assembly; see the PCMPX_OFF_*
      constants for its layout. */
   ULong block[PCMPX_BLOCK_SIZE / sizeof(ULong)];
   assert(sizeof(block) == PCMPX_BLOCK_SIZE);

   UChar* blockC = (UChar*)&block[0];

   /* Implicit-length forms, index result then mask result. */
   PCMPX_RUN("istri", "0x4A");
   PCMPX_RUN("istri", "0x0A");
   PCMPX_RUN("istrm", "0x4A");
   PCMPX_RUN("istrm", "0x0A");

   /* Explicit-length forms, index result then mask result. */
   PCMPX_RUN("estri", "0x4A");
   PCMPX_RUN("estri", "0x0A");
   PCMPX_RUN("estrm", "0x4A");
   PCMPX_RUN("estrm", "0x0A");
}

#undef PCMPX_RUN
303 
main(void)304 int main ( void )
305 {
306    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
307    one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
308 
309    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
310    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
311    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
312 
313    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
314    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
315    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
316    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
317 
318    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
319    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
320    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
321    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
322 
323    one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
324    one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
325    one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
326    one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
327 
328    one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
329    one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
330    one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
331    one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
332 
333    return 0;
334 }
335