• Home
  • History
  • Annotate
  • Raw
  • Download

Lines Matching +full:- +full:t

7 //     http://www.apache.org/licenses/LICENSE-2.0
52 // A 4x4 block of accumulators is stored in 32bit in xmm4--xmm7. in Run()
54 // +-------+-------+-------+-------+ in Run()
56 // Rhs +-------+---------------+-------+ in Run()
58 // +-------+-------+-------+-------+ in Run()
64 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
69 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
76 "pxor %%xmm4 , %%xmm4 \n\t" in Run()
77 "pxor %%xmm5 , %%xmm5 \n\t" in Run()
78 "pxor %%xmm6 , %%xmm6 \n\t" in Run()
79 "pxor %%xmm7 , %%xmm7 \n\t" in Run()
81 "movl %[run_depth_cells], %%eax\n\t" in Run()
82 "subl $2, %%eax\n\t" in Run()
83 "js outerLoop1%=\n\t" in Run()
86 "outerLoop2%=:\n\t" in Run()
90 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
93 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
94 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
95 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
96 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
97 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
98 "paddd %%xmm2, %%xmm4 \n\t" in Run()
99 "paddd %%xmm3, %%xmm5 \n\t" in Run()
101 "prefetcht0 0x80(%[lhs_ptr]) \n\t" in Run()
103 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
104 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
105 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
106 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
108 "prefetcht0 0x80(%[rhs_ptr]) \n\t" in Run()
112 "pmovzxbw 0x08(%[rhs_ptr]), %%xmm1\n\t" in Run()
114 "paddd %%xmm2, %%xmm6 \n\t" in Run()
115 "paddd %%xmm3, %%xmm7 \n\t" in Run()
118 "pmovzxbw 0x08(%[lhs_ptr]), %%xmm0\n\t" in Run()
119 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
120 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
121 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
122 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
123 "paddd %%xmm2, %%xmm4 \n\t" in Run()
124 "paddd %%xmm3, %%xmm5 \n\t" in Run()
125 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
126 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
128 "addl $0x10, %[lhs_ptr] \n\t" in Run()
129 "addl $0x10, %[rhs_ptr] \n\t" in Run()
131 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
132 "paddd %%xmm3, %%xmm7 \n\t" in Run()
133 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
134 "paddd %%xmm2, %%xmm6 \n\t" in Run()
136 "subl $2, %[run_depth_cells]\n\t" in Run()
137 "ja outerLoop2%=\n\t" in Run()
139 "movl %[run_depth_cells], %%eax\n\t" in Run()
140 "decl %%eax\n\t" in Run()
141 "js finish%=\n\t" in Run()
144 "outerLoop1%=:\n\t" in Run()
147 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
150 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
151 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
152 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
153 "paddd %%xmm2, %%xmm4 \n\t" in Run()
154 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
155 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
156 "paddd %%xmm3, %%xmm5 \n\t" in Run()
158 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
159 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
160 "paddd %%xmm2, %%xmm6 \n\t" in Run()
161 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
162 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
163 "paddd %%xmm3, %%xmm7 \n\t" in Run()
165 "addl $0x08, %[lhs_ptr]\n\t" in Run()
166 "addl $0x08, %[rhs_ptr]\n\t" in Run()
168 "decl %[run_depth_cells]\n\t" in Run()
169 "jnz outerLoop1%=\n\t" in Run()
171 "finish%=:\n\t" in Run()
173 "movl %[dst_col_stride], %%eax\n\t" in Run()
174 "shll $2, %%eax\n\t" in Run()
176 "movl %[start_depth], %%ecx\n\t" in Run()
177 "test %%ecx, %%ecx\n\t" in Run()
178 "jz storeDst%=\n\t" in Run()
180 "leal (%%eax,%%eax,0x2), %%ecx\n\t" in Run()
181 "paddd 0x00(%[dst_ptr]) , %%xmm4 \n\t" in Run()
182 "paddd 0x00(%[dst_ptr], %%eax, 1) , %%xmm5 \n\t" in Run()
183 "paddd 0x00(%[dst_ptr], %%eax, 2) , %%xmm6 \n\t" in Run()
184 "paddd 0x00(%[dst_ptr], %%ecx, 1) , %%xmm7 \n\t" in Run()
186 "storeDst%=:\n\t" in Run()
188 "leal (%%eax,%%eax,0x2), %%ecx\n\t" in Run()
189 "movdqu %%xmm4 , 0x00(%[dst_ptr]) \n\t" in Run()
190 "movdqu %%xmm5 , 0x00(%[dst_ptr], %%eax, 1)\n\t" in Run()
191 "movdqu %%xmm6 , 0x00(%[dst_ptr], %%eax, 2)\n\t" in Run()
192 "movdqu %%xmm7 , 0x00(%[dst_ptr], %%ecx, 1)\n\t" in Run()
230 // A 12x4 block of accumulators is stored in 32bit in xmm4--xmm15. in Run()
232 // +-------+-------+-------+-------+ in Run()
234 // Rhs +-------+---------------+-------+ in Run()
236 // +-------+-------+-------+-------+ in Run()
242 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
247 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
252 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
257 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
264 "movq %[dst_col_stride_q], %%r12\n\t" in Run()
265 "shlq $2, %%r12\n\t" in Run()
266 "leaq (%%r12,%%r12,0x2), %%r13\n\t" in Run()
269 "pxor %%xmm4 , %%xmm4 \n\t" in Run()
270 "pxor %%xmm5 , %%xmm5 \n\t" in Run()
271 "pxor %%xmm6 , %%xmm6 \n\t" in Run()
272 "pxor %%xmm7 , %%xmm7 \n\t" in Run()
273 "pxor %%xmm8 , %%xmm8 \n\t" in Run()
274 "pxor %%xmm9 , %%xmm9 \n\t" in Run()
275 "pxor %%xmm10 , %%xmm10\n\t" in Run()
276 "pxor %%xmm11 , %%xmm11\n\t" in Run()
277 "pxor %%xmm12 , %%xmm12\n\t" in Run()
278 "pxor %%xmm13 , %%xmm13\n\t" in Run()
279 "pxor %%xmm14 , %%xmm14\n\t" in Run()
280 "pxor %%xmm15 , %%xmm15\n\t" in Run()
282 "movq %[run_depth_cells], %%r14\n\t" in Run()
283 "subq $2, %%r14\n\t" in Run()
284 "js outerLoop1%=\n\t" in Run()
287 "outerLoop2%=:\n\t" in Run()
292 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
295 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
296 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
297 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
298 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
299 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
300 "paddd %%xmm2, %%xmm4 \n\t" in Run()
301 "paddd %%xmm3, %%xmm5 \n\t" in Run()
303 "prefetcht0 0x80(%[lhs_ptr]) \n\t" in Run()
305 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
306 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
307 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
308 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
311 "pmovzxbw 0x08(%[lhs_ptr]), %%xmm0\n\t" in Run()
313 "paddd %%xmm2, %%xmm6 \n\t" in Run()
314 "paddd %%xmm3, %%xmm7 \n\t" in Run()
316 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
317 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
318 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
319 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
320 "paddd %%xmm2, %%xmm8 \n\t" in Run()
321 "paddd %%xmm3, %%xmm9 \n\t" in Run()
323 "prefetcht0 0x80(%[rhs_ptr]) \n\t" in Run()
325 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
326 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
327 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
328 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
329 "paddd %%xmm2, %%xmm10 \n\t" in Run()
330 "paddd %%xmm3, %%xmm11 \n\t" in Run()
333 "pmovzxbw 0x10(%[lhs_ptr]), %%xmm0\n\t" in Run()
334 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
335 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
336 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
337 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
338 "paddd %%xmm2, %%xmm12 \n\t" in Run()
339 "paddd %%xmm3, %%xmm13 \n\t" in Run()
341 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
342 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
343 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
344 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
345 "paddd %%xmm2, %%xmm14 \n\t" in Run()
346 "paddd %%xmm3, %%xmm15 \n\t" in Run()
350 "pmovzxbw 0x08(%[rhs_ptr]), %%xmm1\n\t" in Run()
353 "pmovzxbw 0x18(%[lhs_ptr]), %%xmm0\n\t" in Run()
354 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
355 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
356 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
357 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
358 "paddd %%xmm2, %%xmm4 \n\t" in Run()
359 "paddd %%xmm3, %%xmm5 \n\t" in Run()
361 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
362 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
363 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
364 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
365 "paddd %%xmm2, %%xmm6 \n\t" in Run()
366 "paddd %%xmm3, %%xmm7 \n\t" in Run()
369 "pmovzxbw 0x20(%[lhs_ptr]), %%xmm0\n\t" in Run()
370 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
371 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
372 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
373 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
374 "paddd %%xmm2, %%xmm8 \n\t" in Run()
375 "paddd %%xmm3, %%xmm9 \n\t" in Run()
377 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
378 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
379 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
380 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
381 "paddd %%xmm2, %%xmm10 \n\t" in Run()
382 "paddd %%xmm3, %%xmm11 \n\t" in Run()
385 "pmovzxbw 0x28(%[lhs_ptr]), %%xmm0\n\t" in Run()
387 "addq $0x30, %[lhs_ptr] \n\t" in Run()
388 "addq $0x10, %[rhs_ptr] \n\t" in Run()
390 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
391 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
392 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
393 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
394 "paddd %%xmm2, %%xmm12 \n\t" in Run()
395 "paddd %%xmm3, %%xmm13 \n\t" in Run()
397 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
398 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
399 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
400 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
401 "paddd %%xmm2, %%xmm14 \n\t" in Run()
402 "paddd %%xmm3, %%xmm15 \n\t" in Run()
404 "subq $2, %[run_depth_cells]\n\t" in Run()
405 "ja outerLoop2%=\n\t" in Run()
407 "movq %[run_depth_cells], %%r14\n\t" in Run()
408 "decq %%r14\n\t" in Run()
409 "js finish%=\n\t" in Run()
412 "outerLoop1%=:\n\t" in Run()
415 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
418 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
419 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
420 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
421 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
422 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
423 "paddd %%xmm2, %%xmm4 \n\t" in Run()
424 "paddd %%xmm3, %%xmm5 \n\t" in Run()
425 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
426 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
427 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
428 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
429 "paddd %%xmm2, %%xmm6 \n\t" in Run()
430 "paddd %%xmm3, %%xmm7 \n\t" in Run()
433 "pmovzxbw 0x08(%[lhs_ptr]), %%xmm0\n\t" in Run()
434 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
435 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
436 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
437 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
438 "paddd %%xmm2, %%xmm8 \n\t" in Run()
439 "paddd %%xmm3, %%xmm9 \n\t" in Run()
440 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
441 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
442 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
443 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
444 "paddd %%xmm2, %%xmm10 \n\t" in Run()
445 "paddd %%xmm3, %%xmm11 \n\t" in Run()
448 "pmovzxbw 0x10(%[lhs_ptr]), %%xmm0\n\t" in Run()
450 "addq $0x18, %[lhs_ptr] \n\t" in Run()
451 "addq $0x08, %[rhs_ptr] \n\t" in Run()
453 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
454 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
455 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
456 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
457 "paddd %%xmm2, %%xmm12 \n\t" in Run()
458 "paddd %%xmm3, %%xmm13 \n\t" in Run()
459 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
460 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
461 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
462 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
463 "paddd %%xmm2, %%xmm14 \n\t" in Run()
464 "paddd %%xmm3, %%xmm15 \n\t" in Run()
466 "decq %[run_depth_cells]\n\t" in Run()
467 "jnz outerLoop1%=\n\t" in Run()
469 "finish%=:\n\t" in Run()
471 "test %[start_depth], %[start_depth]\n\t" in Run()
472 "jz storeDst%=\n\t" in Run()
474 "paddd 0x00(%[dst_ptr]) , %%xmm4 \n\t" in Run()
475 "paddd 0x10(%[dst_ptr]) , %%xmm8 \n\t" in Run()
476 "paddd 0x20(%[dst_ptr]) , %%xmm12\n\t" in Run()
477 "paddd 0x00(%[dst_ptr], %%r12, 1) , %%xmm5 \n\t" in Run()
478 "paddd 0x10(%[dst_ptr], %%r12, 1) , %%xmm9 \n\t" in Run()
479 "paddd 0x20(%[dst_ptr], %%r12, 1) , %%xmm13\n\t" in Run()
480 "paddd 0x00(%[dst_ptr], %%r12, 2) , %%xmm6 \n\t" in Run()
481 "paddd 0x10(%[dst_ptr], %%r12, 2) , %%xmm10\n\t" in Run()
482 "paddd 0x20(%[dst_ptr], %%r12, 2) , %%xmm14\n\t" in Run()
483 "paddd 0x00(%[dst_ptr], %%r13, 1) , %%xmm7 \n\t" in Run()
484 "paddd 0x10(%[dst_ptr], %%r13, 1) , %%xmm11\n\t" in Run()
485 "paddd 0x20(%[dst_ptr], %%r13, 1) , %%xmm15\n\t" in Run()
487 "storeDst%=:\n\t" in Run()
489 "movdqu %%xmm4 , 0x00(%[dst_ptr]) \n\t" in Run()
490 "movdqu %%xmm8 , 0x10(%[dst_ptr]) \n\t" in Run()
491 "movdqu %%xmm12 , 0x20(%[dst_ptr]) \n\t" in Run()
492 "movdqu %%xmm5 , 0x00(%[dst_ptr], %%r12, 1)\n\t" in Run()
493 "movdqu %%xmm9 , 0x10(%[dst_ptr], %%r12, 1)\n\t" in Run()
494 "movdqu %%xmm13 , 0x20(%[dst_ptr], %%r12, 1)\n\t" in Run()
495 "movdqu %%xmm6 , 0x00(%[dst_ptr], %%r12, 2)\n\t" in Run()
496 "movdqu %%xmm10 , 0x10(%[dst_ptr], %%r12, 2)\n\t" in Run()
497 "movdqu %%xmm14 , 0x20(%[dst_ptr], %%r12, 2)\n\t" in Run()
498 "movdqu %%xmm7 , 0x00(%[dst_ptr], %%r13, 1)\n\t" in Run()
499 "movdqu %%xmm11 , 0x10(%[dst_ptr], %%r13, 1)\n\t" in Run()
500 "movdqu %%xmm15 , 0x20(%[dst_ptr], %%r13, 1)\n\t" in Run()