Lines Matching full:n
123 "pcmpeqb %%xmm5,%%xmm5 \n" in I400ToARGBRow_SSE2()
124 "pslld $0x18,%%xmm5 \n" in I400ToARGBRow_SSE2()
125 ".p2align 4 \n" in I400ToARGBRow_SSE2()
126 "1: \n" in I400ToARGBRow_SSE2()
127 "movq (%0),%%xmm0 \n" in I400ToARGBRow_SSE2()
128 "lea 0x8(%0),%0 \n" in I400ToARGBRow_SSE2()
129 "punpcklbw %%xmm0,%%xmm0 \n" in I400ToARGBRow_SSE2()
130 "movdqa %%xmm0,%%xmm1 \n" in I400ToARGBRow_SSE2()
131 "punpcklwd %%xmm0,%%xmm0 \n" in I400ToARGBRow_SSE2()
132 "punpckhwd %%xmm1,%%xmm1 \n" in I400ToARGBRow_SSE2()
133 "por %%xmm5,%%xmm0 \n" in I400ToARGBRow_SSE2()
134 "por %%xmm5,%%xmm1 \n" in I400ToARGBRow_SSE2()
135 "movdqa %%xmm0,(%1) \n" in I400ToARGBRow_SSE2()
136 "movdqa %%xmm1,0x10(%1) \n" in I400ToARGBRow_SSE2()
137 "lea 0x20(%1),%1 \n" in I400ToARGBRow_SSE2()
138 "sub $0x8,%2 \n" in I400ToARGBRow_SSE2()
139 "jg 1b \n" in I400ToARGBRow_SSE2()
153 "movdqa %3,%%xmm5 \n" in ABGRToARGBRow_SSSE3()
154 "sub %0,%1 \n" in ABGRToARGBRow_SSSE3()
155 ".p2align 4 \n" in ABGRToARGBRow_SSSE3()
156 "1: \n" in ABGRToARGBRow_SSSE3()
157 "movdqa (%0),%%xmm0 \n" in ABGRToARGBRow_SSSE3()
158 "pshufb %%xmm5,%%xmm0 \n" in ABGRToARGBRow_SSSE3()
159 "sub $0x4,%2 \n" in ABGRToARGBRow_SSSE3()
160 "movdqa %%xmm0,(%0,%1,1) \n" in ABGRToARGBRow_SSSE3()
161 "lea 0x10(%0),%0 \n" in ABGRToARGBRow_SSSE3()
162 "jg 1b \n" in ABGRToARGBRow_SSSE3()
177 "movdqa %3,%%xmm5 \n" in BGRAToARGBRow_SSSE3()
178 "sub %0,%1 \n" in BGRAToARGBRow_SSSE3()
179 ".p2align 4 \n" in BGRAToARGBRow_SSSE3()
180 "1: \n" in BGRAToARGBRow_SSSE3()
181 "movdqa (%0),%%xmm0 \n" in BGRAToARGBRow_SSSE3()
182 "pshufb %%xmm5,%%xmm0 \n" in BGRAToARGBRow_SSSE3()
183 "sub $0x4,%2 \n" in BGRAToARGBRow_SSSE3()
184 "movdqa %%xmm0,(%0,%1,1) \n" in BGRAToARGBRow_SSSE3()
185 "lea 0x10(%0),%0 \n" in BGRAToARGBRow_SSSE3()
186 "jg 1b \n" in BGRAToARGBRow_SSSE3()
200 "movdqa %3,%%xmm5 \n" in RGBAToARGBRow_SSSE3()
201 "sub %0,%1 \n" in RGBAToARGBRow_SSSE3()
202 ".p2align 4 \n" in RGBAToARGBRow_SSSE3()
203 "1: \n" in RGBAToARGBRow_SSSE3()
204 "movdqa (%0),%%xmm0 \n" in RGBAToARGBRow_SSSE3()
205 "pshufb %%xmm5,%%xmm0 \n" in RGBAToARGBRow_SSSE3()
206 "sub $0x4,%2 \n" in RGBAToARGBRow_SSSE3()
207 "movdqa %%xmm0,(%0,%1,1) \n" in RGBAToARGBRow_SSSE3()
208 "lea 0x10(%0),%0 \n" in RGBAToARGBRow_SSSE3()
209 "jg 1b \n" in RGBAToARGBRow_SSSE3()
224 "movdqa %3,%%xmm5 \n" in ARGBToRGBARow_SSSE3()
225 "sub %0,%1 \n" in ARGBToRGBARow_SSSE3()
226 ".p2align 4 \n" in ARGBToRGBARow_SSSE3()
227 "1: \n" in ARGBToRGBARow_SSSE3()
228 "movdqa (%0),%%xmm0 \n" in ARGBToRGBARow_SSSE3()
229 "pshufb %%xmm5,%%xmm0 \n" in ARGBToRGBARow_SSSE3()
230 "sub $0x4,%2 \n" in ARGBToRGBARow_SSSE3()
231 "movdqa %%xmm0,(%0,%1,1) \n" in ARGBToRGBARow_SSSE3()
232 "lea 0x10(%0),%0 \n" in ARGBToRGBARow_SSSE3()
233 "jg 1b \n" in ARGBToRGBARow_SSSE3()
248 "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 in RGB24ToARGBRow_SSSE3()
249 "pslld $0x18,%%xmm5 \n" in RGB24ToARGBRow_SSSE3()
250 "movdqa %3,%%xmm4 \n" in RGB24ToARGBRow_SSSE3()
251 ".p2align 4 \n" in RGB24ToARGBRow_SSSE3()
252 "1: \n" in RGB24ToARGBRow_SSSE3()
253 "movdqu (%0),%%xmm0 \n" in RGB24ToARGBRow_SSSE3()
254 "movdqu 0x10(%0),%%xmm1 \n" in RGB24ToARGBRow_SSSE3()
255 "movdqu 0x20(%0),%%xmm3 \n" in RGB24ToARGBRow_SSSE3()
256 "lea 0x30(%0),%0 \n" in RGB24ToARGBRow_SSSE3()
257 "movdqa %%xmm3,%%xmm2 \n" in RGB24ToARGBRow_SSSE3()
258 "palignr $0x8,%%xmm1,%%xmm2 \n" in RGB24ToARGBRow_SSSE3()
259 "pshufb %%xmm4,%%xmm2 \n" in RGB24ToARGBRow_SSSE3()
260 "por %%xmm5,%%xmm2 \n" in RGB24ToARGBRow_SSSE3()
261 "palignr $0xc,%%xmm0,%%xmm1 \n" in RGB24ToARGBRow_SSSE3()
262 "pshufb %%xmm4,%%xmm0 \n" in RGB24ToARGBRow_SSSE3()
263 "movdqa %%xmm2,0x20(%1) \n" in RGB24ToARGBRow_SSSE3()
264 "por %%xmm5,%%xmm0 \n" in RGB24ToARGBRow_SSSE3()
265 "pshufb %%xmm4,%%xmm1 \n" in RGB24ToARGBRow_SSSE3()
266 "movdqa %%xmm0,(%1) \n" in RGB24ToARGBRow_SSSE3()
267 "por %%xmm5,%%xmm1 \n" in RGB24ToARGBRow_SSSE3()
268 "palignr $0x4,%%xmm3,%%xmm3 \n" in RGB24ToARGBRow_SSSE3()
269 "pshufb %%xmm4,%%xmm3 \n" in RGB24ToARGBRow_SSSE3()
270 "movdqa %%xmm1,0x10(%1) \n" in RGB24ToARGBRow_SSSE3()
271 "por %%xmm5,%%xmm3 \n" in RGB24ToARGBRow_SSSE3()
272 "sub $0x10,%2 \n" in RGB24ToARGBRow_SSSE3()
273 "movdqa %%xmm3,0x30(%1) \n" in RGB24ToARGBRow_SSSE3()
274 "lea 0x40(%1),%1 \n" in RGB24ToARGBRow_SSSE3()
275 "jg 1b \n" in RGB24ToARGBRow_SSSE3()
289 "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 in RAWToARGBRow_SSSE3()
290 "pslld $0x18,%%xmm5 \n" in RAWToARGBRow_SSSE3()
291 "movdqa %3,%%xmm4 \n" in RAWToARGBRow_SSSE3()
292 ".p2align 4 \n" in RAWToARGBRow_SSSE3()
293 "1: \n" in RAWToARGBRow_SSSE3()
294 "movdqu (%0),%%xmm0 \n" in RAWToARGBRow_SSSE3()
295 "movdqu 0x10(%0),%%xmm1 \n" in RAWToARGBRow_SSSE3()
296 "movdqu 0x20(%0),%%xmm3 \n" in RAWToARGBRow_SSSE3()
297 "lea 0x30(%0),%0 \n" in RAWToARGBRow_SSSE3()
298 "movdqa %%xmm3,%%xmm2 \n" in RAWToARGBRow_SSSE3()
299 "palignr $0x8,%%xmm1,%%xmm2 \n" in RAWToARGBRow_SSSE3()
300 "pshufb %%xmm4,%%xmm2 \n" in RAWToARGBRow_SSSE3()
301 "por %%xmm5,%%xmm2 \n" in RAWToARGBRow_SSSE3()
302 "palignr $0xc,%%xmm0,%%xmm1 \n" in RAWToARGBRow_SSSE3()
303 "pshufb %%xmm4,%%xmm0 \n" in RAWToARGBRow_SSSE3()
304 "movdqa %%xmm2,0x20(%1) \n" in RAWToARGBRow_SSSE3()
305 "por %%xmm5,%%xmm0 \n" in RAWToARGBRow_SSSE3()
306 "pshufb %%xmm4,%%xmm1 \n" in RAWToARGBRow_SSSE3()
307 "movdqa %%xmm0,(%1) \n" in RAWToARGBRow_SSSE3()
308 "por %%xmm5,%%xmm1 \n" in RAWToARGBRow_SSSE3()
309 "palignr $0x4,%%xmm3,%%xmm3 \n" in RAWToARGBRow_SSSE3()
310 "pshufb %%xmm4,%%xmm3 \n" in RAWToARGBRow_SSSE3()
311 "movdqa %%xmm1,0x10(%1) \n" in RAWToARGBRow_SSSE3()
312 "por %%xmm5,%%xmm3 \n" in RAWToARGBRow_SSSE3()
313 "sub $0x10,%2 \n" in RAWToARGBRow_SSSE3()
314 "movdqa %%xmm3,0x30(%1) \n" in RAWToARGBRow_SSSE3()
315 "lea 0x40(%1),%1 \n" in RAWToARGBRow_SSSE3()
316 "jg 1b \n" in RAWToARGBRow_SSSE3()
330 "mov $0x1080108,%%eax \n" in RGB565ToARGBRow_SSE2()
331 "movd %%eax,%%xmm5 \n" in RGB565ToARGBRow_SSE2()
332 "pshufd $0x0,%%xmm5,%%xmm5 \n" in RGB565ToARGBRow_SSE2()
333 "mov $0x20802080,%%eax \n" in RGB565ToARGBRow_SSE2()
334 "movd %%eax,%%xmm6 \n" in RGB565ToARGBRow_SSE2()
335 "pshufd $0x0,%%xmm6,%%xmm6 \n" in RGB565ToARGBRow_SSE2()
336 "pcmpeqb %%xmm3,%%xmm3 \n" in RGB565ToARGBRow_SSE2()
337 "psllw $0xb,%%xmm3 \n" in RGB565ToARGBRow_SSE2()
338 "pcmpeqb %%xmm4,%%xmm4 \n" in RGB565ToARGBRow_SSE2()
339 "psllw $0xa,%%xmm4 \n" in RGB565ToARGBRow_SSE2()
340 "psrlw $0x5,%%xmm4 \n" in RGB565ToARGBRow_SSE2()
341 "pcmpeqb %%xmm7,%%xmm7 \n" in RGB565ToARGBRow_SSE2()
342 "psllw $0x8,%%xmm7 \n" in RGB565ToARGBRow_SSE2()
343 "sub %0,%1 \n" in RGB565ToARGBRow_SSE2()
344 "sub %0,%1 \n" in RGB565ToARGBRow_SSE2()
345 ".p2align 4 \n" in RGB565ToARGBRow_SSE2()
346 "1: \n" in RGB565ToARGBRow_SSE2()
347 "movdqu (%0),%%xmm0 \n" in RGB565ToARGBRow_SSE2()
348 "movdqa %%xmm0,%%xmm1 \n" in RGB565ToARGBRow_SSE2()
349 "movdqa %%xmm0,%%xmm2 \n" in RGB565ToARGBRow_SSE2()
350 "pand %%xmm3,%%xmm1 \n" in RGB565ToARGBRow_SSE2()
351 "psllw $0xb,%%xmm2 \n" in RGB565ToARGBRow_SSE2()
352 "pmulhuw %%xmm5,%%xmm1 \n" in RGB565ToARGBRow_SSE2()
353 "pmulhuw %%xmm5,%%xmm2 \n" in RGB565ToARGBRow_SSE2()
354 "psllw $0x8,%%xmm1 \n" in RGB565ToARGBRow_SSE2()
355 "por %%xmm2,%%xmm1 \n" in RGB565ToARGBRow_SSE2()
356 "pand %%xmm4,%%xmm0 \n" in RGB565ToARGBRow_SSE2()
357 "pmulhuw %%xmm6,%%xmm0 \n" in RGB565ToARGBRow_SSE2()
358 "por %%xmm7,%%xmm0 \n" in RGB565ToARGBRow_SSE2()
359 "movdqa %%xmm1,%%xmm2 \n" in RGB565ToARGBRow_SSE2()
360 "punpcklbw %%xmm0,%%xmm1 \n" in RGB565ToARGBRow_SSE2()
361 "punpckhbw %%xmm0,%%xmm2 \n" in RGB565ToARGBRow_SSE2()
362 "movdqa %%xmm1,(%1,%0,2) \n" in RGB565ToARGBRow_SSE2()
363 "movdqa %%xmm2,0x10(%1,%0,2) \n" in RGB565ToARGBRow_SSE2()
364 "lea 0x10(%0),%0 \n" in RGB565ToARGBRow_SSE2()
365 "sub $0x8,%2 \n" in RGB565ToARGBRow_SSE2()
366 "jg 1b \n" in RGB565ToARGBRow_SSE2()
380 "mov $0x1080108,%%eax \n" in ARGB1555ToARGBRow_SSE2()
381 "movd %%eax,%%xmm5 \n" in ARGB1555ToARGBRow_SSE2()
382 "pshufd $0x0,%%xmm5,%%xmm5 \n" in ARGB1555ToARGBRow_SSE2()
383 "mov $0x42004200,%%eax \n" in ARGB1555ToARGBRow_SSE2()
384 "movd %%eax,%%xmm6 \n" in ARGB1555ToARGBRow_SSE2()
385 "pshufd $0x0,%%xmm6,%%xmm6 \n" in ARGB1555ToARGBRow_SSE2()
386 "pcmpeqb %%xmm3,%%xmm3 \n" in ARGB1555ToARGBRow_SSE2()
387 "psllw $0xb,%%xmm3 \n" in ARGB1555ToARGBRow_SSE2()
388 "movdqa %%xmm3,%%xmm4 \n" in ARGB1555ToARGBRow_SSE2()
389 "psrlw $0x6,%%xmm4 \n" in ARGB1555ToARGBRow_SSE2()
390 "pcmpeqb %%xmm7,%%xmm7 \n" in ARGB1555ToARGBRow_SSE2()
391 "psllw $0x8,%%xmm7 \n" in ARGB1555ToARGBRow_SSE2()
392 "sub %0,%1 \n" in ARGB1555ToARGBRow_SSE2()
393 "sub %0,%1 \n" in ARGB1555ToARGBRow_SSE2()
394 ".p2align 4 \n" in ARGB1555ToARGBRow_SSE2()
395 "1: \n" in ARGB1555ToARGBRow_SSE2()
396 "movdqu (%0),%%xmm0 \n" in ARGB1555ToARGBRow_SSE2()
397 "movdqa %%xmm0,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
398 "movdqa %%xmm0,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
399 "psllw $0x1,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
400 "psllw $0xb,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
401 "pand %%xmm3,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
402 "pmulhuw %%xmm5,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
403 "pmulhuw %%xmm5,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
404 "psllw $0x8,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
405 "por %%xmm2,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
406 "movdqa %%xmm0,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
407 "pand %%xmm4,%%xmm0 \n" in ARGB1555ToARGBRow_SSE2()
408 "psraw $0x8,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
409 "pmulhuw %%xmm6,%%xmm0 \n" in ARGB1555ToARGBRow_SSE2()
410 "pand %%xmm7,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
411 "por %%xmm2,%%xmm0 \n" in ARGB1555ToARGBRow_SSE2()
412 "movdqa %%xmm1,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
413 "punpcklbw %%xmm0,%%xmm1 \n" in ARGB1555ToARGBRow_SSE2()
414 "punpckhbw %%xmm0,%%xmm2 \n" in ARGB1555ToARGBRow_SSE2()
415 "movdqa %%xmm1,(%1,%0,2) \n" in ARGB1555ToARGBRow_SSE2()
416 "movdqa %%xmm2,0x10(%1,%0,2) \n" in ARGB1555ToARGBRow_SSE2()
417 "lea 0x10(%0),%0 \n" in ARGB1555ToARGBRow_SSE2()
418 "sub $0x8,%2 \n" in ARGB1555ToARGBRow_SSE2()
419 "jg 1b \n" in ARGB1555ToARGBRow_SSE2()
433 "mov $0xf0f0f0f,%%eax \n" in ARGB4444ToARGBRow_SSE2()
434 "movd %%eax,%%xmm4 \n" in ARGB4444ToARGBRow_SSE2()
435 "pshufd $0x0,%%xmm4,%%xmm4 \n" in ARGB4444ToARGBRow_SSE2()
436 "movdqa %%xmm4,%%xmm5 \n" in ARGB4444ToARGBRow_SSE2()
437 "pslld $0x4,%%xmm5 \n" in ARGB4444ToARGBRow_SSE2()
438 "sub %0,%1 \n" in ARGB4444ToARGBRow_SSE2()
439 "sub %0,%1 \n" in ARGB4444ToARGBRow_SSE2()
440 ".p2align 4 \n" in ARGB4444ToARGBRow_SSE2()
441 "1: \n" in ARGB4444ToARGBRow_SSE2()
442 "movdqu (%0),%%xmm0 \n" in ARGB4444ToARGBRow_SSE2()
443 "movdqa %%xmm0,%%xmm2 \n" in ARGB4444ToARGBRow_SSE2()
444 "pand %%xmm4,%%xmm0 \n" in ARGB4444ToARGBRow_SSE2()
445 "pand %%xmm5,%%xmm2 \n" in ARGB4444ToARGBRow_SSE2()
446 "movdqa %%xmm0,%%xmm1 \n" in ARGB4444ToARGBRow_SSE2()
447 "movdqa %%xmm2,%%xmm3 \n" in ARGB4444ToARGBRow_SSE2()
448 "psllw $0x4,%%xmm1 \n" in ARGB4444ToARGBRow_SSE2()
449 "psrlw $0x4,%%xmm3 \n" in ARGB4444ToARGBRow_SSE2()
450 "por %%xmm1,%%xmm0 \n" in ARGB4444ToARGBRow_SSE2()
451 "por %%xmm3,%%xmm2 \n" in ARGB4444ToARGBRow_SSE2()
452 "movdqa %%xmm0,%%xmm1 \n" in ARGB4444ToARGBRow_SSE2()
453 "punpcklbw %%xmm2,%%xmm0 \n" in ARGB4444ToARGBRow_SSE2()
454 "punpckhbw %%xmm2,%%xmm1 \n" in ARGB4444ToARGBRow_SSE2()
455 "movdqa %%xmm0,(%1,%0,2) \n" in ARGB4444ToARGBRow_SSE2()
456 "movdqa %%xmm1,0x10(%1,%0,2) \n" in ARGB4444ToARGBRow_SSE2()
457 "lea 0x10(%0),%0 \n" in ARGB4444ToARGBRow_SSE2()
458 "sub $0x8,%2 \n" in ARGB4444ToARGBRow_SSE2()
459 "jg 1b \n" in ARGB4444ToARGBRow_SSE2()
473 "movdqa %3,%%xmm6 \n" in ARGBToRGB24Row_SSSE3()
474 ".p2align 4 \n" in ARGBToRGB24Row_SSSE3()
475 "1: \n" in ARGBToRGB24Row_SSSE3()
476 "movdqa (%0),%%xmm0 \n" in ARGBToRGB24Row_SSSE3()
477 "movdqa 0x10(%0),%%xmm1 \n" in ARGBToRGB24Row_SSSE3()
478 "movdqa 0x20(%0),%%xmm2 \n" in ARGBToRGB24Row_SSSE3()
479 "movdqa 0x30(%0),%%xmm3 \n" in ARGBToRGB24Row_SSSE3()
480 "lea 0x40(%0),%0 \n" in ARGBToRGB24Row_SSSE3()
481 "pshufb %%xmm6,%%xmm0 \n" in ARGBToRGB24Row_SSSE3()
482 "pshufb %%xmm6,%%xmm1 \n" in ARGBToRGB24Row_SSSE3()
483 "pshufb %%xmm6,%%xmm2 \n" in ARGBToRGB24Row_SSSE3()
484 "pshufb %%xmm6,%%xmm3 \n" in ARGBToRGB24Row_SSSE3()
485 "movdqa %%xmm1,%%xmm4 \n" in ARGBToRGB24Row_SSSE3()
486 "psrldq $0x4,%%xmm1 \n" in ARGBToRGB24Row_SSSE3()
487 "pslldq $0xc,%%xmm4 \n" in ARGBToRGB24Row_SSSE3()
488 "movdqa %%xmm2,%%xmm5 \n" in ARGBToRGB24Row_SSSE3()
489 "por %%xmm4,%%xmm0 \n" in ARGBToRGB24Row_SSSE3()
490 "pslldq $0x8,%%xmm5 \n" in ARGBToRGB24Row_SSSE3()
491 "movdqa %%xmm0,(%1) \n" in ARGBToRGB24Row_SSSE3()
492 "por %%xmm5,%%xmm1 \n" in ARGBToRGB24Row_SSSE3()
493 "psrldq $0x8,%%xmm2 \n" in ARGBToRGB24Row_SSSE3()
494 "pslldq $0x4,%%xmm3 \n" in ARGBToRGB24Row_SSSE3()
495 "por %%xmm3,%%xmm2 \n" in ARGBToRGB24Row_SSSE3()
496 "movdqa %%xmm1,0x10(%1) \n" in ARGBToRGB24Row_SSSE3()
497 "movdqa %%xmm2,0x20(%1) \n" in ARGBToRGB24Row_SSSE3()
498 "lea 0x30(%1),%1 \n" in ARGBToRGB24Row_SSSE3()
499 "sub $0x10,%2 \n" in ARGBToRGB24Row_SSSE3()
500 "jg 1b \n" in ARGBToRGB24Row_SSSE3()
514 "movdqa %3,%%xmm6 \n" in ARGBToRAWRow_SSSE3()
515 ".p2align 4 \n" in ARGBToRAWRow_SSSE3()
516 "1: \n" in ARGBToRAWRow_SSSE3()
517 "movdqa (%0),%%xmm0 \n" in ARGBToRAWRow_SSSE3()
518 "movdqa 0x10(%0),%%xmm1 \n" in ARGBToRAWRow_SSSE3()
519 "movdqa 0x20(%0),%%xmm2 \n" in ARGBToRAWRow_SSSE3()
520 "movdqa 0x30(%0),%%xmm3 \n" in ARGBToRAWRow_SSSE3()
521 "lea 0x40(%0),%0 \n" in ARGBToRAWRow_SSSE3()
522 "pshufb %%xmm6,%%xmm0 \n" in ARGBToRAWRow_SSSE3()
523 "pshufb %%xmm6,%%xmm1 \n" in ARGBToRAWRow_SSSE3()
524 "pshufb %%xmm6,%%xmm2 \n" in ARGBToRAWRow_SSSE3()
525 "pshufb %%xmm6,%%xmm3 \n" in ARGBToRAWRow_SSSE3()
526 "movdqa %%xmm1,%%xmm4 \n" in ARGBToRAWRow_SSSE3()
527 "psrldq $0x4,%%xmm1 \n" in ARGBToRAWRow_SSSE3()
528 "pslldq $0xc,%%xmm4 \n" in ARGBToRAWRow_SSSE3()
529 "movdqa %%xmm2,%%xmm5 \n" in ARGBToRAWRow_SSSE3()
530 "por %%xmm4,%%xmm0 \n" in ARGBToRAWRow_SSSE3()
531 "pslldq $0x8,%%xmm5 \n" in ARGBToRAWRow_SSSE3()
532 "movdqa %%xmm0,(%1) \n" in ARGBToRAWRow_SSSE3()
533 "por %%xmm5,%%xmm1 \n" in ARGBToRAWRow_SSSE3()
534 "psrldq $0x8,%%xmm2 \n" in ARGBToRAWRow_SSSE3()
535 "pslldq $0x4,%%xmm3 \n" in ARGBToRAWRow_SSSE3()
536 "por %%xmm3,%%xmm2 \n" in ARGBToRAWRow_SSSE3()
537 "movdqa %%xmm1,0x10(%1) \n" in ARGBToRAWRow_SSSE3()
538 "movdqa %%xmm2,0x20(%1) \n" in ARGBToRAWRow_SSSE3()
539 "lea 0x30(%1),%1 \n" in ARGBToRAWRow_SSSE3()
540 "sub $0x10,%2 \n" in ARGBToRAWRow_SSSE3()
541 "jg 1b \n" in ARGBToRAWRow_SSSE3()
555 "pcmpeqb %%xmm3,%%xmm3 \n" in ARGBToRGB565Row_SSE2()
556 "psrld $0x1b,%%xmm3 \n" in ARGBToRGB565Row_SSE2()
557 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBToRGB565Row_SSE2()
558 "psrld $0x1a,%%xmm4 \n" in ARGBToRGB565Row_SSE2()
559 "pslld $0x5,%%xmm4 \n" in ARGBToRGB565Row_SSE2()
560 "pcmpeqb %%xmm5,%%xmm5 \n" in ARGBToRGB565Row_SSE2()
561 "pslld $0xb,%%xmm5 \n" in ARGBToRGB565Row_SSE2()
562 ".p2align 4 \n" in ARGBToRGB565Row_SSE2()
563 "1: \n" in ARGBToRGB565Row_SSE2()
564 "movdqa (%0),%%xmm0 \n" in ARGBToRGB565Row_SSE2()
565 "movdqa %%xmm0,%%xmm1 \n" in ARGBToRGB565Row_SSE2()
566 "movdqa %%xmm0,%%xmm2 \n" in ARGBToRGB565Row_SSE2()
567 "pslld $0x8,%%xmm0 \n" in ARGBToRGB565Row_SSE2()
568 "psrld $0x3,%%xmm1 \n" in ARGBToRGB565Row_SSE2()
569 "psrld $0x5,%%xmm2 \n" in ARGBToRGB565Row_SSE2()
570 "psrad $0x10,%%xmm0 \n" in ARGBToRGB565Row_SSE2()
571 "pand %%xmm3,%%xmm1 \n" in ARGBToRGB565Row_SSE2()
572 "pand %%xmm4,%%xmm2 \n" in ARGBToRGB565Row_SSE2()
573 "pand %%xmm5,%%xmm0 \n" in ARGBToRGB565Row_SSE2()
574 "por %%xmm2,%%xmm1 \n" in ARGBToRGB565Row_SSE2()
575 "por %%xmm1,%%xmm0 \n" in ARGBToRGB565Row_SSE2()
576 "packssdw %%xmm0,%%xmm0 \n" in ARGBToRGB565Row_SSE2()
577 "lea 0x10(%0),%0 \n" in ARGBToRGB565Row_SSE2()
578 "movq %%xmm0,(%1) \n" in ARGBToRGB565Row_SSE2()
579 "lea 0x8(%1),%1 \n" in ARGBToRGB565Row_SSE2()
580 "sub $0x4,%2 \n" in ARGBToRGB565Row_SSE2()
581 "jg 1b \n" in ARGBToRGB565Row_SSE2()
595 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBToARGB1555Row_SSE2()
596 "psrld $0x1b,%%xmm4 \n" in ARGBToARGB1555Row_SSE2()
597 "movdqa %%xmm4,%%xmm5 \n" in ARGBToARGB1555Row_SSE2()
598 "pslld $0x5,%%xmm5 \n" in ARGBToARGB1555Row_SSE2()
599 "movdqa %%xmm4,%%xmm6 \n" in ARGBToARGB1555Row_SSE2()
600 "pslld $0xa,%%xmm6 \n" in ARGBToARGB1555Row_SSE2()
601 "pcmpeqb %%xmm7,%%xmm7 \n" in ARGBToARGB1555Row_SSE2()
602 "pslld $0xf,%%xmm7 \n" in ARGBToARGB1555Row_SSE2()
603 ".p2align 4 \n" in ARGBToARGB1555Row_SSE2()
604 "1: \n" in ARGBToARGB1555Row_SSE2()
605 "movdqa (%0),%%xmm0 \n" in ARGBToARGB1555Row_SSE2()
606 "movdqa %%xmm0,%%xmm1 \n" in ARGBToARGB1555Row_SSE2()
607 "movdqa %%xmm0,%%xmm2 \n" in ARGBToARGB1555Row_SSE2()
608 "movdqa %%xmm0,%%xmm3 \n" in ARGBToARGB1555Row_SSE2()
609 "psrad $0x10,%%xmm0 \n" in ARGBToARGB1555Row_SSE2()
610 "psrld $0x3,%%xmm1 \n" in ARGBToARGB1555Row_SSE2()
611 "psrld $0x6,%%xmm2 \n" in ARGBToARGB1555Row_SSE2()
612 "psrld $0x9,%%xmm3 \n" in ARGBToARGB1555Row_SSE2()
613 "pand %%xmm7,%%xmm0 \n" in ARGBToARGB1555Row_SSE2()
614 "pand %%xmm4,%%xmm1 \n" in ARGBToARGB1555Row_SSE2()
615 "pand %%xmm5,%%xmm2 \n" in ARGBToARGB1555Row_SSE2()
616 "pand %%xmm6,%%xmm3 \n" in ARGBToARGB1555Row_SSE2()
617 "por %%xmm1,%%xmm0 \n" in ARGBToARGB1555Row_SSE2()
618 "por %%xmm3,%%xmm2 \n" in ARGBToARGB1555Row_SSE2()
619 "por %%xmm2,%%xmm0 \n" in ARGBToARGB1555Row_SSE2()
620 "packssdw %%xmm0,%%xmm0 \n" in ARGBToARGB1555Row_SSE2()
621 "lea 0x10(%0),%0 \n" in ARGBToARGB1555Row_SSE2()
622 "movq %%xmm0,(%1) \n" in ARGBToARGB1555Row_SSE2()
623 "lea 0x8(%1),%1 \n" in ARGBToARGB1555Row_SSE2()
624 "sub $0x4,%2 \n" in ARGBToARGB1555Row_SSE2()
625 "jg 1b \n" in ARGBToARGB1555Row_SSE2()
639 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBToARGB4444Row_SSE2()
640 "psllw $0xc,%%xmm4 \n" in ARGBToARGB4444Row_SSE2()
641 "movdqa %%xmm4,%%xmm3 \n" in ARGBToARGB4444Row_SSE2()
642 "psrlw $0x8,%%xmm3 \n" in ARGBToARGB4444Row_SSE2()
643 ".p2align 4 \n" in ARGBToARGB4444Row_SSE2()
644 "1: \n" in ARGBToARGB4444Row_SSE2()
645 "movdqa (%0),%%xmm0 \n" in ARGBToARGB4444Row_SSE2()
646 "movdqa %%xmm0,%%xmm1 \n" in ARGBToARGB4444Row_SSE2()
647 "pand %%xmm3,%%xmm0 \n" in ARGBToARGB4444Row_SSE2()
648 "pand %%xmm4,%%xmm1 \n" in ARGBToARGB4444Row_SSE2()
649 "psrlq $0x4,%%xmm0 \n" in ARGBToARGB4444Row_SSE2()
650 "psrlq $0x8,%%xmm1 \n" in ARGBToARGB4444Row_SSE2()
651 "por %%xmm1,%%xmm0 \n" in ARGBToARGB4444Row_SSE2()
652 "packuswb %%xmm0,%%xmm0 \n" in ARGBToARGB4444Row_SSE2()
653 "lea 0x10(%0),%0 \n" in ARGBToARGB4444Row_SSE2()
654 "movq %%xmm0,(%1) \n" in ARGBToARGB4444Row_SSE2()
655 "lea 0x8(%1),%1 \n" in ARGBToARGB4444Row_SSE2()
656 "sub $0x4,%2 \n" in ARGBToARGB4444Row_SSE2()
657 "jg 1b \n" in ARGBToARGB4444Row_SSE2()
671 "movdqa %4,%%xmm5 \n" in ARGBToYRow_SSSE3()
672 "movdqa %3,%%xmm4 \n" in ARGBToYRow_SSSE3()
673 ".p2align 4 \n" in ARGBToYRow_SSSE3()
674 "1: \n" in ARGBToYRow_SSSE3()
675 "movdqa (%0),%%xmm0 \n" in ARGBToYRow_SSSE3()
676 "movdqa 0x10(%0),%%xmm1 \n" in ARGBToYRow_SSSE3()
677 "movdqa 0x20(%0),%%xmm2 \n" in ARGBToYRow_SSSE3()
678 "movdqa 0x30(%0),%%xmm3 \n" in ARGBToYRow_SSSE3()
679 "pmaddubsw %%xmm4,%%xmm0 \n" in ARGBToYRow_SSSE3()
680 "pmaddubsw %%xmm4,%%xmm1 \n" in ARGBToYRow_SSSE3()
681 "pmaddubsw %%xmm4,%%xmm2 \n" in ARGBToYRow_SSSE3()
682 "pmaddubsw %%xmm4,%%xmm3 \n" in ARGBToYRow_SSSE3()
683 "lea 0x40(%0),%0 \n" in ARGBToYRow_SSSE3()
684 "phaddw %%xmm1,%%xmm0 \n" in ARGBToYRow_SSSE3()
685 "phaddw %%xmm3,%%xmm2 \n" in ARGBToYRow_SSSE3()
686 "psrlw $0x7,%%xmm0 \n" in ARGBToYRow_SSSE3()
687 "psrlw $0x7,%%xmm2 \n" in ARGBToYRow_SSSE3()
688 "packuswb %%xmm2,%%xmm0 \n" in ARGBToYRow_SSSE3()
689 "paddb %%xmm5,%%xmm0 \n" in ARGBToYRow_SSSE3()
690 "sub $0x10,%2 \n" in ARGBToYRow_SSSE3()
691 "movdqa %%xmm0,(%1) \n" in ARGBToYRow_SSSE3()
692 "lea 0x10(%1),%1 \n" in ARGBToYRow_SSSE3()
693 "jg 1b \n" in ARGBToYRow_SSSE3()
708 "movdqa %4,%%xmm5 \n" in ARGBToYRow_Unaligned_SSSE3()
709 "movdqa %3,%%xmm4 \n" in ARGBToYRow_Unaligned_SSSE3()
710 ".p2align 4 \n" in ARGBToYRow_Unaligned_SSSE3()
711 "1: \n" in ARGBToYRow_Unaligned_SSSE3()
712 "movdqu (%0),%%xmm0 \n" in ARGBToYRow_Unaligned_SSSE3()
713 "movdqu 0x10(%0),%%xmm1 \n" in ARGBToYRow_Unaligned_SSSE3()
714 "movdqu 0x20(%0),%%xmm2 \n" in ARGBToYRow_Unaligned_SSSE3()
715 "movdqu 0x30(%0),%%xmm3 \n" in ARGBToYRow_Unaligned_SSSE3()
716 "pmaddubsw %%xmm4,%%xmm0 \n" in ARGBToYRow_Unaligned_SSSE3()
717 "pmaddubsw %%xmm4,%%xmm1 \n" in ARGBToYRow_Unaligned_SSSE3()
718 "pmaddubsw %%xmm4,%%xmm2 \n" in ARGBToYRow_Unaligned_SSSE3()
719 "pmaddubsw %%xmm4,%%xmm3 \n" in ARGBToYRow_Unaligned_SSSE3()
720 "lea 0x40(%0),%0 \n" in ARGBToYRow_Unaligned_SSSE3()
721 "phaddw %%xmm1,%%xmm0 \n" in ARGBToYRow_Unaligned_SSSE3()
722 "phaddw %%xmm3,%%xmm2 \n" in ARGBToYRow_Unaligned_SSSE3()
723 "psrlw $0x7,%%xmm0 \n" in ARGBToYRow_Unaligned_SSSE3()
724 "psrlw $0x7,%%xmm2 \n" in ARGBToYRow_Unaligned_SSSE3()
725 "packuswb %%xmm2,%%xmm0 \n" in ARGBToYRow_Unaligned_SSSE3()
726 "paddb %%xmm5,%%xmm0 \n" in ARGBToYRow_Unaligned_SSSE3()
727 "sub $0x10,%2 \n" in ARGBToYRow_Unaligned_SSSE3()
728 "movdqu %%xmm0,(%1) \n" in ARGBToYRow_Unaligned_SSSE3()
729 "lea 0x10(%1),%1 \n" in ARGBToYRow_Unaligned_SSSE3()
730 "jg 1b \n" in ARGBToYRow_Unaligned_SSSE3()
751 "movdqa %0,%%xmm4 \n" in ARGBToUVRow_SSSE3()
752 "movdqa %1,%%xmm3 \n" in ARGBToUVRow_SSSE3()
753 "movdqa %2,%%xmm5 \n" in ARGBToUVRow_SSSE3()
760 "sub %1,%2 \n" in ARGBToUVRow_SSSE3()
761 ".p2align 4 \n" in ARGBToUVRow_SSSE3()
762 "1: \n" in ARGBToUVRow_SSSE3()
763 "movdqa (%0),%%xmm0 \n" in ARGBToUVRow_SSSE3()
764 "movdqa 0x10(%0),%%xmm1 \n" in ARGBToUVRow_SSSE3()
765 "movdqa 0x20(%0),%%xmm2 \n" in ARGBToUVRow_SSSE3()
766 "movdqa 0x30(%0),%%xmm6 \n" in ARGBToUVRow_SSSE3()
767 "pavgb (%0,%4,1),%%xmm0 \n" in ARGBToUVRow_SSSE3()
768 "pavgb 0x10(%0,%4,1),%%xmm1 \n" in ARGBToUVRow_SSSE3()
769 "pavgb 0x20(%0,%4,1),%%xmm2 \n" in ARGBToUVRow_SSSE3()
770 "pavgb 0x30(%0,%4,1),%%xmm6 \n" in ARGBToUVRow_SSSE3()
771 "lea 0x40(%0),%0 \n" in ARGBToUVRow_SSSE3()
772 "movdqa %%xmm0,%%xmm7 \n" in ARGBToUVRow_SSSE3()
773 "shufps $0x88,%%xmm1,%%xmm0 \n" in ARGBToUVRow_SSSE3()
774 "shufps $0xdd,%%xmm1,%%xmm7 \n" in ARGBToUVRow_SSSE3()
775 "pavgb %%xmm7,%%xmm0 \n" in ARGBToUVRow_SSSE3()
776 "movdqa %%xmm2,%%xmm7 \n" in ARGBToUVRow_SSSE3()
777 "shufps $0x88,%%xmm6,%%xmm2 \n" in ARGBToUVRow_SSSE3()
778 "shufps $0xdd,%%xmm6,%%xmm7 \n" in ARGBToUVRow_SSSE3()
779 "pavgb %%xmm7,%%xmm2 \n" in ARGBToUVRow_SSSE3()
780 "movdqa %%xmm0,%%xmm1 \n" in ARGBToUVRow_SSSE3()
781 "movdqa %%xmm2,%%xmm6 \n" in ARGBToUVRow_SSSE3()
782 "pmaddubsw %%xmm4,%%xmm0 \n" in ARGBToUVRow_SSSE3()
783 "pmaddubsw %%xmm4,%%xmm2 \n" in ARGBToUVRow_SSSE3()
784 "pmaddubsw %%xmm3,%%xmm1 \n" in ARGBToUVRow_SSSE3()
785 "pmaddubsw %%xmm3,%%xmm6 \n" in ARGBToUVRow_SSSE3()
786 "phaddw %%xmm2,%%xmm0 \n" in ARGBToUVRow_SSSE3()
787 "phaddw %%xmm6,%%xmm1 \n" in ARGBToUVRow_SSSE3()
788 "psraw $0x8,%%xmm0 \n" in ARGBToUVRow_SSSE3()
789 "psraw $0x8,%%xmm1 \n" in ARGBToUVRow_SSSE3()
790 "packsswb %%xmm1,%%xmm0 \n" in ARGBToUVRow_SSSE3()
791 "paddb %%xmm5,%%xmm0 \n" in ARGBToUVRow_SSSE3()
792 "sub $0x10,%3 \n" in ARGBToUVRow_SSSE3()
793 "movlps %%xmm0,(%1) \n" in ARGBToUVRow_SSSE3()
794 "movhps %%xmm0,(%1,%2,1) \n" in ARGBToUVRow_SSSE3()
795 "lea 0x8(%1),%1 \n" in ARGBToUVRow_SSSE3()
796 "jg 1b \n" in ARGBToUVRow_SSSE3()
812 "movdqa %0,%%xmm4 \n" in ARGBToUVRow_Unaligned_SSSE3()
813 "movdqa %1,%%xmm3 \n" in ARGBToUVRow_Unaligned_SSSE3()
814 "movdqa %2,%%xmm5 \n" in ARGBToUVRow_Unaligned_SSSE3()
821 "sub %1,%2 \n" in ARGBToUVRow_Unaligned_SSSE3()
822 ".p2align 4 \n" in ARGBToUVRow_Unaligned_SSSE3()
823 "1: \n" in ARGBToUVRow_Unaligned_SSSE3()
824 "movdqu (%0),%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
825 "movdqu 0x10(%0),%%xmm1 \n" in ARGBToUVRow_Unaligned_SSSE3()
826 "movdqu 0x20(%0),%%xmm2 \n" in ARGBToUVRow_Unaligned_SSSE3()
827 "movdqu 0x30(%0),%%xmm6 \n" in ARGBToUVRow_Unaligned_SSSE3()
828 "movdqu (%0,%4,1),%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
829 "pavgb %%xmm7,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
830 "movdqu 0x10(%0,%4,1),%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
831 "pavgb %%xmm7,%%xmm1 \n" in ARGBToUVRow_Unaligned_SSSE3()
832 "movdqu 0x20(%0,%4,1),%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
833 "pavgb %%xmm7,%%xmm2 \n" in ARGBToUVRow_Unaligned_SSSE3()
834 "movdqu 0x30(%0,%4,1),%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
835 "pavgb %%xmm7,%%xmm6 \n" in ARGBToUVRow_Unaligned_SSSE3()
836 "lea 0x40(%0),%0 \n" in ARGBToUVRow_Unaligned_SSSE3()
837 "movdqa %%xmm0,%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
838 "shufps $0x88,%%xmm1,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
839 "shufps $0xdd,%%xmm1,%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
840 "pavgb %%xmm7,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
841 "movdqa %%xmm2,%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
842 "shufps $0x88,%%xmm6,%%xmm2 \n" in ARGBToUVRow_Unaligned_SSSE3()
843 "shufps $0xdd,%%xmm6,%%xmm7 \n" in ARGBToUVRow_Unaligned_SSSE3()
844 "pavgb %%xmm7,%%xmm2 \n" in ARGBToUVRow_Unaligned_SSSE3()
845 "movdqa %%xmm0,%%xmm1 \n" in ARGBToUVRow_Unaligned_SSSE3()
846 "movdqa %%xmm2,%%xmm6 \n" in ARGBToUVRow_Unaligned_SSSE3()
847 "pmaddubsw %%xmm4,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
848 "pmaddubsw %%xmm4,%%xmm2 \n" in ARGBToUVRow_Unaligned_SSSE3()
849 "pmaddubsw %%xmm3,%%xmm1 \n" in ARGBToUVRow_Unaligned_SSSE3()
850 "pmaddubsw %%xmm3,%%xmm6 \n" in ARGBToUVRow_Unaligned_SSSE3()
851 "phaddw %%xmm2,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
852 "phaddw %%xmm6,%%xmm1 \n" in ARGBToUVRow_Unaligned_SSSE3()
853 "psraw $0x8,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
854 "psraw $0x8,%%xmm1 \n" in ARGBToUVRow_Unaligned_SSSE3()
855 "packsswb %%xmm1,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
856 "paddb %%xmm5,%%xmm0 \n" in ARGBToUVRow_Unaligned_SSSE3()
857 "sub $0x10,%3 \n" in ARGBToUVRow_Unaligned_SSSE3()
858 "movlps %%xmm0,(%1) \n" in ARGBToUVRow_Unaligned_SSSE3()
859 "movhps %%xmm0,(%1,%2,1) \n" in ARGBToUVRow_Unaligned_SSSE3()
860 "lea 0x8(%1),%1 \n" in ARGBToUVRow_Unaligned_SSSE3()
861 "jg 1b \n" in ARGBToUVRow_Unaligned_SSSE3()
876 "movdqa %4,%%xmm5 \n" in BGRAToYRow_SSSE3()
877 "movdqa %3,%%xmm4 \n" in BGRAToYRow_SSSE3()
878 ".p2align 4 \n" in BGRAToYRow_SSSE3()
879 "1: \n" in BGRAToYRow_SSSE3()
880 "movdqa (%0),%%xmm0 \n" in BGRAToYRow_SSSE3()
881 "movdqa 0x10(%0),%%xmm1 \n" in BGRAToYRow_SSSE3()
882 "movdqa 0x20(%0),%%xmm2 \n" in BGRAToYRow_SSSE3()
883 "movdqa 0x30(%0),%%xmm3 \n" in BGRAToYRow_SSSE3()
884 "pmaddubsw %%xmm4,%%xmm0 \n" in BGRAToYRow_SSSE3()
885 "pmaddubsw %%xmm4,%%xmm1 \n" in BGRAToYRow_SSSE3()
886 "pmaddubsw %%xmm4,%%xmm2 \n" in BGRAToYRow_SSSE3()
887 "pmaddubsw %%xmm4,%%xmm3 \n" in BGRAToYRow_SSSE3()
888 "lea 0x40(%0),%0 \n" in BGRAToYRow_SSSE3()
889 "phaddw %%xmm1,%%xmm0 \n" in BGRAToYRow_SSSE3()
890 "phaddw %%xmm3,%%xmm2 \n" in BGRAToYRow_SSSE3()
891 "psrlw $0x7,%%xmm0 \n" in BGRAToYRow_SSSE3()
892 "psrlw $0x7,%%xmm2 \n" in BGRAToYRow_SSSE3()
893 "packuswb %%xmm2,%%xmm0 \n" in BGRAToYRow_SSSE3()
894 "paddb %%xmm5,%%xmm0 \n" in BGRAToYRow_SSSE3()
895 "sub $0x10,%2 \n" in BGRAToYRow_SSSE3()
896 "movdqa %%xmm0,(%1) \n" in BGRAToYRow_SSSE3()
897 "lea 0x10(%1),%1 \n" in BGRAToYRow_SSSE3()
898 "jg 1b \n" in BGRAToYRow_SSSE3()
913 "movdqa %4,%%xmm5 \n" in BGRAToYRow_Unaligned_SSSE3()
914 "movdqa %3,%%xmm4 \n" in BGRAToYRow_Unaligned_SSSE3()
915 ".p2align 4 \n" in BGRAToYRow_Unaligned_SSSE3()
916 "1: \n" in BGRAToYRow_Unaligned_SSSE3()
917 "movdqu (%0),%%xmm0 \n" in BGRAToYRow_Unaligned_SSSE3()
918 "movdqu 0x10(%0),%%xmm1 \n" in BGRAToYRow_Unaligned_SSSE3()
919 "movdqu 0x20(%0),%%xmm2 \n" in BGRAToYRow_Unaligned_SSSE3()
920 "movdqu 0x30(%0),%%xmm3 \n" in BGRAToYRow_Unaligned_SSSE3()
921 "pmaddubsw %%xmm4,%%xmm0 \n" in BGRAToYRow_Unaligned_SSSE3()
922 "pmaddubsw %%xmm4,%%xmm1 \n" in BGRAToYRow_Unaligned_SSSE3()
923 "pmaddubsw %%xmm4,%%xmm2 \n" in BGRAToYRow_Unaligned_SSSE3()
924 "pmaddubsw %%xmm4,%%xmm3 \n" in BGRAToYRow_Unaligned_SSSE3()
925 "lea 0x40(%0),%0 \n" in BGRAToYRow_Unaligned_SSSE3()
926 "phaddw %%xmm1,%%xmm0 \n" in BGRAToYRow_Unaligned_SSSE3()
927 "phaddw %%xmm3,%%xmm2 \n" in BGRAToYRow_Unaligned_SSSE3()
928 "psrlw $0x7,%%xmm0 \n" in BGRAToYRow_Unaligned_SSSE3()
929 "psrlw $0x7,%%xmm2 \n" in BGRAToYRow_Unaligned_SSSE3()
930 "packuswb %%xmm2,%%xmm0 \n" in BGRAToYRow_Unaligned_SSSE3()
931 "paddb %%xmm5,%%xmm0 \n" in BGRAToYRow_Unaligned_SSSE3()
932 "sub $0x10,%2 \n" in BGRAToYRow_Unaligned_SSSE3()
933 "movdqu %%xmm0,(%1) \n" in BGRAToYRow_Unaligned_SSSE3()
934 "lea 0x10(%1),%1 \n" in BGRAToYRow_Unaligned_SSSE3()
935 "jg 1b \n" in BGRAToYRow_Unaligned_SSSE3()
951 "movdqa %0,%%xmm4 \n" in BGRAToUVRow_SSSE3()
952 "movdqa %1,%%xmm3 \n" in BGRAToUVRow_SSSE3()
953 "movdqa %2,%%xmm5 \n" in BGRAToUVRow_SSSE3()
960 "sub %1,%2 \n" in BGRAToUVRow_SSSE3()
961 ".p2align 4 \n" in BGRAToUVRow_SSSE3()
962 "1: \n" in BGRAToUVRow_SSSE3()
963 "movdqa (%0),%%xmm0 \n" in BGRAToUVRow_SSSE3()
964 "movdqa 0x10(%0),%%xmm1 \n" in BGRAToUVRow_SSSE3()
965 "movdqa 0x20(%0),%%xmm2 \n" in BGRAToUVRow_SSSE3()
966 "movdqa 0x30(%0),%%xmm6 \n" in BGRAToUVRow_SSSE3()
967 "pavgb (%0,%4,1),%%xmm0 \n" in BGRAToUVRow_SSSE3()
968 "pavgb 0x10(%0,%4,1),%%xmm1 \n" in BGRAToUVRow_SSSE3()
969 "pavgb 0x20(%0,%4,1),%%xmm2 \n" in BGRAToUVRow_SSSE3()
970 "pavgb 0x30(%0,%4,1),%%xmm6 \n" in BGRAToUVRow_SSSE3()
971 "lea 0x40(%0),%0 \n" in BGRAToUVRow_SSSE3()
972 "movdqa %%xmm0,%%xmm7 \n" in BGRAToUVRow_SSSE3()
973 "shufps $0x88,%%xmm1,%%xmm0 \n" in BGRAToUVRow_SSSE3()
974 "shufps $0xdd,%%xmm1,%%xmm7 \n" in BGRAToUVRow_SSSE3()
975 "pavgb %%xmm7,%%xmm0 \n" in BGRAToUVRow_SSSE3()
976 "movdqa %%xmm2,%%xmm7 \n" in BGRAToUVRow_SSSE3()
977 "shufps $0x88,%%xmm6,%%xmm2 \n" in BGRAToUVRow_SSSE3()
978 "shufps $0xdd,%%xmm6,%%xmm7 \n" in BGRAToUVRow_SSSE3()
979 "pavgb %%xmm7,%%xmm2 \n" in BGRAToUVRow_SSSE3()
980 "movdqa %%xmm0,%%xmm1 \n" in BGRAToUVRow_SSSE3()
981 "movdqa %%xmm2,%%xmm6 \n" in BGRAToUVRow_SSSE3()
982 "pmaddubsw %%xmm4,%%xmm0 \n" in BGRAToUVRow_SSSE3()
983 "pmaddubsw %%xmm4,%%xmm2 \n" in BGRAToUVRow_SSSE3()
984 "pmaddubsw %%xmm3,%%xmm1 \n" in BGRAToUVRow_SSSE3()
985 "pmaddubsw %%xmm3,%%xmm6 \n" in BGRAToUVRow_SSSE3()
986 "phaddw %%xmm2,%%xmm0 \n" in BGRAToUVRow_SSSE3()
987 "phaddw %%xmm6,%%xmm1 \n" in BGRAToUVRow_SSSE3()
988 "psraw $0x8,%%xmm0 \n" in BGRAToUVRow_SSSE3()
989 "psraw $0x8,%%xmm1 \n" in BGRAToUVRow_SSSE3()
990 "packsswb %%xmm1,%%xmm0 \n" in BGRAToUVRow_SSSE3()
991 "paddb %%xmm5,%%xmm0 \n" in BGRAToUVRow_SSSE3()
992 "sub $0x10,%3 \n" in BGRAToUVRow_SSSE3()
993 "movlps %%xmm0,(%1) \n" in BGRAToUVRow_SSSE3()
994 "movhps %%xmm0,(%1,%2,1) \n" in BGRAToUVRow_SSSE3()
995 "lea 0x8(%1),%1 \n" in BGRAToUVRow_SSSE3()
996 "jg 1b \n" in BGRAToUVRow_SSSE3()
1012 "movdqa %0,%%xmm4 \n" in BGRAToUVRow_Unaligned_SSSE3()
1013 "movdqa %1,%%xmm3 \n" in BGRAToUVRow_Unaligned_SSSE3()
1014 "movdqa %2,%%xmm5 \n" in BGRAToUVRow_Unaligned_SSSE3()
1021 "sub %1,%2 \n" in BGRAToUVRow_Unaligned_SSSE3()
1022 ".p2align 4 \n" in BGRAToUVRow_Unaligned_SSSE3()
1023 "1: \n" in BGRAToUVRow_Unaligned_SSSE3()
1024 "movdqu (%0),%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1025 "movdqu 0x10(%0),%%xmm1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1026 "movdqu 0x20(%0),%%xmm2 \n" in BGRAToUVRow_Unaligned_SSSE3()
1027 "movdqu 0x30(%0),%%xmm6 \n" in BGRAToUVRow_Unaligned_SSSE3()
1028 "movdqu (%0,%4,1),%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1029 "pavgb %%xmm7,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1030 "movdqu 0x10(%0,%4,1),%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1031 "pavgb %%xmm7,%%xmm1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1032 "movdqu 0x20(%0,%4,1),%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1033 "pavgb %%xmm7,%%xmm2 \n" in BGRAToUVRow_Unaligned_SSSE3()
1034 "movdqu 0x30(%0,%4,1),%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1035 "pavgb %%xmm7,%%xmm6 \n" in BGRAToUVRow_Unaligned_SSSE3()
1036 "lea 0x40(%0),%0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1037 "movdqa %%xmm0,%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1038 "shufps $0x88,%%xmm1,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1039 "shufps $0xdd,%%xmm1,%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1040 "pavgb %%xmm7,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1041 "movdqa %%xmm2,%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1042 "shufps $0x88,%%xmm6,%%xmm2 \n" in BGRAToUVRow_Unaligned_SSSE3()
1043 "shufps $0xdd,%%xmm6,%%xmm7 \n" in BGRAToUVRow_Unaligned_SSSE3()
1044 "pavgb %%xmm7,%%xmm2 \n" in BGRAToUVRow_Unaligned_SSSE3()
1045 "movdqa %%xmm0,%%xmm1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1046 "movdqa %%xmm2,%%xmm6 \n" in BGRAToUVRow_Unaligned_SSSE3()
1047 "pmaddubsw %%xmm4,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1048 "pmaddubsw %%xmm4,%%xmm2 \n" in BGRAToUVRow_Unaligned_SSSE3()
1049 "pmaddubsw %%xmm3,%%xmm1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1050 "pmaddubsw %%xmm3,%%xmm6 \n" in BGRAToUVRow_Unaligned_SSSE3()
1051 "phaddw %%xmm2,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1052 "phaddw %%xmm6,%%xmm1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1053 "psraw $0x8,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1054 "psraw $0x8,%%xmm1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1055 "packsswb %%xmm1,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1056 "paddb %%xmm5,%%xmm0 \n" in BGRAToUVRow_Unaligned_SSSE3()
1057 "sub $0x10,%3 \n" in BGRAToUVRow_Unaligned_SSSE3()
1058 "movlps %%xmm0,(%1) \n" in BGRAToUVRow_Unaligned_SSSE3()
1059 "movhps %%xmm0,(%1,%2,1) \n" in BGRAToUVRow_Unaligned_SSSE3()
1060 "lea 0x8(%1),%1 \n" in BGRAToUVRow_Unaligned_SSSE3()
1061 "jg 1b \n" in BGRAToUVRow_Unaligned_SSSE3()
1076 "movdqa %4,%%xmm5 \n" in ABGRToYRow_SSSE3()
1077 "movdqa %3,%%xmm4 \n" in ABGRToYRow_SSSE3()
1078 ".p2align 4 \n" in ABGRToYRow_SSSE3()
1079 "1: \n" in ABGRToYRow_SSSE3()
1080 "movdqa (%0),%%xmm0 \n" in ABGRToYRow_SSSE3()
1081 "movdqa 0x10(%0),%%xmm1 \n" in ABGRToYRow_SSSE3()
1082 "movdqa 0x20(%0),%%xmm2 \n" in ABGRToYRow_SSSE3()
1083 "movdqa 0x30(%0),%%xmm3 \n" in ABGRToYRow_SSSE3()
1084 "pmaddubsw %%xmm4,%%xmm0 \n" in ABGRToYRow_SSSE3()
1085 "pmaddubsw %%xmm4,%%xmm1 \n" in ABGRToYRow_SSSE3()
1086 "pmaddubsw %%xmm4,%%xmm2 \n" in ABGRToYRow_SSSE3()
1087 "pmaddubsw %%xmm4,%%xmm3 \n" in ABGRToYRow_SSSE3()
1088 "lea 0x40(%0),%0 \n" in ABGRToYRow_SSSE3()
1089 "phaddw %%xmm1,%%xmm0 \n" in ABGRToYRow_SSSE3()
1090 "phaddw %%xmm3,%%xmm2 \n" in ABGRToYRow_SSSE3()
1091 "psrlw $0x7,%%xmm0 \n" in ABGRToYRow_SSSE3()
1092 "psrlw $0x7,%%xmm2 \n" in ABGRToYRow_SSSE3()
1093 "packuswb %%xmm2,%%xmm0 \n" in ABGRToYRow_SSSE3()
1094 "paddb %%xmm5,%%xmm0 \n" in ABGRToYRow_SSSE3()
1095 "sub $0x10,%2 \n" in ABGRToYRow_SSSE3()
1096 "movdqa %%xmm0,(%1) \n" in ABGRToYRow_SSSE3()
1097 "lea 0x10(%1),%1 \n" in ABGRToYRow_SSSE3()
1098 "jg 1b \n" in ABGRToYRow_SSSE3()
1113 "movdqa %4,%%xmm5 \n" in ABGRToYRow_Unaligned_SSSE3()
1114 "movdqa %3,%%xmm4 \n" in ABGRToYRow_Unaligned_SSSE3()
1115 ".p2align 4 \n" in ABGRToYRow_Unaligned_SSSE3()
1116 "1: \n" in ABGRToYRow_Unaligned_SSSE3()
1117 "movdqu (%0),%%xmm0 \n" in ABGRToYRow_Unaligned_SSSE3()
1118 "movdqu 0x10(%0),%%xmm1 \n" in ABGRToYRow_Unaligned_SSSE3()
1119 "movdqu 0x20(%0),%%xmm2 \n" in ABGRToYRow_Unaligned_SSSE3()
1120 "movdqu 0x30(%0),%%xmm3 \n" in ABGRToYRow_Unaligned_SSSE3()
1121 "pmaddubsw %%xmm4,%%xmm0 \n" in ABGRToYRow_Unaligned_SSSE3()
1122 "pmaddubsw %%xmm4,%%xmm1 \n" in ABGRToYRow_Unaligned_SSSE3()
1123 "pmaddubsw %%xmm4,%%xmm2 \n" in ABGRToYRow_Unaligned_SSSE3()
1124 "pmaddubsw %%xmm4,%%xmm3 \n" in ABGRToYRow_Unaligned_SSSE3()
1125 "lea 0x40(%0),%0 \n" in ABGRToYRow_Unaligned_SSSE3()
1126 "phaddw %%xmm1,%%xmm0 \n" in ABGRToYRow_Unaligned_SSSE3()
1127 "phaddw %%xmm3,%%xmm2 \n" in ABGRToYRow_Unaligned_SSSE3()
1128 "psrlw $0x7,%%xmm0 \n" in ABGRToYRow_Unaligned_SSSE3()
1129 "psrlw $0x7,%%xmm2 \n" in ABGRToYRow_Unaligned_SSSE3()
1130 "packuswb %%xmm2,%%xmm0 \n" in ABGRToYRow_Unaligned_SSSE3()
1131 "paddb %%xmm5,%%xmm0 \n" in ABGRToYRow_Unaligned_SSSE3()
1132 "sub $0x10,%2 \n" in ABGRToYRow_Unaligned_SSSE3()
1133 "movdqu %%xmm0,(%1) \n" in ABGRToYRow_Unaligned_SSSE3()
1134 "lea 0x10(%1),%1 \n" in ABGRToYRow_Unaligned_SSSE3()
1135 "jg 1b \n" in ABGRToYRow_Unaligned_SSSE3()
1151 "movdqa %0,%%xmm4 \n" in ABGRToUVRow_SSSE3()
1152 "movdqa %1,%%xmm3 \n" in ABGRToUVRow_SSSE3()
1153 "movdqa %2,%%xmm5 \n" in ABGRToUVRow_SSSE3()
1160 "sub %1,%2 \n" in ABGRToUVRow_SSSE3()
1161 ".p2align 4 \n" in ABGRToUVRow_SSSE3()
1162 "1: \n" in ABGRToUVRow_SSSE3()
1163 "movdqa (%0),%%xmm0 \n" in ABGRToUVRow_SSSE3()
1164 "movdqa 0x10(%0),%%xmm1 \n" in ABGRToUVRow_SSSE3()
1165 "movdqa 0x20(%0),%%xmm2 \n" in ABGRToUVRow_SSSE3()
1166 "movdqa 0x30(%0),%%xmm6 \n" in ABGRToUVRow_SSSE3()
1167 "pavgb (%0,%4,1),%%xmm0 \n" in ABGRToUVRow_SSSE3()
1168 "pavgb 0x10(%0,%4,1),%%xmm1 \n" in ABGRToUVRow_SSSE3()
1169 "pavgb 0x20(%0,%4,1),%%xmm2 \n" in ABGRToUVRow_SSSE3()
1170 "pavgb 0x30(%0,%4,1),%%xmm6 \n" in ABGRToUVRow_SSSE3()
1171 "lea 0x40(%0),%0 \n" in ABGRToUVRow_SSSE3()
1172 "movdqa %%xmm0,%%xmm7 \n" in ABGRToUVRow_SSSE3()
1173 "shufps $0x88,%%xmm1,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1174 "shufps $0xdd,%%xmm1,%%xmm7 \n" in ABGRToUVRow_SSSE3()
1175 "pavgb %%xmm7,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1176 "movdqa %%xmm2,%%xmm7 \n" in ABGRToUVRow_SSSE3()
1177 "shufps $0x88,%%xmm6,%%xmm2 \n" in ABGRToUVRow_SSSE3()
1178 "shufps $0xdd,%%xmm6,%%xmm7 \n" in ABGRToUVRow_SSSE3()
1179 "pavgb %%xmm7,%%xmm2 \n" in ABGRToUVRow_SSSE3()
1180 "movdqa %%xmm0,%%xmm1 \n" in ABGRToUVRow_SSSE3()
1181 "movdqa %%xmm2,%%xmm6 \n" in ABGRToUVRow_SSSE3()
1182 "pmaddubsw %%xmm4,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1183 "pmaddubsw %%xmm4,%%xmm2 \n" in ABGRToUVRow_SSSE3()
1184 "pmaddubsw %%xmm3,%%xmm1 \n" in ABGRToUVRow_SSSE3()
1185 "pmaddubsw %%xmm3,%%xmm6 \n" in ABGRToUVRow_SSSE3()
1186 "phaddw %%xmm2,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1187 "phaddw %%xmm6,%%xmm1 \n" in ABGRToUVRow_SSSE3()
1188 "psraw $0x8,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1189 "psraw $0x8,%%xmm1 \n" in ABGRToUVRow_SSSE3()
1190 "packsswb %%xmm1,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1191 "paddb %%xmm5,%%xmm0 \n" in ABGRToUVRow_SSSE3()
1192 "sub $0x10,%3 \n" in ABGRToUVRow_SSSE3()
1193 "movlps %%xmm0,(%1) \n" in ABGRToUVRow_SSSE3()
1194 "movhps %%xmm0,(%1,%2,1) \n" in ABGRToUVRow_SSSE3()
1195 "lea 0x8(%1),%1 \n" in ABGRToUVRow_SSSE3()
1196 "jg 1b \n" in ABGRToUVRow_SSSE3()
1212 "movdqa %0,%%xmm4 \n" in ABGRToUVRow_Unaligned_SSSE3()
1213 "movdqa %1,%%xmm3 \n" in ABGRToUVRow_Unaligned_SSSE3()
1214 "movdqa %2,%%xmm5 \n" in ABGRToUVRow_Unaligned_SSSE3()
1221 "sub %1,%2 \n" in ABGRToUVRow_Unaligned_SSSE3()
1222 ".p2align 4 \n" in ABGRToUVRow_Unaligned_SSSE3()
1223 "1: \n" in ABGRToUVRow_Unaligned_SSSE3()
1224 "movdqu (%0),%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1225 "movdqu 0x10(%0),%%xmm1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1226 "movdqu 0x20(%0),%%xmm2 \n" in ABGRToUVRow_Unaligned_SSSE3()
1227 "movdqu 0x30(%0),%%xmm6 \n" in ABGRToUVRow_Unaligned_SSSE3()
1228 "movdqu (%0,%4,1),%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1229 "pavgb %%xmm7,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1230 "movdqu 0x10(%0,%4,1),%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1231 "pavgb %%xmm7,%%xmm1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1232 "movdqu 0x20(%0,%4,1),%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1233 "pavgb %%xmm7,%%xmm2 \n" in ABGRToUVRow_Unaligned_SSSE3()
1234 "movdqu 0x30(%0,%4,1),%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1235 "pavgb %%xmm7,%%xmm6 \n" in ABGRToUVRow_Unaligned_SSSE3()
1236 "lea 0x40(%0),%0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1237 "movdqa %%xmm0,%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1238 "shufps $0x88,%%xmm1,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1239 "shufps $0xdd,%%xmm1,%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1240 "pavgb %%xmm7,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1241 "movdqa %%xmm2,%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1242 "shufps $0x88,%%xmm6,%%xmm2 \n" in ABGRToUVRow_Unaligned_SSSE3()
1243 "shufps $0xdd,%%xmm6,%%xmm7 \n" in ABGRToUVRow_Unaligned_SSSE3()
1244 "pavgb %%xmm7,%%xmm2 \n" in ABGRToUVRow_Unaligned_SSSE3()
1245 "movdqa %%xmm0,%%xmm1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1246 "movdqa %%xmm2,%%xmm6 \n" in ABGRToUVRow_Unaligned_SSSE3()
1247 "pmaddubsw %%xmm4,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1248 "pmaddubsw %%xmm4,%%xmm2 \n" in ABGRToUVRow_Unaligned_SSSE3()
1249 "pmaddubsw %%xmm3,%%xmm1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1250 "pmaddubsw %%xmm3,%%xmm6 \n" in ABGRToUVRow_Unaligned_SSSE3()
1251 "phaddw %%xmm2,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1252 "phaddw %%xmm6,%%xmm1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1253 "psraw $0x8,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1254 "psraw $0x8,%%xmm1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1255 "packsswb %%xmm1,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1256 "paddb %%xmm5,%%xmm0 \n" in ABGRToUVRow_Unaligned_SSSE3()
1257 "sub $0x10,%3 \n" in ABGRToUVRow_Unaligned_SSSE3()
1258 "movlps %%xmm0,(%1) \n" in ABGRToUVRow_Unaligned_SSSE3()
1259 "movhps %%xmm0,(%1,%2,1) \n" in ABGRToUVRow_Unaligned_SSSE3()
1260 "lea 0x8(%1),%1 \n" in ABGRToUVRow_Unaligned_SSSE3()
1261 "jg 1b \n" in ABGRToUVRow_Unaligned_SSSE3()
1320 "movq (%[u_buf]),%%xmm0 \n" \
1321 "movq (%[u_buf],%[v_buf],1),%%xmm1 \n" \
1322 "lea 0x8(%[u_buf]),%[u_buf] \n" \
1323 "punpcklbw %%xmm1,%%xmm0 \n" \
1327 "movd (%[u_buf]),%%xmm0 \n" \
1328 "movd (%[u_buf],%[v_buf],1),%%xmm1 \n" \
1329 "lea 0x4(%[u_buf]),%[u_buf] \n" \
1330 "punpcklbw %%xmm1,%%xmm0 \n" \
1331 "punpcklwd %%xmm0,%%xmm0 \n" \
1335 "movd (%[u_buf]),%%xmm0 \n" \
1336 "movd (%[u_buf],%[v_buf],1),%%xmm1 \n" \
1337 "lea 0x2(%[u_buf]),%[u_buf] \n" \
1338 "punpcklbw %%xmm1,%%xmm0 \n" \
1339 "punpcklwd %%xmm0,%%xmm0 \n" \
1340 "punpckldq %%xmm0,%%xmm0 \n" \
1344 "movq (%[uv_buf]),%%xmm0 \n" \
1345 "lea 0x8(%[uv_buf]),%[uv_buf] \n" \
1346 "punpcklwd %%xmm0,%%xmm0 \n" \
1350 "movdqa %%xmm0,%%xmm1 \n" \
1351 "movdqa %%xmm0,%%xmm2 \n" \
1352 "pmaddubsw (%[kYuvConstants]),%%xmm0 \n" \
1353 "pmaddubsw 16(%[kYuvConstants]),%%xmm1 \n" \
1354 "pmaddubsw 32(%[kYuvConstants]),%%xmm2 \n" \
1355 "psubw 48(%[kYuvConstants]),%%xmm0 \n" \
1356 "psubw 64(%[kYuvConstants]),%%xmm1 \n" \
1357 "psubw 80(%[kYuvConstants]),%%xmm2 \n" \
1358 "movq (%[y_buf]),%%xmm3 \n" \
1359 "lea 0x8(%[y_buf]),%[y_buf] \n" \
1360 "punpcklbw %%xmm4,%%xmm3 \n" \
1361 "psubsw 96(%[kYuvConstants]),%%xmm3 \n" \
1362 "pmullw 112(%[kYuvConstants]),%%xmm3 \n" \
1363 "paddsw %%xmm3,%%xmm0 \n" \
1364 "paddsw %%xmm3,%%xmm1 \n" \
1365 "paddsw %%xmm3,%%xmm2 \n" \
1366 "psraw $0x6,%%xmm0 \n" \
1367 "psraw $0x6,%%xmm1 \n" \
1368 "psraw $0x6,%%xmm2 \n" \
1369 "packuswb %%xmm0,%%xmm0 \n" \
1370 "packuswb %%xmm1,%%xmm1 \n" \
1371 "packuswb %%xmm2,%%xmm2 \n" \
1375 "movdqa %%xmm0,%%xmm1 \n" \
1376 "movdqa %%xmm0,%%xmm2 \n" \
1377 "pmaddubsw 128(%[kYuvConstants]),%%xmm0 \n" \
1378 "pmaddubsw 144(%[kYuvConstants]),%%xmm1 \n" \
1379 "pmaddubsw 160(%[kYuvConstants]),%%xmm2 \n" \
1380 "psubw 48(%[kYuvConstants]),%%xmm0 \n" \
1381 "psubw 64(%[kYuvConstants]),%%xmm1 \n" \
1382 "psubw 80(%[kYuvConstants]),%%xmm2 \n" \
1383 "movq (%[y_buf]),%%xmm3 \n" \
1384 "lea 0x8(%[y_buf]),%[y_buf] \n" \
1385 "punpcklbw %%xmm4,%%xmm3 \n" \
1386 "psubsw 96(%[kYuvConstants]),%%xmm3 \n" \
1387 "pmullw 112(%[kYuvConstants]),%%xmm3 \n" \
1388 "paddsw %%xmm3,%%xmm0 \n" \
1389 "paddsw %%xmm3,%%xmm1 \n" \
1390 "paddsw %%xmm3,%%xmm2 \n" \
1391 "psraw $0x6,%%xmm0 \n" \
1392 "psraw $0x6,%%xmm1 \n" \
1393 "psraw $0x6,%%xmm2 \n" \
1394 "packuswb %%xmm0,%%xmm0 \n" \
1395 "packuswb %%xmm1,%%xmm1 \n" \
1396 "packuswb %%xmm2,%%xmm2 \n" \
1404 "sub %[u_buf],%[v_buf] \n" in I444ToARGBRow_SSSE3()
1405 "pcmpeqb %%xmm5,%%xmm5 \n" in I444ToARGBRow_SSSE3()
1406 "pxor %%xmm4,%%xmm4 \n" in I444ToARGBRow_SSSE3()
1407 ".p2align 4 \n" in I444ToARGBRow_SSSE3()
1408 "1: \n" in I444ToARGBRow_SSSE3()
1411 "punpcklbw %%xmm1,%%xmm0 \n" in I444ToARGBRow_SSSE3()
1412 "punpcklbw %%xmm5,%%xmm2 \n" in I444ToARGBRow_SSSE3()
1413 "movdqa %%xmm0,%%xmm1 \n" in I444ToARGBRow_SSSE3()
1414 "punpcklwd %%xmm2,%%xmm0 \n" in I444ToARGBRow_SSSE3()
1415 "punpckhwd %%xmm2,%%xmm1 \n" in I444ToARGBRow_SSSE3()
1416 "movdqa %%xmm0,(%[argb_buf]) \n" in I444ToARGBRow_SSSE3()
1417 "movdqa %%xmm1,0x10(%[argb_buf]) \n" in I444ToARGBRow_SSSE3()
1418 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I444ToARGBRow_SSSE3()
1419 "sub $0x8,%[width] \n" in I444ToARGBRow_SSSE3()
1420 "jg 1b \n" in I444ToARGBRow_SSSE3()
1440 "sub %[u_buf],%[v_buf] \n" in I422ToARGBRow_SSSE3()
1441 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToARGBRow_SSSE3()
1442 "pxor %%xmm4,%%xmm4 \n" in I422ToARGBRow_SSSE3()
1443 ".p2align 4 \n" in I422ToARGBRow_SSSE3()
1444 "1: \n" in I422ToARGBRow_SSSE3()
1447 "punpcklbw %%xmm1,%%xmm0 \n" in I422ToARGBRow_SSSE3()
1448 "punpcklbw %%xmm5,%%xmm2 \n" in I422ToARGBRow_SSSE3()
1449 "movdqa %%xmm0,%%xmm1 \n" in I422ToARGBRow_SSSE3()
1450 "punpcklwd %%xmm2,%%xmm0 \n" in I422ToARGBRow_SSSE3()
1451 "punpckhwd %%xmm2,%%xmm1 \n" in I422ToARGBRow_SSSE3()
1452 "movdqa %%xmm0,(%[argb_buf]) \n" in I422ToARGBRow_SSSE3()
1453 "movdqa %%xmm1,0x10(%[argb_buf]) \n" in I422ToARGBRow_SSSE3()
1454 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I422ToARGBRow_SSSE3()
1455 "sub $0x8,%[width] \n" in I422ToARGBRow_SSSE3()
1456 "jg 1b \n" in I422ToARGBRow_SSSE3()
1476 "sub %[u_buf],%[v_buf] \n" in I411ToARGBRow_SSSE3()
1477 "pcmpeqb %%xmm5,%%xmm5 \n" in I411ToARGBRow_SSSE3()
1478 "pxor %%xmm4,%%xmm4 \n" in I411ToARGBRow_SSSE3()
1479 ".p2align 4 \n" in I411ToARGBRow_SSSE3()
1480 "1: \n" in I411ToARGBRow_SSSE3()
1483 "punpcklbw %%xmm1,%%xmm0 \n" in I411ToARGBRow_SSSE3()
1484 "punpcklbw %%xmm5,%%xmm2 \n" in I411ToARGBRow_SSSE3()
1485 "movdqa %%xmm0,%%xmm1 \n" in I411ToARGBRow_SSSE3()
1486 "punpcklwd %%xmm2,%%xmm0 \n" in I411ToARGBRow_SSSE3()
1487 "punpckhwd %%xmm2,%%xmm1 \n" in I411ToARGBRow_SSSE3()
1488 "movdqa %%xmm0,(%[argb_buf]) \n" in I411ToARGBRow_SSSE3()
1489 "movdqa %%xmm1,0x10(%[argb_buf]) \n" in I411ToARGBRow_SSSE3()
1490 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I411ToARGBRow_SSSE3()
1491 "sub $0x8,%[width] \n" in I411ToARGBRow_SSSE3()
1492 "jg 1b \n" in I411ToARGBRow_SSSE3()
1511 "pcmpeqb %%xmm5,%%xmm5 \n" in NV12ToARGBRow_SSSE3()
1512 "pxor %%xmm4,%%xmm4 \n" in NV12ToARGBRow_SSSE3()
1513 ".p2align 4 \n" in NV12ToARGBRow_SSSE3()
1514 "1: \n" in NV12ToARGBRow_SSSE3()
1517 "punpcklbw %%xmm1,%%xmm0 \n" in NV12ToARGBRow_SSSE3()
1518 "punpcklbw %%xmm5,%%xmm2 \n" in NV12ToARGBRow_SSSE3()
1519 "movdqa %%xmm0,%%xmm1 \n" in NV12ToARGBRow_SSSE3()
1520 "punpcklwd %%xmm2,%%xmm0 \n" in NV12ToARGBRow_SSSE3()
1521 "punpckhwd %%xmm2,%%xmm1 \n" in NV12ToARGBRow_SSSE3()
1522 "movdqa %%xmm0,(%[argb_buf]) \n" in NV12ToARGBRow_SSSE3()
1523 "movdqa %%xmm1,0x10(%[argb_buf]) \n" in NV12ToARGBRow_SSSE3()
1524 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in NV12ToARGBRow_SSSE3()
1525 "sub $0x8,%[width] \n" in NV12ToARGBRow_SSSE3()
1526 "jg 1b \n" in NV12ToARGBRow_SSSE3()
1544 "pcmpeqb %%xmm5,%%xmm5 \n" in NV21ToARGBRow_SSSE3()
1545 "pxor %%xmm4,%%xmm4 \n" in NV21ToARGBRow_SSSE3()
1546 ".p2align 4 \n" in NV21ToARGBRow_SSSE3()
1547 "1: \n" in NV21ToARGBRow_SSSE3()
1550 "punpcklbw %%xmm1,%%xmm0 \n" in NV21ToARGBRow_SSSE3()
1551 "punpcklbw %%xmm5,%%xmm2 \n" in NV21ToARGBRow_SSSE3()
1552 "movdqa %%xmm0,%%xmm1 \n" in NV21ToARGBRow_SSSE3()
1553 "punpcklwd %%xmm2,%%xmm0 \n" in NV21ToARGBRow_SSSE3()
1554 "punpckhwd %%xmm2,%%xmm1 \n" in NV21ToARGBRow_SSSE3()
1555 "movdqa %%xmm0,(%[argb_buf]) \n" in NV21ToARGBRow_SSSE3()
1556 "movdqa %%xmm1,0x10(%[argb_buf]) \n" in NV21ToARGBRow_SSSE3()
1557 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in NV21ToARGBRow_SSSE3()
1558 "sub $0x8,%[width] \n" in NV21ToARGBRow_SSSE3()
1559 "jg 1b \n" in NV21ToARGBRow_SSSE3()
1578 "sub %[u_buf],%[v_buf] \n" in I444ToARGBRow_Unaligned_SSSE3()
1579 "pcmpeqb %%xmm5,%%xmm5 \n" in I444ToARGBRow_Unaligned_SSSE3()
1580 "pxor %%xmm4,%%xmm4 \n" in I444ToARGBRow_Unaligned_SSSE3()
1581 ".p2align 4 \n" in I444ToARGBRow_Unaligned_SSSE3()
1582 "1: \n" in I444ToARGBRow_Unaligned_SSSE3()
1585 "punpcklbw %%xmm1,%%xmm0 \n" in I444ToARGBRow_Unaligned_SSSE3()
1586 "punpcklbw %%xmm5,%%xmm2 \n" in I444ToARGBRow_Unaligned_SSSE3()
1587 "movdqa %%xmm0,%%xmm1 \n" in I444ToARGBRow_Unaligned_SSSE3()
1588 "punpcklwd %%xmm2,%%xmm0 \n" in I444ToARGBRow_Unaligned_SSSE3()
1589 "punpckhwd %%xmm2,%%xmm1 \n" in I444ToARGBRow_Unaligned_SSSE3()
1590 "movdqu %%xmm0,(%[argb_buf]) \n" in I444ToARGBRow_Unaligned_SSSE3()
1591 "movdqu %%xmm1,0x10(%[argb_buf]) \n" in I444ToARGBRow_Unaligned_SSSE3()
1592 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I444ToARGBRow_Unaligned_SSSE3()
1593 "sub $0x8,%[width] \n" in I444ToARGBRow_Unaligned_SSSE3()
1594 "jg 1b \n" in I444ToARGBRow_Unaligned_SSSE3()
1614 "sub %[u_buf],%[v_buf] \n" in I422ToARGBRow_Unaligned_SSSE3()
1615 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToARGBRow_Unaligned_SSSE3()
1616 "pxor %%xmm4,%%xmm4 \n" in I422ToARGBRow_Unaligned_SSSE3()
1617 ".p2align 4 \n" in I422ToARGBRow_Unaligned_SSSE3()
1618 "1: \n" in I422ToARGBRow_Unaligned_SSSE3()
1621 "punpcklbw %%xmm1,%%xmm0 \n" in I422ToARGBRow_Unaligned_SSSE3()
1622 "punpcklbw %%xmm5,%%xmm2 \n" in I422ToARGBRow_Unaligned_SSSE3()
1623 "movdqa %%xmm0,%%xmm1 \n" in I422ToARGBRow_Unaligned_SSSE3()
1624 "punpcklwd %%xmm2,%%xmm0 \n" in I422ToARGBRow_Unaligned_SSSE3()
1625 "punpckhwd %%xmm2,%%xmm1 \n" in I422ToARGBRow_Unaligned_SSSE3()
1626 "movdqu %%xmm0,(%[argb_buf]) \n" in I422ToARGBRow_Unaligned_SSSE3()
1627 "movdqu %%xmm1,0x10(%[argb_buf]) \n" in I422ToARGBRow_Unaligned_SSSE3()
1628 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I422ToARGBRow_Unaligned_SSSE3()
1629 "sub $0x8,%[width] \n" in I422ToARGBRow_Unaligned_SSSE3()
1630 "jg 1b \n" in I422ToARGBRow_Unaligned_SSSE3()
1650 "sub %[u_buf],%[v_buf] \n" in I411ToARGBRow_Unaligned_SSSE3()
1651 "pcmpeqb %%xmm5,%%xmm5 \n" in I411ToARGBRow_Unaligned_SSSE3()
1652 "pxor %%xmm4,%%xmm4 \n" in I411ToARGBRow_Unaligned_SSSE3()
1653 ".p2align 4 \n" in I411ToARGBRow_Unaligned_SSSE3()
1654 "1: \n" in I411ToARGBRow_Unaligned_SSSE3()
1657 "punpcklbw %%xmm1,%%xmm0 \n" in I411ToARGBRow_Unaligned_SSSE3()
1658 "punpcklbw %%xmm5,%%xmm2 \n" in I411ToARGBRow_Unaligned_SSSE3()
1659 "movdqa %%xmm0,%%xmm1 \n" in I411ToARGBRow_Unaligned_SSSE3()
1660 "punpcklwd %%xmm2,%%xmm0 \n" in I411ToARGBRow_Unaligned_SSSE3()
1661 "punpckhwd %%xmm2,%%xmm1 \n" in I411ToARGBRow_Unaligned_SSSE3()
1662 "movdqu %%xmm0,(%[argb_buf]) \n" in I411ToARGBRow_Unaligned_SSSE3()
1663 "movdqu %%xmm1,0x10(%[argb_buf]) \n" in I411ToARGBRow_Unaligned_SSSE3()
1664 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I411ToARGBRow_Unaligned_SSSE3()
1665 "sub $0x8,%[width] \n" in I411ToARGBRow_Unaligned_SSSE3()
1666 "jg 1b \n" in I411ToARGBRow_Unaligned_SSSE3()
1685 "pcmpeqb %%xmm5,%%xmm5 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1686 "pxor %%xmm4,%%xmm4 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1687 ".p2align 4 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1688 "1: \n" in NV12ToARGBRow_Unaligned_SSSE3()
1691 "punpcklbw %%xmm1,%%xmm0 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1692 "punpcklbw %%xmm5,%%xmm2 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1693 "movdqa %%xmm0,%%xmm1 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1694 "punpcklwd %%xmm2,%%xmm0 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1695 "punpckhwd %%xmm2,%%xmm1 \n" in NV12ToARGBRow_Unaligned_SSSE3()
1696 "movdqu %%xmm0,(%[argb_buf]) \n" in NV12ToARGBRow_Unaligned_SSSE3()
1697 "movdqu %%xmm1,0x10(%[argb_buf]) \n" in NV12ToARGBRow_Unaligned_SSSE3()
1698 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in NV12ToARGBRow_Unaligned_SSSE3()
1699 "sub $0x8,%[width] \n" in NV12ToARGBRow_Unaligned_SSSE3()
1700 "jg 1b \n" in NV12ToARGBRow_Unaligned_SSSE3()
1718 "pcmpeqb %%xmm5,%%xmm5 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1719 "pxor %%xmm4,%%xmm4 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1720 ".p2align 4 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1721 "1: \n" in NV21ToARGBRow_Unaligned_SSSE3()
1724 "punpcklbw %%xmm1,%%xmm0 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1725 "punpcklbw %%xmm5,%%xmm2 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1726 "movdqa %%xmm0,%%xmm1 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1727 "punpcklwd %%xmm2,%%xmm0 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1728 "punpckhwd %%xmm2,%%xmm1 \n" in NV21ToARGBRow_Unaligned_SSSE3()
1729 "movdqu %%xmm0,(%[argb_buf]) \n" in NV21ToARGBRow_Unaligned_SSSE3()
1730 "movdqu %%xmm1,0x10(%[argb_buf]) \n" in NV21ToARGBRow_Unaligned_SSSE3()
1731 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in NV21ToARGBRow_Unaligned_SSSE3()
1732 "sub $0x8,%[width] \n" in NV21ToARGBRow_Unaligned_SSSE3()
1733 "jg 1b \n" in NV21ToARGBRow_Unaligned_SSSE3()
1752 "sub %[u_buf],%[v_buf] \n" in I422ToBGRARow_SSSE3()
1753 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToBGRARow_SSSE3()
1754 "pxor %%xmm4,%%xmm4 \n" in I422ToBGRARow_SSSE3()
1755 ".p2align 4 \n" in I422ToBGRARow_SSSE3()
1756 "1: \n" in I422ToBGRARow_SSSE3()
1759 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToBGRARow_SSSE3()
1760 "punpcklbw %%xmm0,%%xmm1 \n" in I422ToBGRARow_SSSE3()
1761 "punpcklbw %%xmm2,%%xmm5 \n" in I422ToBGRARow_SSSE3()
1762 "movdqa %%xmm5,%%xmm0 \n" in I422ToBGRARow_SSSE3()
1763 "punpcklwd %%xmm1,%%xmm5 \n" in I422ToBGRARow_SSSE3()
1764 "punpckhwd %%xmm1,%%xmm0 \n" in I422ToBGRARow_SSSE3()
1765 "movdqa %%xmm5,(%[argb_buf]) \n" in I422ToBGRARow_SSSE3()
1766 "movdqa %%xmm0,0x10(%[argb_buf]) \n" in I422ToBGRARow_SSSE3()
1767 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I422ToBGRARow_SSSE3()
1768 "sub $0x8,%[width] \n" in I422ToBGRARow_SSSE3()
1769 "jg 1b \n" in I422ToBGRARow_SSSE3()
1789 "sub %[u_buf],%[v_buf] \n" in I422ToABGRRow_SSSE3()
1790 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToABGRRow_SSSE3()
1791 "pxor %%xmm4,%%xmm4 \n" in I422ToABGRRow_SSSE3()
1792 ".p2align 4 \n" in I422ToABGRRow_SSSE3()
1793 "1: \n" in I422ToABGRRow_SSSE3()
1796 "punpcklbw %%xmm1,%%xmm2 \n" in I422ToABGRRow_SSSE3()
1797 "punpcklbw %%xmm5,%%xmm0 \n" in I422ToABGRRow_SSSE3()
1798 "movdqa %%xmm2,%%xmm1 \n" in I422ToABGRRow_SSSE3()
1799 "punpcklwd %%xmm0,%%xmm2 \n" in I422ToABGRRow_SSSE3()
1800 "punpckhwd %%xmm0,%%xmm1 \n" in I422ToABGRRow_SSSE3()
1801 "movdqa %%xmm2,(%[argb_buf]) \n" in I422ToABGRRow_SSSE3()
1802 "movdqa %%xmm1,0x10(%[argb_buf]) \n" in I422ToABGRRow_SSSE3()
1803 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I422ToABGRRow_SSSE3()
1804 "sub $0x8,%[width] \n" in I422ToABGRRow_SSSE3()
1805 "jg 1b \n" in I422ToABGRRow_SSSE3()
1825 "sub %[u_buf],%[v_buf] \n" in I422ToBGRARow_Unaligned_SSSE3()
1826 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToBGRARow_Unaligned_SSSE3()
1827 "pxor %%xmm4,%%xmm4 \n" in I422ToBGRARow_Unaligned_SSSE3()
1828 ".p2align 4 \n" in I422ToBGRARow_Unaligned_SSSE3()
1829 "1: \n" in I422ToBGRARow_Unaligned_SSSE3()
1832 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToBGRARow_Unaligned_SSSE3()
1833 "punpcklbw %%xmm0,%%xmm1 \n" in I422ToBGRARow_Unaligned_SSSE3()
1834 "punpcklbw %%xmm2,%%xmm5 \n" in I422ToBGRARow_Unaligned_SSSE3()
1835 "movdqa %%xmm5,%%xmm0 \n" in I422ToBGRARow_Unaligned_SSSE3()
1836 "punpcklwd %%xmm1,%%xmm5 \n" in I422ToBGRARow_Unaligned_SSSE3()
1837 "punpckhwd %%xmm1,%%xmm0 \n" in I422ToBGRARow_Unaligned_SSSE3()
1838 "movdqu %%xmm5,(%[argb_buf]) \n" in I422ToBGRARow_Unaligned_SSSE3()
1839 "movdqu %%xmm0,0x10(%[argb_buf]) \n" in I422ToBGRARow_Unaligned_SSSE3()
1840 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I422ToBGRARow_Unaligned_SSSE3()
1841 "sub $0x8,%[width] \n" in I422ToBGRARow_Unaligned_SSSE3()
1842 "jg 1b \n" in I422ToBGRARow_Unaligned_SSSE3()
1862 "sub %[u_buf],%[v_buf] \n" in I422ToABGRRow_Unaligned_SSSE3()
1863 "pcmpeqb %%xmm5,%%xmm5 \n" in I422ToABGRRow_Unaligned_SSSE3()
1864 "pxor %%xmm4,%%xmm4 \n" in I422ToABGRRow_Unaligned_SSSE3()
1865 ".p2align 4 \n" in I422ToABGRRow_Unaligned_SSSE3()
1866 "1: \n" in I422ToABGRRow_Unaligned_SSSE3()
1869 "punpcklbw %%xmm1,%%xmm2 \n" in I422ToABGRRow_Unaligned_SSSE3()
1870 "punpcklbw %%xmm5,%%xmm0 \n" in I422ToABGRRow_Unaligned_SSSE3()
1871 "movdqa %%xmm2,%%xmm1 \n" in I422ToABGRRow_Unaligned_SSSE3()
1872 "punpcklwd %%xmm0,%%xmm2 \n" in I422ToABGRRow_Unaligned_SSSE3()
1873 "punpckhwd %%xmm0,%%xmm1 \n" in I422ToABGRRow_Unaligned_SSSE3()
1874 "movdqu %%xmm2,(%[argb_buf]) \n" in I422ToABGRRow_Unaligned_SSSE3()
1875 "movdqu %%xmm1,0x10(%[argb_buf]) \n" in I422ToABGRRow_Unaligned_SSSE3()
1876 "lea 0x20(%[argb_buf]),%[argb_buf] \n" in I422ToABGRRow_Unaligned_SSSE3()
1877 "sub $0x8,%[width] \n" in I422ToABGRRow_Unaligned_SSSE3()
1878 "jg 1b \n" in I422ToABGRRow_Unaligned_SSSE3()
1898 "pcmpeqb %%xmm4,%%xmm4 \n" in YToARGBRow_SSE2()
1899 "pslld $0x18,%%xmm4 \n" in YToARGBRow_SSE2()
1900 "mov $0x10001000,%%eax \n" in YToARGBRow_SSE2()
1901 "movd %%eax,%%xmm3 \n" in YToARGBRow_SSE2()
1902 "pshufd $0x0,%%xmm3,%%xmm3 \n" in YToARGBRow_SSE2()
1903 "mov $0x012a012a,%%eax \n" in YToARGBRow_SSE2()
1904 "movd %%eax,%%xmm2 \n" in YToARGBRow_SSE2()
1905 "pshufd $0x0,%%xmm2,%%xmm2 \n" in YToARGBRow_SSE2()
1906 ".p2align 4 \n" in YToARGBRow_SSE2()
1907 "1: \n" in YToARGBRow_SSE2()
1909 "movq (%0),%%xmm0 \n" in YToARGBRow_SSE2()
1910 "lea 0x8(%0),%0 \n" in YToARGBRow_SSE2()
1911 "punpcklbw %%xmm0,%%xmm0 \n" in YToARGBRow_SSE2()
1912 "psubusw %%xmm3,%%xmm0 \n" in YToARGBRow_SSE2()
1913 "pmulhuw %%xmm2,%%xmm0 \n" in YToARGBRow_SSE2()
1914 "packuswb %%xmm0,%%xmm0 \n" in YToARGBRow_SSE2()
1917 "punpcklbw %%xmm0,%%xmm0 \n" in YToARGBRow_SSE2()
1918 "movdqa %%xmm0,%%xmm1 \n" in YToARGBRow_SSE2()
1919 "punpcklwd %%xmm0,%%xmm0 \n" in YToARGBRow_SSE2()
1920 "punpckhwd %%xmm1,%%xmm1 \n" in YToARGBRow_SSE2()
1921 "por %%xmm4,%%xmm0 \n" in YToARGBRow_SSE2()
1922 "por %%xmm4,%%xmm1 \n" in YToARGBRow_SSE2()
1923 "movdqa %%xmm0,(%1) \n" in YToARGBRow_SSE2()
1924 "movdqa %%xmm1,16(%1) \n" in YToARGBRow_SSE2()
1925 "lea 32(%1),%1 \n" in YToARGBRow_SSE2()
1927 "sub $0x8,%2 \n" in YToARGBRow_SSE2()
1928 "jg 1b \n" in YToARGBRow_SSE2()
1950 "movdqa %3,%%xmm5 \n" in MirrorRow_SSSE3()
1951 "lea -0x10(%0),%0 \n" in MirrorRow_SSSE3()
1952 ".p2align 4 \n" in MirrorRow_SSSE3()
1953 "1: \n" in MirrorRow_SSSE3()
1954 "movdqa (%0,%2),%%xmm0 \n" in MirrorRow_SSSE3()
1955 "pshufb %%xmm5,%%xmm0 \n" in MirrorRow_SSSE3()
1956 "sub $0x10,%2 \n" in MirrorRow_SSSE3()
1957 "movdqa %%xmm0,(%1) \n" in MirrorRow_SSSE3()
1958 "lea 0x10(%1),%1 \n" in MirrorRow_SSSE3()
1959 "jg 1b \n" in MirrorRow_SSSE3()
1976 "lea -0x10(%0),%0 \n" in MirrorRow_SSE2()
1977 ".p2align 4 \n" in MirrorRow_SSE2()
1978 "1: \n" in MirrorRow_SSE2()
1979 "movdqu (%0,%2),%%xmm0 \n" in MirrorRow_SSE2()
1980 "movdqa %%xmm0,%%xmm1 \n" in MirrorRow_SSE2()
1981 "psllw $0x8,%%xmm0 \n" in MirrorRow_SSE2()
1982 "psrlw $0x8,%%xmm1 \n" in MirrorRow_SSE2()
1983 "por %%xmm1,%%xmm0 \n" in MirrorRow_SSE2()
1984 "pshuflw $0x1b,%%xmm0,%%xmm0 \n" in MirrorRow_SSE2()
1985 "pshufhw $0x1b,%%xmm0,%%xmm0 \n" in MirrorRow_SSE2()
1986 "pshufd $0x4e,%%xmm0,%%xmm0 \n" in MirrorRow_SSE2()
1987 "sub $0x10,%2 \n" in MirrorRow_SSE2()
1988 "movdqu %%xmm0,(%1) \n" in MirrorRow_SSE2()
1989 "lea 0x10(%1),%1 \n" in MirrorRow_SSE2()
1990 "jg 1b \n" in MirrorRow_SSE2()
2012 "movdqa %4,%%xmm1 \n" in MirrorRowUV_SSSE3()
2013 "lea -16(%0,%3,2),%0 \n" in MirrorRowUV_SSSE3()
2014 "sub %1,%2 \n" in MirrorRowUV_SSSE3()
2015 ".p2align 4 \n" in MirrorRowUV_SSSE3()
2016 "1: \n" in MirrorRowUV_SSSE3()
2017 "movdqa (%0),%%xmm0 \n" in MirrorRowUV_SSSE3()
2018 "lea -16(%0),%0 \n" in MirrorRowUV_SSSE3()
2019 "pshufb %%xmm1,%%xmm0 \n" in MirrorRowUV_SSSE3()
2020 "sub $8,%3 \n" in MirrorRowUV_SSSE3()
2021 "movlpd %%xmm0,(%1) \n" in MirrorRowUV_SSSE3()
2022 "movhpd %%xmm0,(%1,%2) \n" in MirrorRowUV_SSSE3()
2023 "lea 8(%1),%1 \n" in MirrorRowUV_SSSE3()
2024 "jg 1b \n" in MirrorRowUV_SSSE3()
2047 "movdqa %3,%%xmm5 \n" in ARGBMirrorRow_SSSE3()
2048 "lea -0x10(%0),%0 \n" in ARGBMirrorRow_SSSE3()
2049 ".p2align 4 \n" in ARGBMirrorRow_SSSE3()
2050 "1: \n" in ARGBMirrorRow_SSSE3()
2051 "movdqa (%0,%2,4),%%xmm0 \n" in ARGBMirrorRow_SSSE3()
2052 "pshufb %%xmm5,%%xmm0 \n" in ARGBMirrorRow_SSSE3()
2053 "sub $0x4,%2 \n" in ARGBMirrorRow_SSSE3()
2054 "movdqa %%xmm0,(%1) \n" in ARGBMirrorRow_SSSE3()
2055 "lea 0x10(%1),%1 \n" in ARGBMirrorRow_SSSE3()
2056 "jg 1b \n" in ARGBMirrorRow_SSSE3()
2072 "pcmpeqb %%xmm5,%%xmm5 \n" in SplitUV_SSE2()
2073 "psrlw $0x8,%%xmm5 \n" in SplitUV_SSE2()
2074 "sub %1,%2 \n" in SplitUV_SSE2()
2075 ".p2align 4 \n" in SplitUV_SSE2()
2076 "1: \n" in SplitUV_SSE2()
2077 "movdqa (%0),%%xmm0 \n" in SplitUV_SSE2()
2078 "movdqa 0x10(%0),%%xmm1 \n" in SplitUV_SSE2()
2079 "lea 0x20(%0),%0 \n" in SplitUV_SSE2()
2080 "movdqa %%xmm0,%%xmm2 \n" in SplitUV_SSE2()
2081 "movdqa %%xmm1,%%xmm3 \n" in SplitUV_SSE2()
2082 "pand %%xmm5,%%xmm0 \n" in SplitUV_SSE2()
2083 "pand %%xmm5,%%xmm1 \n" in SplitUV_SSE2()
2084 "packuswb %%xmm1,%%xmm0 \n" in SplitUV_SSE2()
2085 "psrlw $0x8,%%xmm2 \n" in SplitUV_SSE2()
2086 "psrlw $0x8,%%xmm3 \n" in SplitUV_SSE2()
2087 "packuswb %%xmm3,%%xmm2 \n" in SplitUV_SSE2()
2088 "movdqa %%xmm0,(%1) \n" in SplitUV_SSE2()
2089 "movdqa %%xmm2,(%1,%2) \n" in SplitUV_SSE2()
2090 "lea 0x10(%1),%1 \n" in SplitUV_SSE2()
2091 "sub $0x10,%3 \n" in SplitUV_SSE2()
2092 "jg 1b \n" in SplitUV_SSE2()
2109 "sub %0,%1 \n" in CopyRow_SSE2()
2110 ".p2align 4 \n" in CopyRow_SSE2()
2111 "1: \n" in CopyRow_SSE2()
2112 "movdqa (%0),%%xmm0 \n" in CopyRow_SSE2()
2113 "movdqa 0x10(%0),%%xmm1 \n" in CopyRow_SSE2()
2114 "movdqa %%xmm0,(%0,%1) \n" in CopyRow_SSE2()
2115 "movdqa %%xmm1,0x10(%0,%1) \n" in CopyRow_SSE2()
2116 "lea 0x20(%0),%0 \n" in CopyRow_SSE2()
2117 "sub $0x20,%2 \n" in CopyRow_SSE2()
2118 "jg 1b \n" in CopyRow_SSE2()
2135 "shr $0x2,%2 \n" in CopyRow_X86()
2136 "rep movsl \n" in CopyRow_X86()
2150 "shr $0x2,%1 \n" in SetRow8_X86()
2151 "rep stosl \n" in SetRow8_X86()
2164 "rep stosl \n" in SetRows32_X86()
2177 "pcmpeqb %%xmm5,%%xmm5 \n" in YUY2ToYRow_SSE2()
2178 "psrlw $0x8,%%xmm5 \n" in YUY2ToYRow_SSE2()
2179 ".p2align 4 \n" in YUY2ToYRow_SSE2()
2180 "1: \n" in YUY2ToYRow_SSE2()
2181 "movdqa (%0),%%xmm0 \n" in YUY2ToYRow_SSE2()
2182 "movdqa 0x10(%0),%%xmm1 \n" in YUY2ToYRow_SSE2()
2183 "lea 0x20(%0),%0 \n" in YUY2ToYRow_SSE2()
2184 "pand %%xmm5,%%xmm0 \n" in YUY2ToYRow_SSE2()
2185 "pand %%xmm5,%%xmm1 \n" in YUY2ToYRow_SSE2()
2186 "packuswb %%xmm1,%%xmm0 \n" in YUY2ToYRow_SSE2()
2187 "movdqa %%xmm0,(%1) \n" in YUY2ToYRow_SSE2()
2188 "lea 0x10(%1),%1 \n" in YUY2ToYRow_SSE2()
2189 "sub $0x10,%2 \n" in YUY2ToYRow_SSE2()
2190 "jg 1b \n" in YUY2ToYRow_SSE2()
2205 "pcmpeqb %%xmm5,%%xmm5 \n" in YUY2ToUVRow_SSE2()
2206 "psrlw $0x8,%%xmm5 \n" in YUY2ToUVRow_SSE2()
2207 "sub %1,%2 \n" in YUY2ToUVRow_SSE2()
2208 ".p2align 4 \n" in YUY2ToUVRow_SSE2()
2209 "1: \n" in YUY2ToUVRow_SSE2()
2210 "movdqa (%0),%%xmm0 \n" in YUY2ToUVRow_SSE2()
2211 "movdqa 0x10(%0),%%xmm1 \n" in YUY2ToUVRow_SSE2()
2212 "movdqa (%0,%4,1),%%xmm2 \n" in YUY2ToUVRow_SSE2()
2213 "movdqa 0x10(%0,%4,1),%%xmm3 \n" in YUY2ToUVRow_SSE2()
2214 "lea 0x20(%0),%0 \n" in YUY2ToUVRow_SSE2()
2215 "pavgb %%xmm2,%%xmm0 \n" in YUY2ToUVRow_SSE2()
2216 "pavgb %%xmm3,%%xmm1 \n" in YUY2ToUVRow_SSE2()
2217 "psrlw $0x8,%%xmm0 \n" in YUY2ToUVRow_SSE2()
2218 "psrlw $0x8,%%xmm1 \n" in YUY2ToUVRow_SSE2()
2219 "packuswb %%xmm1,%%xmm0 \n" in YUY2ToUVRow_SSE2()
2220 "movdqa %%xmm0,%%xmm1 \n" in YUY2ToUVRow_SSE2()
2221 "pand %%xmm5,%%xmm0 \n" in YUY2ToUVRow_SSE2()
2222 "packuswb %%xmm0,%%xmm0 \n" in YUY2ToUVRow_SSE2()
2223 "psrlw $0x8,%%xmm1 \n" in YUY2ToUVRow_SSE2()
2224 "packuswb %%xmm1,%%xmm1 \n" in YUY2ToUVRow_SSE2()
2225 "movq %%xmm0,(%1) \n" in YUY2ToUVRow_SSE2()
2226 "movq %%xmm1,(%1,%2) \n" in YUY2ToUVRow_SSE2()
2227 "lea 0x8(%1),%1 \n" in YUY2ToUVRow_SSE2()
2228 "sub $0x10,%3 \n" in YUY2ToUVRow_SSE2()
2229 "jg 1b \n" in YUY2ToUVRow_SSE2()
2245 "pcmpeqb %%xmm5,%%xmm5 \n" in YUY2ToUV422Row_SSE2()
2246 "psrlw $0x8,%%xmm5 \n" in YUY2ToUV422Row_SSE2()
2247 "sub %1,%2 \n" in YUY2ToUV422Row_SSE2()
2248 ".p2align 4 \n" in YUY2ToUV422Row_SSE2()
2249 "1: \n" in YUY2ToUV422Row_SSE2()
2250 "movdqa (%0),%%xmm0 \n" in YUY2ToUV422Row_SSE2()
2251 "movdqa 0x10(%0),%%xmm1 \n" in YUY2ToUV422Row_SSE2()
2252 "lea 0x20(%0),%0 \n" in YUY2ToUV422Row_SSE2()
2253 "psrlw $0x8,%%xmm0 \n" in YUY2ToUV422Row_SSE2()
2254 "psrlw $0x8,%%xmm1 \n" in YUY2ToUV422Row_SSE2()
2255 "packuswb %%xmm1,%%xmm0 \n" in YUY2ToUV422Row_SSE2()
2256 "movdqa %%xmm0,%%xmm1 \n" in YUY2ToUV422Row_SSE2()
2257 "pand %%xmm5,%%xmm0 \n" in YUY2ToUV422Row_SSE2()
2258 "packuswb %%xmm0,%%xmm0 \n" in YUY2ToUV422Row_SSE2()
2259 "psrlw $0x8,%%xmm1 \n" in YUY2ToUV422Row_SSE2()
2260 "packuswb %%xmm1,%%xmm1 \n" in YUY2ToUV422Row_SSE2()
2261 "movq %%xmm0,(%1) \n" in YUY2ToUV422Row_SSE2()
2262 "movq %%xmm1,(%1,%2) \n" in YUY2ToUV422Row_SSE2()
2263 "lea 0x8(%1),%1 \n" in YUY2ToUV422Row_SSE2()
2264 "sub $0x10,%3 \n" in YUY2ToUV422Row_SSE2()
2265 "jg 1b \n" in YUY2ToUV422Row_SSE2()
2281 "pcmpeqb %%xmm5,%%xmm5 \n" in YUY2ToYRow_Unaligned_SSE2()
2282 "psrlw $0x8,%%xmm5 \n" in YUY2ToYRow_Unaligned_SSE2()
2283 ".p2align 4 \n" in YUY2ToYRow_Unaligned_SSE2()
2284 "1: \n" in YUY2ToYRow_Unaligned_SSE2()
2285 "movdqu (%0),%%xmm0 \n" in YUY2ToYRow_Unaligned_SSE2()
2286 "movdqu 0x10(%0),%%xmm1 \n" in YUY2ToYRow_Unaligned_SSE2()
2287 "lea 0x20(%0),%0 \n" in YUY2ToYRow_Unaligned_SSE2()
2288 "pand %%xmm5,%%xmm0 \n" in YUY2ToYRow_Unaligned_SSE2()
2289 "pand %%xmm5,%%xmm1 \n" in YUY2ToYRow_Unaligned_SSE2()
2290 "packuswb %%xmm1,%%xmm0 \n" in YUY2ToYRow_Unaligned_SSE2()
2291 "sub $0x10,%2 \n" in YUY2ToYRow_Unaligned_SSE2()
2292 "movdqu %%xmm0,(%1) \n" in YUY2ToYRow_Unaligned_SSE2()
2293 "lea 0x10(%1),%1 \n" in YUY2ToYRow_Unaligned_SSE2()
2294 "jg 1b \n" in YUY2ToYRow_Unaligned_SSE2()
2310 "pcmpeqb %%xmm5,%%xmm5 \n" in YUY2ToUVRow_Unaligned_SSE2()
2311 "psrlw $0x8,%%xmm5 \n" in YUY2ToUVRow_Unaligned_SSE2()
2312 "sub %1,%2 \n" in YUY2ToUVRow_Unaligned_SSE2()
2313 ".p2align 4 \n" in YUY2ToUVRow_Unaligned_SSE2()
2314 "1: \n" in YUY2ToUVRow_Unaligned_SSE2()
2315 "movdqu (%0),%%xmm0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2316 "movdqu 0x10(%0),%%xmm1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2317 "movdqu (%0,%4,1),%%xmm2 \n" in YUY2ToUVRow_Unaligned_SSE2()
2318 "movdqu 0x10(%0,%4,1),%%xmm3 \n" in YUY2ToUVRow_Unaligned_SSE2()
2319 "lea 0x20(%0),%0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2320 "pavgb %%xmm2,%%xmm0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2321 "pavgb %%xmm3,%%xmm1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2322 "psrlw $0x8,%%xmm0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2323 "psrlw $0x8,%%xmm1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2324 "packuswb %%xmm1,%%xmm0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2325 "movdqa %%xmm0,%%xmm1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2326 "pand %%xmm5,%%xmm0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2327 "packuswb %%xmm0,%%xmm0 \n" in YUY2ToUVRow_Unaligned_SSE2()
2328 "psrlw $0x8,%%xmm1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2329 "packuswb %%xmm1,%%xmm1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2330 "movq %%xmm0,(%1) \n" in YUY2ToUVRow_Unaligned_SSE2()
2331 "movq %%xmm1,(%1,%2) \n" in YUY2ToUVRow_Unaligned_SSE2()
2332 "lea 0x8(%1),%1 \n" in YUY2ToUVRow_Unaligned_SSE2()
2333 "sub $0x10,%3 \n" in YUY2ToUVRow_Unaligned_SSE2()
2334 "jg 1b \n" in YUY2ToUVRow_Unaligned_SSE2()
2350 "pcmpeqb %%xmm5,%%xmm5 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2351 "psrlw $0x8,%%xmm5 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2352 "sub %1,%2 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2353 ".p2align 4 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2354 "1: \n" in YUY2ToUV422Row_Unaligned_SSE2()
2355 "movdqu (%0),%%xmm0 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2356 "movdqu 0x10(%0),%%xmm1 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2357 "lea 0x20(%0),%0 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2358 "psrlw $0x8,%%xmm0 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2359 "psrlw $0x8,%%xmm1 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2360 "packuswb %%xmm1,%%xmm0 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2361 "movdqa %%xmm0,%%xmm1 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2362 "pand %%xmm5,%%xmm0 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2363 "packuswb %%xmm0,%%xmm0 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2364 "psrlw $0x8,%%xmm1 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2365 "packuswb %%xmm1,%%xmm1 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2366 "movq %%xmm0,(%1) \n" in YUY2ToUV422Row_Unaligned_SSE2()
2367 "movq %%xmm1,(%1,%2) \n" in YUY2ToUV422Row_Unaligned_SSE2()
2368 "lea 0x8(%1),%1 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2369 "sub $0x10,%3 \n" in YUY2ToUV422Row_Unaligned_SSE2()
2370 "jg 1b \n" in YUY2ToUV422Row_Unaligned_SSE2()
2385 ".p2align 4 \n" in UYVYToYRow_SSE2()
2386 "1: \n" in UYVYToYRow_SSE2()
2387 "movdqa (%0),%%xmm0 \n" in UYVYToYRow_SSE2()
2388 "movdqa 0x10(%0),%%xmm1 \n" in UYVYToYRow_SSE2()
2389 "lea 0x20(%0),%0 \n" in UYVYToYRow_SSE2()
2390 "psrlw $0x8,%%xmm0 \n" in UYVYToYRow_SSE2()
2391 "psrlw $0x8,%%xmm1 \n" in UYVYToYRow_SSE2()
2392 "packuswb %%xmm1,%%xmm0 \n" in UYVYToYRow_SSE2()
2393 "sub $0x10,%2 \n" in UYVYToYRow_SSE2()
2394 "movdqa %%xmm0,(%1) \n" in UYVYToYRow_SSE2()
2395 "lea 0x10(%1),%1 \n" in UYVYToYRow_SSE2()
2396 "jg 1b \n" in UYVYToYRow_SSE2()
2411 "pcmpeqb %%xmm5,%%xmm5 \n" in UYVYToUVRow_SSE2()
2412 "psrlw $0x8,%%xmm5 \n" in UYVYToUVRow_SSE2()
2413 "sub %1,%2 \n" in UYVYToUVRow_SSE2()
2414 ".p2align 4 \n" in UYVYToUVRow_SSE2()
2415 "1: \n" in UYVYToUVRow_SSE2()
2416 "movdqa (%0),%%xmm0 \n" in UYVYToUVRow_SSE2()
2417 "movdqa 0x10(%0),%%xmm1 \n" in UYVYToUVRow_SSE2()
2418 "movdqa (%0,%4,1),%%xmm2 \n" in UYVYToUVRow_SSE2()
2419 "movdqa 0x10(%0,%4,1),%%xmm3 \n" in UYVYToUVRow_SSE2()
2420 "lea 0x20(%0),%0 \n" in UYVYToUVRow_SSE2()
2421 "pavgb %%xmm2,%%xmm0 \n" in UYVYToUVRow_SSE2()
2422 "pavgb %%xmm3,%%xmm1 \n" in UYVYToUVRow_SSE2()
2423 "pand %%xmm5,%%xmm0 \n" in UYVYToUVRow_SSE2()
2424 "pand %%xmm5,%%xmm1 \n" in UYVYToUVRow_SSE2()
2425 "packuswb %%xmm1,%%xmm0 \n" in UYVYToUVRow_SSE2()
2426 "movdqa %%xmm0,%%xmm1 \n" in UYVYToUVRow_SSE2()
2427 "pand %%xmm5,%%xmm0 \n" in UYVYToUVRow_SSE2()
2428 "packuswb %%xmm0,%%xmm0 \n" in UYVYToUVRow_SSE2()
2429 "psrlw $0x8,%%xmm1 \n" in UYVYToUVRow_SSE2()
2430 "packuswb %%xmm1,%%xmm1 \n" in UYVYToUVRow_SSE2()
2431 "movq %%xmm0,(%1) \n" in UYVYToUVRow_SSE2()
2432 "movq %%xmm1,(%1,%2) \n" in UYVYToUVRow_SSE2()
2433 "lea 0x8(%1),%1 \n" in UYVYToUVRow_SSE2()
2434 "sub $0x10,%3 \n" in UYVYToUVRow_SSE2()
2435 "jg 1b \n" in UYVYToUVRow_SSE2()
2451 "pcmpeqb %%xmm5,%%xmm5 \n" in UYVYToUV422Row_SSE2()
2452 "psrlw $0x8,%%xmm5 \n" in UYVYToUV422Row_SSE2()
2453 "sub %1,%2 \n" in UYVYToUV422Row_SSE2()
2454 ".p2align 4 \n" in UYVYToUV422Row_SSE2()
2455 "1: \n" in UYVYToUV422Row_SSE2()
2456 "movdqa (%0),%%xmm0 \n" in UYVYToUV422Row_SSE2()
2457 "movdqa 0x10(%0),%%xmm1 \n" in UYVYToUV422Row_SSE2()
2458 "lea 0x20(%0),%0 \n" in UYVYToUV422Row_SSE2()
2459 "pand %%xmm5,%%xmm0 \n" in UYVYToUV422Row_SSE2()
2460 "pand %%xmm5,%%xmm1 \n" in UYVYToUV422Row_SSE2()
2461 "packuswb %%xmm1,%%xmm0 \n" in UYVYToUV422Row_SSE2()
2462 "movdqa %%xmm0,%%xmm1 \n" in UYVYToUV422Row_SSE2()
2463 "pand %%xmm5,%%xmm0 \n" in UYVYToUV422Row_SSE2()
2464 "packuswb %%xmm0,%%xmm0 \n" in UYVYToUV422Row_SSE2()
2465 "psrlw $0x8,%%xmm1 \n" in UYVYToUV422Row_SSE2()
2466 "packuswb %%xmm1,%%xmm1 \n" in UYVYToUV422Row_SSE2()
2467 "movq %%xmm0,(%1) \n" in UYVYToUV422Row_SSE2()
2468 "movq %%xmm1,(%1,%2) \n" in UYVYToUV422Row_SSE2()
2469 "lea 0x8(%1),%1 \n" in UYVYToUV422Row_SSE2()
2470 "sub $0x10,%3 \n" in UYVYToUV422Row_SSE2()
2471 "jg 1b \n" in UYVYToUV422Row_SSE2()
2487 ".p2align 4 \n" in UYVYToYRow_Unaligned_SSE2()
2488 "1: \n" in UYVYToYRow_Unaligned_SSE2()
2489 "movdqu (%0),%%xmm0 \n" in UYVYToYRow_Unaligned_SSE2()
2490 "movdqu 0x10(%0),%%xmm1 \n" in UYVYToYRow_Unaligned_SSE2()
2491 "lea 0x20(%0),%0 \n" in UYVYToYRow_Unaligned_SSE2()
2492 "psrlw $0x8,%%xmm0 \n" in UYVYToYRow_Unaligned_SSE2()
2493 "psrlw $0x8,%%xmm1 \n" in UYVYToYRow_Unaligned_SSE2()
2494 "packuswb %%xmm1,%%xmm0 \n" in UYVYToYRow_Unaligned_SSE2()
2495 "sub $0x10,%2 \n" in UYVYToYRow_Unaligned_SSE2()
2496 "movdqu %%xmm0,(%1) \n" in UYVYToYRow_Unaligned_SSE2()
2497 "lea 0x10(%1),%1 \n" in UYVYToYRow_Unaligned_SSE2()
2498 "jg 1b \n" in UYVYToYRow_Unaligned_SSE2()
2513 "pcmpeqb %%xmm5,%%xmm5 \n" in UYVYToUVRow_Unaligned_SSE2()
2514 "psrlw $0x8,%%xmm5 \n" in UYVYToUVRow_Unaligned_SSE2()
2515 "sub %1,%2 \n" in UYVYToUVRow_Unaligned_SSE2()
2516 ".p2align 4 \n" in UYVYToUVRow_Unaligned_SSE2()
2517 "1: \n" in UYVYToUVRow_Unaligned_SSE2()
2518 "movdqu (%0),%%xmm0 \n" in UYVYToUVRow_Unaligned_SSE2()
2519 "movdqu 0x10(%0),%%xmm1 \n" in UYVYToUVRow_Unaligned_SSE2()
2520 "movdqu (%0,%4,1),%%xmm2 \n" in UYVYToUVRow_Unaligned_SSE2()
2521 "movdqu 0x10(%0,%4,1),%%xmm3 \n" in UYVYToUVRow_Unaligned_SSE2()
2522 "lea 0x20(%0),%0 \n" in UYVYToUVRow_Unaligned_SSE2()
2523 "pavgb %%xmm2,%%xmm0 \n" in UYVYToUVRow_Unaligned_SSE2()
2524 "pavgb %%xmm3,%%xmm1 \n" in UYVYToUVRow_Unaligned_SSE2()
2525 "pand %%xmm5,%%xmm0 \n" in UYVYToUVRow_Unaligned_SSE2()
2526 "pand %%xmm5,%%xmm1 \n" in UYVYToUVRow_Unaligned_SSE2()
2527 "packuswb %%xmm1,%%xmm0 \n" in UYVYToUVRow_Unaligned_SSE2()
2528 "movdqa %%xmm0,%%xmm1 \n" in UYVYToUVRow_Unaligned_SSE2()
2529 "pand %%xmm5,%%xmm0 \n" in UYVYToUVRow_Unaligned_SSE2()
2530 "packuswb %%xmm0,%%xmm0 \n" in UYVYToUVRow_Unaligned_SSE2()
2531 "psrlw $0x8,%%xmm1 \n" in UYVYToUVRow_Unaligned_SSE2()
2532 "packuswb %%xmm1,%%xmm1 \n" in UYVYToUVRow_Unaligned_SSE2()
2533 "movq %%xmm0,(%1) \n" in UYVYToUVRow_Unaligned_SSE2()
2534 "movq %%xmm1,(%1,%2) \n" in UYVYToUVRow_Unaligned_SSE2()
2535 "lea 0x8(%1),%1 \n" in UYVYToUVRow_Unaligned_SSE2()
2536 "sub $0x10,%3 \n" in UYVYToUVRow_Unaligned_SSE2()
2537 "jg 1b \n" in UYVYToUVRow_Unaligned_SSE2()
2553 "pcmpeqb %%xmm5,%%xmm5 \n" in UYVYToUV422Row_Unaligned_SSE2()
2554 "psrlw $0x8,%%xmm5 \n" in UYVYToUV422Row_Unaligned_SSE2()
2555 "sub %1,%2 \n" in UYVYToUV422Row_Unaligned_SSE2()
2556 ".p2align 4 \n" in UYVYToUV422Row_Unaligned_SSE2()
2557 "1: \n" in UYVYToUV422Row_Unaligned_SSE2()
2558 "movdqu (%0),%%xmm0 \n" in UYVYToUV422Row_Unaligned_SSE2()
2559 "movdqu 0x10(%0),%%xmm1 \n" in UYVYToUV422Row_Unaligned_SSE2()
2560 "lea 0x20(%0),%0 \n" in UYVYToUV422Row_Unaligned_SSE2()
2561 "pand %%xmm5,%%xmm0 \n" in UYVYToUV422Row_Unaligned_SSE2()
2562 "pand %%xmm5,%%xmm1 \n" in UYVYToUV422Row_Unaligned_SSE2()
2563 "packuswb %%xmm1,%%xmm0 \n" in UYVYToUV422Row_Unaligned_SSE2()
2564 "movdqa %%xmm0,%%xmm1 \n" in UYVYToUV422Row_Unaligned_SSE2()
2565 "pand %%xmm5,%%xmm0 \n" in UYVYToUV422Row_Unaligned_SSE2()
2566 "packuswb %%xmm0,%%xmm0 \n" in UYVYToUV422Row_Unaligned_SSE2()
2567 "psrlw $0x8,%%xmm1 \n" in UYVYToUV422Row_Unaligned_SSE2()
2568 "packuswb %%xmm1,%%xmm1 \n" in UYVYToUV422Row_Unaligned_SSE2()
2569 "movq %%xmm0,(%1) \n" in UYVYToUV422Row_Unaligned_SSE2()
2570 "movq %%xmm1,(%1,%2) \n" in UYVYToUV422Row_Unaligned_SSE2()
2571 "lea 0x8(%1),%1 \n" in UYVYToUV422Row_Unaligned_SSE2()
2572 "sub $0x10,%3 \n" in UYVYToUV422Row_Unaligned_SSE2()
2573 "jg 1b \n" in UYVYToUV422Row_Unaligned_SSE2()
2592 "pcmpeqb %%xmm7,%%xmm7 \n" in ARGBBlendRow_SSE2()
2593 "psrlw $0xf,%%xmm7 \n" in ARGBBlendRow_SSE2()
2594 "pcmpeqb %%xmm6,%%xmm6 \n" in ARGBBlendRow_SSE2()
2595 "psrlw $0x8,%%xmm6 \n" in ARGBBlendRow_SSE2()
2596 "pcmpeqb %%xmm5,%%xmm5 \n" in ARGBBlendRow_SSE2()
2597 "psllw $0x8,%%xmm5 \n" in ARGBBlendRow_SSE2()
2598 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBBlendRow_SSE2()
2599 "pslld $0x18,%%xmm4 \n" in ARGBBlendRow_SSE2()
2600 "sub $0x1,%3 \n" in ARGBBlendRow_SSE2()
2601 "je 91f \n" in ARGBBlendRow_SSE2()
2602 "jl 99f \n" in ARGBBlendRow_SSE2()
2605 "10: \n" in ARGBBlendRow_SSE2()
2606 "test $0xf,%2 \n" in ARGBBlendRow_SSE2()
2607 "je 19f \n" in ARGBBlendRow_SSE2()
2608 "movd (%0),%%xmm3 \n" in ARGBBlendRow_SSE2()
2609 "lea 0x4(%0),%0 \n" in ARGBBlendRow_SSE2()
2610 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSE2()
2611 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSE2()
2612 "movd (%1),%%xmm2 \n" in ARGBBlendRow_SSE2()
2613 "psrlw $0x8,%%xmm3 \n" in ARGBBlendRow_SSE2()
2614 "pshufhw $0xf5,%%xmm3,%%xmm3 \n" in ARGBBlendRow_SSE2()
2615 "pshuflw $0xf5,%%xmm3,%%xmm3 \n" in ARGBBlendRow_SSE2()
2616 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSE2()
2617 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSE2()
2618 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSE2()
2619 "movd (%1),%%xmm1 \n" in ARGBBlendRow_SSE2()
2620 "lea 0x4(%1),%1 \n" in ARGBBlendRow_SSE2()
2621 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSE2()
2622 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSE2()
2623 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSE2()
2624 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSE2()
2625 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSE2()
2626 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSE2()
2627 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSE2()
2628 "sub $0x1,%3 \n" in ARGBBlendRow_SSE2()
2629 "movd %%xmm0,(%2) \n" in ARGBBlendRow_SSE2()
2630 "lea 0x4(%2),%2 \n" in ARGBBlendRow_SSE2()
2631 "jge 10b \n" in ARGBBlendRow_SSE2()
2633 "19: \n" in ARGBBlendRow_SSE2()
2634 "add $1-4,%3 \n" in ARGBBlendRow_SSE2()
2635 "jl 49f \n" in ARGBBlendRow_SSE2()
2638 ".p2align 2 \n" in ARGBBlendRow_SSE2()
2639 "41: \n" in ARGBBlendRow_SSE2()
2640 "movdqu (%0),%%xmm3 \n" in ARGBBlendRow_SSE2()
2641 "lea 0x10(%0),%0 \n" in ARGBBlendRow_SSE2()
2642 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSE2()
2643 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSE2()
2644 "movdqu (%1),%%xmm2 \n" in ARGBBlendRow_SSE2()
2645 "psrlw $0x8,%%xmm3 \n" in ARGBBlendRow_SSE2()
2646 "pshufhw $0xf5,%%xmm3,%%xmm3 \n" in ARGBBlendRow_SSE2()
2647 "pshuflw $0xf5,%%xmm3,%%xmm3 \n" in ARGBBlendRow_SSE2()
2648 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSE2()
2649 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSE2()
2650 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSE2()
2651 "movdqu (%1),%%xmm1 \n" in ARGBBlendRow_SSE2()
2652 "lea 0x10(%1),%1 \n" in ARGBBlendRow_SSE2()
2653 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSE2()
2654 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSE2()
2655 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSE2()
2656 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSE2()
2657 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSE2()
2658 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSE2()
2659 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSE2()
2660 "sub $0x4,%3 \n" in ARGBBlendRow_SSE2()
2661 "movdqa %%xmm0,(%2) \n" in ARGBBlendRow_SSE2()
2662 "lea 0x10(%2),%2 \n" in ARGBBlendRow_SSE2()
2663 "jge 41b \n" in ARGBBlendRow_SSE2()
2665 "49: \n" in ARGBBlendRow_SSE2()
2666 "add $0x3,%3 \n" in ARGBBlendRow_SSE2()
2667 "jl 99f \n" in ARGBBlendRow_SSE2()
2670 "91: \n" in ARGBBlendRow_SSE2()
2671 "movd (%0),%%xmm3 \n" in ARGBBlendRow_SSE2()
2672 "lea 0x4(%0),%0 \n" in ARGBBlendRow_SSE2()
2673 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSE2()
2674 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSE2()
2675 "movd (%1),%%xmm2 \n" in ARGBBlendRow_SSE2()
2676 "psrlw $0x8,%%xmm3 \n" in ARGBBlendRow_SSE2()
2677 "pshufhw $0xf5,%%xmm3,%%xmm3 \n" in ARGBBlendRow_SSE2()
2678 "pshuflw $0xf5,%%xmm3,%%xmm3 \n" in ARGBBlendRow_SSE2()
2679 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSE2()
2680 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSE2()
2681 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSE2()
2682 "movd (%1),%%xmm1 \n" in ARGBBlendRow_SSE2()
2683 "lea 0x4(%1),%1 \n" in ARGBBlendRow_SSE2()
2684 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSE2()
2685 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSE2()
2686 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSE2()
2687 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSE2()
2688 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSE2()
2689 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSE2()
2690 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSE2()
2691 "sub $0x1,%3 \n" in ARGBBlendRow_SSE2()
2692 "movd %%xmm0,(%2) \n" in ARGBBlendRow_SSE2()
2693 "lea 0x4(%2),%2 \n" in ARGBBlendRow_SSE2()
2694 "jge 91b \n" in ARGBBlendRow_SSE2()
2695 "99: \n" in ARGBBlendRow_SSE2()
2729 "pcmpeqb %%xmm7,%%xmm7 \n" in ARGBBlendRow_SSSE3()
2730 "psrlw $0xf,%%xmm7 \n" in ARGBBlendRow_SSSE3()
2731 "pcmpeqb %%xmm6,%%xmm6 \n" in ARGBBlendRow_SSSE3()
2732 "psrlw $0x8,%%xmm6 \n" in ARGBBlendRow_SSSE3()
2733 "pcmpeqb %%xmm5,%%xmm5 \n" in ARGBBlendRow_SSSE3()
2734 "psllw $0x8,%%xmm5 \n" in ARGBBlendRow_SSSE3()
2735 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBBlendRow_SSSE3()
2736 "pslld $0x18,%%xmm4 \n" in ARGBBlendRow_SSSE3()
2737 "sub $0x1,%3 \n" in ARGBBlendRow_SSSE3()
2738 "je 91f \n" in ARGBBlendRow_SSSE3()
2739 "jl 99f \n" in ARGBBlendRow_SSSE3()
2742 "10: \n" in ARGBBlendRow_SSSE3()
2743 "test $0xf,%2 \n" in ARGBBlendRow_SSSE3()
2744 "je 19f \n" in ARGBBlendRow_SSSE3()
2745 "movd (%0),%%xmm3 \n" in ARGBBlendRow_SSSE3()
2746 "lea 0x4(%0),%0 \n" in ARGBBlendRow_SSSE3()
2747 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2748 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2749 "movd (%1),%%xmm2 \n" in ARGBBlendRow_SSSE3()
2750 "pshufb %4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2751 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2752 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2753 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2754 "movd (%1),%%xmm1 \n" in ARGBBlendRow_SSSE3()
2755 "lea 0x4(%1),%1 \n" in ARGBBlendRow_SSSE3()
2756 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2757 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2758 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2759 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2760 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2761 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2762 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2763 "sub $0x1,%3 \n" in ARGBBlendRow_SSSE3()
2764 "movd %%xmm0,(%2) \n" in ARGBBlendRow_SSSE3()
2765 "lea 0x4(%2),%2 \n" in ARGBBlendRow_SSSE3()
2766 "jge 10b \n" in ARGBBlendRow_SSSE3()
2768 "19: \n" in ARGBBlendRow_SSSE3()
2769 "add $1-4,%3 \n" in ARGBBlendRow_SSSE3()
2770 "jl 49f \n" in ARGBBlendRow_SSSE3()
2771 "test $0xf,%0 \n" in ARGBBlendRow_SSSE3()
2772 "jne 41f \n" in ARGBBlendRow_SSSE3()
2773 "test $0xf,%1 \n" in ARGBBlendRow_SSSE3()
2774 "jne 41f \n" in ARGBBlendRow_SSSE3()
2777 ".p2align 2 \n" in ARGBBlendRow_SSSE3()
2778 "40: \n" in ARGBBlendRow_SSSE3()
2779 "movdqa (%0),%%xmm3 \n" in ARGBBlendRow_SSSE3()
2780 "lea 0x10(%0),%0 \n" in ARGBBlendRow_SSSE3()
2781 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2782 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2783 "movdqa (%1),%%xmm2 \n" in ARGBBlendRow_SSSE3()
2784 "pshufb %4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2785 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2786 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2787 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2788 "movdqa (%1),%%xmm1 \n" in ARGBBlendRow_SSSE3()
2789 "lea 0x10(%1),%1 \n" in ARGBBlendRow_SSSE3()
2790 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2791 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2792 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2793 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2794 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2795 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2796 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2797 "sub $0x4,%3 \n" in ARGBBlendRow_SSSE3()
2798 "movdqa %%xmm0,(%2) \n" in ARGBBlendRow_SSSE3()
2799 "lea 0x10(%2),%2 \n" in ARGBBlendRow_SSSE3()
2800 "jge 40b \n" in ARGBBlendRow_SSSE3()
2801 "jmp 49f \n" in ARGBBlendRow_SSSE3()
2804 ".p2align 2 \n" in ARGBBlendRow_SSSE3()
2805 "41: \n" in ARGBBlendRow_SSSE3()
2806 "movdqu (%0),%%xmm3 \n" in ARGBBlendRow_SSSE3()
2807 "lea 0x10(%0),%0 \n" in ARGBBlendRow_SSSE3()
2808 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2809 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2810 "movdqu (%1),%%xmm2 \n" in ARGBBlendRow_SSSE3()
2811 "pshufb %4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2812 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2813 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2814 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2815 "movdqu (%1),%%xmm1 \n" in ARGBBlendRow_SSSE3()
2816 "lea 0x10(%1),%1 \n" in ARGBBlendRow_SSSE3()
2817 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2818 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2819 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2820 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2821 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2822 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2823 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2824 "sub $0x4,%3 \n" in ARGBBlendRow_SSSE3()
2825 "movdqa %%xmm0,(%2) \n" in ARGBBlendRow_SSSE3()
2826 "lea 0x10(%2),%2 \n" in ARGBBlendRow_SSSE3()
2827 "jge 41b \n" in ARGBBlendRow_SSSE3()
2829 "49: \n" in ARGBBlendRow_SSSE3()
2830 "add $0x3,%3 \n" in ARGBBlendRow_SSSE3()
2831 "jl 99f \n" in ARGBBlendRow_SSSE3()
2834 "91: \n" in ARGBBlendRow_SSSE3()
2835 "movd (%0),%%xmm3 \n" in ARGBBlendRow_SSSE3()
2836 "lea 0x4(%0),%0 \n" in ARGBBlendRow_SSSE3()
2837 "movdqa %%xmm3,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2838 "pxor %%xmm4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2839 "movd (%1),%%xmm2 \n" in ARGBBlendRow_SSSE3()
2840 "pshufb %4,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2841 "pand %%xmm6,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2842 "paddw %%xmm7,%%xmm3 \n" in ARGBBlendRow_SSSE3()
2843 "pmullw %%xmm3,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2844 "movd (%1),%%xmm1 \n" in ARGBBlendRow_SSSE3()
2845 "lea 0x4(%1),%1 \n" in ARGBBlendRow_SSSE3()
2846 "psrlw $0x8,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2847 "por %%xmm4,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2848 "pmullw %%xmm3,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2849 "psrlw $0x8,%%xmm2 \n" in ARGBBlendRow_SSSE3()
2850 "paddusb %%xmm2,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2851 "pand %%xmm5,%%xmm1 \n" in ARGBBlendRow_SSSE3()
2852 "paddusb %%xmm1,%%xmm0 \n" in ARGBBlendRow_SSSE3()
2853 "sub $0x1,%3 \n" in ARGBBlendRow_SSSE3()
2854 "movd %%xmm0,(%2) \n" in ARGBBlendRow_SSSE3()
2855 "lea 0x4(%2),%2 \n" in ARGBBlendRow_SSSE3()
2856 "jge 91b \n" in ARGBBlendRow_SSSE3()
2857 "99: \n" in ARGBBlendRow_SSSE3()
2876 "sub %0,%1 \n" in ARGBAttenuateRow_SSE2()
2877 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBAttenuateRow_SSE2()
2878 "pslld $0x18,%%xmm4 \n" in ARGBAttenuateRow_SSE2()
2879 "pcmpeqb %%xmm5,%%xmm5 \n" in ARGBAttenuateRow_SSE2()
2880 "psrld $0x8,%%xmm5 \n" in ARGBAttenuateRow_SSE2()
2883 ".p2align 4 \n" in ARGBAttenuateRow_SSE2()
2884 "1: \n" in ARGBAttenuateRow_SSE2()
2885 "movdqa (%0),%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2886 "punpcklbw %%xmm0,%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2887 "pshufhw $0xff,%%xmm0,%%xmm2 \n" in ARGBAttenuateRow_SSE2()
2888 "pshuflw $0xff,%%xmm2,%%xmm2 \n" in ARGBAttenuateRow_SSE2()
2889 "pmulhuw %%xmm2,%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2890 "movdqa (%0),%%xmm1 \n" in ARGBAttenuateRow_SSE2()
2891 "punpckhbw %%xmm1,%%xmm1 \n" in ARGBAttenuateRow_SSE2()
2892 "pshufhw $0xff,%%xmm1,%%xmm2 \n" in ARGBAttenuateRow_SSE2()
2893 "pshuflw $0xff,%%xmm2,%%xmm2 \n" in ARGBAttenuateRow_SSE2()
2894 "pmulhuw %%xmm2,%%xmm1 \n" in ARGBAttenuateRow_SSE2()
2895 "movdqa (%0),%%xmm2 \n" in ARGBAttenuateRow_SSE2()
2896 "psrlw $0x8,%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2897 "pand %%xmm4,%%xmm2 \n" in ARGBAttenuateRow_SSE2()
2898 "psrlw $0x8,%%xmm1 \n" in ARGBAttenuateRow_SSE2()
2899 "packuswb %%xmm1,%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2900 "pand %%xmm5,%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2901 "por %%xmm2,%%xmm0 \n" in ARGBAttenuateRow_SSE2()
2902 "sub $0x4,%2 \n" in ARGBAttenuateRow_SSE2()
2903 "movdqa %%xmm0,(%0,%1,1) \n" in ARGBAttenuateRow_SSE2()
2904 "lea 0x10(%0),%0 \n" in ARGBAttenuateRow_SSE2()
2905 "jg 1b \n" in ARGBAttenuateRow_SSE2()
2931 "sub %0,%1 \n" in ARGBAttenuateRow_SSSE3()
2932 "pcmpeqb %%xmm3,%%xmm3 \n" in ARGBAttenuateRow_SSSE3()
2933 "pslld $0x18,%%xmm3 \n" in ARGBAttenuateRow_SSSE3()
2934 "movdqa %3,%%xmm4 \n" in ARGBAttenuateRow_SSSE3()
2935 "movdqa %4,%%xmm5 \n" in ARGBAttenuateRow_SSSE3()
2938 ".p2align 4 \n" in ARGBAttenuateRow_SSSE3()
2939 "1: \n" in ARGBAttenuateRow_SSSE3()
2940 "movdqa (%0),%%xmm0 \n" in ARGBAttenuateRow_SSSE3()
2941 "pshufb %%xmm4,%%xmm0 \n" in ARGBAttenuateRow_SSSE3()
2942 "movdqa (%0),%%xmm1 \n" in ARGBAttenuateRow_SSSE3()
2943 "punpcklbw %%xmm1,%%xmm1 \n" in ARGBAttenuateRow_SSSE3()
2944 "pmulhuw %%xmm1,%%xmm0 \n" in ARGBAttenuateRow_SSSE3()
2945 "movdqa (%0),%%xmm1 \n" in ARGBAttenuateRow_SSSE3()
2946 "pshufb %%xmm5,%%xmm1 \n" in ARGBAttenuateRow_SSSE3()
2947 "movdqa (%0),%%xmm2 \n" in ARGBAttenuateRow_SSSE3()
2948 "punpckhbw %%xmm2,%%xmm2 \n" in ARGBAttenuateRow_SSSE3()
2949 "pmulhuw %%xmm2,%%xmm1 \n" in ARGBAttenuateRow_SSSE3()
2950 "movdqa (%0),%%xmm2 \n" in ARGBAttenuateRow_SSSE3()
2951 "pand %%xmm3,%%xmm2 \n" in ARGBAttenuateRow_SSSE3()
2952 "psrlw $0x8,%%xmm0 \n" in ARGBAttenuateRow_SSSE3()
2953 "psrlw $0x8,%%xmm1 \n" in ARGBAttenuateRow_SSSE3()
2954 "packuswb %%xmm1,%%xmm0 \n" in ARGBAttenuateRow_SSSE3()
2955 "por %%xmm2,%%xmm0 \n" in ARGBAttenuateRow_SSSE3()
2956 "sub $0x4,%2 \n" in ARGBAttenuateRow_SSSE3()
2957 "movdqa %%xmm0,(%0,%1,1) \n" in ARGBAttenuateRow_SSSE3()
2958 "lea 0x10(%0),%0 \n" in ARGBAttenuateRow_SSSE3()
2959 "jg 1b \n" in ARGBAttenuateRow_SSSE3()
2980 "sub %0,%1 \n" in ARGBUnattenuateRow_SSE2()
2981 "pcmpeqb %%xmm4,%%xmm4 \n" in ARGBUnattenuateRow_SSE2()
2982 "pslld $0x18,%%xmm4 \n" in ARGBUnattenuateRow_SSE2()
2985 ".p2align 4 \n" in ARGBUnattenuateRow_SSE2()
2986 "1: \n" in ARGBUnattenuateRow_SSE2()
2987 "movdqa (%0),%%xmm0 \n" in ARGBUnattenuateRow_SSE2()
2988 "movzb 0x3(%0),%3 \n" in ARGBUnattenuateRow_SSE2()
2989 "punpcklbw %%xmm0,%%xmm0 \n" in ARGBUnattenuateRow_SSE2()
2990 "movd 0x0(%4,%3,4),%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
2991 "movzb 0x7(%0),%3 \n" in ARGBUnattenuateRow_SSE2()
2992 "movd 0x0(%4,%3,4),%%xmm3 \n" in ARGBUnattenuateRow_SSE2()
2993 "pshuflw $0xc0,%%xmm2,%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
2994 "pshuflw $0xc0,%%xmm3,%%xmm3 \n" in ARGBUnattenuateRow_SSE2()
2995 "movlhps %%xmm3,%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
2996 "pmulhuw %%xmm2,%%xmm0 \n" in ARGBUnattenuateRow_SSE2()
2997 "movdqa (%0),%%xmm1 \n" in ARGBUnattenuateRow_SSE2()
2998 "movzb 0xb(%0),%3 \n" in ARGBUnattenuateRow_SSE2()
2999 "punpckhbw %%xmm1,%%xmm1 \n" in ARGBUnattenuateRow_SSE2()
3000 "movd 0x0(%4,%3,4),%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
3001 "movzb 0xf(%0),%3 \n" in ARGBUnattenuateRow_SSE2()
3002 "movd 0x0(%4,%3,4),%%xmm3 \n" in ARGBUnattenuateRow_SSE2()
3003 "pshuflw $0xc0,%%xmm2,%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
3004 "pshuflw $0xc0,%%xmm3,%%xmm3 \n" in ARGBUnattenuateRow_SSE2()
3005 "movlhps %%xmm3,%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
3006 "pmulhuw %%xmm2,%%xmm1 \n" in ARGBUnattenuateRow_SSE2()
3007 "movdqa (%0),%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
3008 "pand %%xmm4,%%xmm2 \n" in ARGBUnattenuateRow_SSE2()
3009 "packuswb %%xmm1,%%xmm0 \n" in ARGBUnattenuateRow_SSE2()
3010 "por %%xmm2,%%xmm0 \n" in ARGBUnattenuateRow_SSE2()
3011 "sub $0x4,%2 \n" in ARGBUnattenuateRow_SSE2()
3012 "movdqa %%xmm0,(%0,%1,1) \n" in ARGBUnattenuateRow_SSE2()
3013 "lea 0x10(%0),%0 \n" in ARGBUnattenuateRow_SSE2()
3014 "jg 1b \n" in ARGBUnattenuateRow_SSE2()
3037 "movdqa %3,%%xmm4 \n" in ARGBGrayRow_SSSE3()
3038 "sub %0,%1 \n" in ARGBGrayRow_SSSE3()
3041 ".p2align 4 \n" in ARGBGrayRow_SSSE3()
3042 "1: \n" in ARGBGrayRow_SSSE3()
3043 "movdqa (%0),%%xmm0 \n" in ARGBGrayRow_SSSE3()
3044 "movdqa 0x10(%0),%%xmm1 \n" in ARGBGrayRow_SSSE3()
3045 "pmaddubsw %%xmm4,%%xmm0 \n" in ARGBGrayRow_SSSE3()
3046 "pmaddubsw %%xmm4,%%xmm1 \n" in ARGBGrayRow_SSSE3()
3047 "phaddw %%xmm1,%%xmm0 \n" in ARGBGrayRow_SSSE3()
3048 "psrlw $0x7,%%xmm0 \n" in ARGBGrayRow_SSSE3()
3049 "packuswb %%xmm0,%%xmm0 \n" in ARGBGrayRow_SSSE3()
3050 "movdqa (%0),%%xmm2 \n" in ARGBGrayRow_SSSE3()
3051 "movdqa 0x10(%0),%%xmm3 \n" in ARGBGrayRow_SSSE3()
3052 "psrld $0x18,%%xmm2 \n" in ARGBGrayRow_SSSE3()
3053 "psrld $0x18,%%xmm3 \n" in ARGBGrayRow_SSSE3()
3054 "packuswb %%xmm3,%%xmm2 \n" in ARGBGrayRow_SSSE3()
3055 "packuswb %%xmm2,%%xmm2 \n" in ARGBGrayRow_SSSE3()
3056 "movdqa %%xmm0,%%xmm3 \n" in ARGBGrayRow_SSSE3()
3057 "punpcklbw %%xmm0,%%xmm0 \n" in ARGBGrayRow_SSSE3()
3058 "punpcklbw %%xmm2,%%xmm3 \n" in ARGBGrayRow_SSSE3()
3059 "movdqa %%xmm0,%%xmm1 \n" in ARGBGrayRow_SSSE3()
3060 "punpcklwd %%xmm3,%%xmm0 \n" in ARGBGrayRow_SSSE3()
3061 "punpckhwd %%xmm3,%%xmm1 \n" in ARGBGrayRow_SSSE3()
3062 "sub $0x8,%2 \n" in ARGBGrayRow_SSSE3()
3063 "movdqa %%xmm0,(%0,%1,1) \n" in ARGBGrayRow_SSSE3()
3064 "movdqa %%xmm1,0x10(%0,%1,1) \n" in ARGBGrayRow_SSSE3()
3065 "lea 0x20(%0),%0 \n" in ARGBGrayRow_SSSE3()
3066 "jg 1b \n" in ARGBGrayRow_SSSE3()
3099 "movdqa %2,%%xmm2 \n" in ARGBSepiaRow_SSSE3()
3100 "movdqa %3,%%xmm3 \n" in ARGBSepiaRow_SSSE3()
3101 "movdqa %4,%%xmm4 \n" in ARGBSepiaRow_SSSE3()
3104 ".p2align 4 \n" in ARGBSepiaRow_SSSE3()
3105 "1: \n" in ARGBSepiaRow_SSSE3()
3106 "movdqa (%0),%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3107 "movdqa 0x10(%0),%%xmm6 \n" in ARGBSepiaRow_SSSE3()
3108 "pmaddubsw %%xmm2,%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3109 "pmaddubsw %%xmm2,%%xmm6 \n" in ARGBSepiaRow_SSSE3()
3110 "phaddw %%xmm6,%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3111 "psrlw $0x7,%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3112 "packuswb %%xmm0,%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3113 "movdqa (%0),%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3114 "movdqa 0x10(%0),%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3115 "pmaddubsw %%xmm3,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3116 "pmaddubsw %%xmm3,%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3117 "phaddw %%xmm1,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3118 "psrlw $0x7,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3119 "packuswb %%xmm5,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3120 "punpcklbw %%xmm5,%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3121 "movdqa (%0),%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3122 "movdqa 0x10(%0),%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3123 "pmaddubsw %%xmm4,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3124 "pmaddubsw %%xmm4,%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3125 "phaddw %%xmm1,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3126 "psrlw $0x7,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3127 "packuswb %%xmm5,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3128 "movdqa (%0),%%xmm6 \n" in ARGBSepiaRow_SSSE3()
3129 "movdqa 0x10(%0),%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3130 "psrld $0x18,%%xmm6 \n" in ARGBSepiaRow_SSSE3()
3131 "psrld $0x18,%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3132 "packuswb %%xmm1,%%xmm6 \n" in ARGBSepiaRow_SSSE3()
3133 "packuswb %%xmm6,%%xmm6 \n" in ARGBSepiaRow_SSSE3()
3134 "punpcklbw %%xmm6,%%xmm5 \n" in ARGBSepiaRow_SSSE3()
3135 "movdqa %%xmm0,%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3136 "punpcklwd %%xmm5,%%xmm0 \n" in ARGBSepiaRow_SSSE3()
3137 "punpckhwd %%xmm5,%%xmm1 \n" in ARGBSepiaRow_SSSE3()
3138 "sub $0x8,%1 \n" in ARGBSepiaRow_SSSE3()
3139 "movdqa %%xmm0,(%0) \n" in ARGBSepiaRow_SSSE3()
3140 "movdqa %%xmm1,0x10(%0) \n" in ARGBSepiaRow_SSSE3()
3141 "lea 0x20(%0),%0 \n" in ARGBSepiaRow_SSSE3()
3142 "jg 1b \n" in ARGBSepiaRow_SSSE3()
3162 "movd (%2),%%xmm2 \n" in ARGBColorMatrixRow_SSSE3()
3163 "movd 0x4(%2),%%xmm3 \n" in ARGBColorMatrixRow_SSSE3()
3164 "movd 0x8(%2),%%xmm4 \n" in ARGBColorMatrixRow_SSSE3()
3165 "pshufd $0x0,%%xmm2,%%xmm2 \n" in ARGBColorMatrixRow_SSSE3()
3166 "pshufd $0x0,%%xmm3,%%xmm3 \n" in ARGBColorMatrixRow_SSSE3()
3167 "pshufd $0x0,%%xmm4,%%xmm4 \n" in ARGBColorMatrixRow_SSSE3()
3170 ".p2align 4 \n" in ARGBColorMatrixRow_SSSE3()
3171 "1: \n" in ARGBColorMatrixRow_SSSE3()
3172 "movdqa (%0),%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3173 "movdqa 0x10(%0),%%xmm6 \n" in ARGBColorMatrixRow_SSSE3()
3174 "pmaddubsw %%xmm2,%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3175 "pmaddubsw %%xmm2,%%xmm6 \n" in ARGBColorMatrixRow_SSSE3()
3176 "movdqa (%0),%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3177 "movdqa 0x10(%0),%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3178 "pmaddubsw %%xmm3,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3179 "pmaddubsw %%xmm3,%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3180 "phaddsw %%xmm6,%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3181 "phaddsw %%xmm1,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3182 "psraw $0x7,%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3183 "psraw $0x7,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3184 "packuswb %%xmm0,%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3185 "packuswb %%xmm5,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3186 "punpcklbw %%xmm5,%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3187 "movdqa (%0),%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3188 "movdqa 0x10(%0),%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3189 "pmaddubsw %%xmm4,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3190 "pmaddubsw %%xmm4,%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3191 "phaddsw %%xmm1,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3192 "psraw $0x7,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3193 "packuswb %%xmm5,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3194 "movdqa (%0),%%xmm6 \n" in ARGBColorMatrixRow_SSSE3()
3195 "movdqa 0x10(%0),%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3196 "psrld $0x18,%%xmm6 \n" in ARGBColorMatrixRow_SSSE3()
3197 "psrld $0x18,%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3198 "packuswb %%xmm1,%%xmm6 \n" in ARGBColorMatrixRow_SSSE3()
3199 "packuswb %%xmm6,%%xmm6 \n" in ARGBColorMatrixRow_SSSE3()
3200 "movdqa %%xmm0,%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3201 "punpcklbw %%xmm6,%%xmm5 \n" in ARGBColorMatrixRow_SSSE3()
3202 "punpcklwd %%xmm5,%%xmm0 \n" in ARGBColorMatrixRow_SSSE3()
3203 "punpckhwd %%xmm5,%%xmm1 \n" in ARGBColorMatrixRow_SSSE3()
3204 "sub $0x8,%1 \n" in ARGBColorMatrixRow_SSSE3()
3205 "movdqa %%xmm0,(%0) \n" in ARGBColorMatrixRow_SSSE3()
3206 "movdqa %%xmm1,0x10(%0) \n" in ARGBColorMatrixRow_SSSE3()
3207 "lea 0x20(%0),%0 \n" in ARGBColorMatrixRow_SSSE3()
3208 "jg 1b \n" in ARGBColorMatrixRow_SSSE3()
3226 "movd %2,%%xmm2 \n" in ARGBQuantizeRow_SSE2()
3227 "movd %3,%%xmm3 \n" in ARGBQuantizeRow_SSE2()
3228 "movd %4,%%xmm4 \n" in ARGBQuantizeRow_SSE2()
3229 "pshuflw $0x40,%%xmm2,%%xmm2 \n" in ARGBQuantizeRow_SSE2()
3230 "pshufd $0x44,%%xmm2,%%xmm2 \n" in ARGBQuantizeRow_SSE2()
3231 "pshuflw $0x40,%%xmm3,%%xmm3 \n" in ARGBQuantizeRow_SSE2()
3232 "pshufd $0x44,%%xmm3,%%xmm3 \n" in ARGBQuantizeRow_SSE2()
3233 "pshuflw $0x40,%%xmm4,%%xmm4 \n" in ARGBQuantizeRow_SSE2()
3234 "pshufd $0x44,%%xmm4,%%xmm4 \n" in ARGBQuantizeRow_SSE2()
3235 "pxor %%xmm5,%%xmm5 \n" in ARGBQuantizeRow_SSE2()
3236 "pcmpeqb %%xmm6,%%xmm6 \n" in ARGBQuantizeRow_SSE2()
3237 "pslld $0x18,%%xmm6 \n" in ARGBQuantizeRow_SSE2()
3240 ".p2align 2 \n" in ARGBQuantizeRow_SSE2()
3241 "1: \n" in ARGBQuantizeRow_SSE2()
3242 "movdqa (%0),%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3243 "punpcklbw %%xmm5,%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3244 "pmulhuw %%xmm2,%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3245 "movdqa (%0),%%xmm1 \n" in ARGBQuantizeRow_SSE2()
3246 "punpckhbw %%xmm5,%%xmm1 \n" in ARGBQuantizeRow_SSE2()
3247 "pmulhuw %%xmm2,%%xmm1 \n" in ARGBQuantizeRow_SSE2()
3248 "pmullw %%xmm3,%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3249 "movdqa (%0),%%xmm7 \n" in ARGBQuantizeRow_SSE2()
3250 "pmullw %%xmm3,%%xmm1 \n" in ARGBQuantizeRow_SSE2()
3251 "pand %%xmm6,%%xmm7 \n" in ARGBQuantizeRow_SSE2()
3252 "paddw %%xmm4,%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3253 "paddw %%xmm4,%%xmm1 \n" in ARGBQuantizeRow_SSE2()
3254 "packuswb %%xmm1,%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3255 "por %%xmm7,%%xmm0 \n" in ARGBQuantizeRow_SSE2()
3256 "sub $0x4,%1 \n" in ARGBQuantizeRow_SSE2()
3257 "movdqa %%xmm0,(%0) \n" in ARGBQuantizeRow_SSE2()
3258 "lea 0x10(%0),%0 \n" in ARGBQuantizeRow_SSE2()
3259 "jg 1b \n" in ARGBQuantizeRow_SSE2()
3279 "sub %1,%2 \n" in ComputeCumulativeSumRow_SSE2()
3280 "pxor %%xmm0,%%xmm0 \n" in ComputeCumulativeSumRow_SSE2()
3281 "pxor %%xmm1,%%xmm1 \n" in ComputeCumulativeSumRow_SSE2()
3282 "sub $0x4,%3 \n" in ComputeCumulativeSumRow_SSE2()
3283 "jl 49f \n" in ComputeCumulativeSumRow_SSE2()
3284 "test $0xf,%1 \n" in ComputeCumulativeSumRow_SSE2()
3285 "jne 49f \n" in ComputeCumulativeSumRow_SSE2()
3287 // 4 pixel loop \n" in ComputeCumulativeSumRow_SSE2()
3288 ".p2align 2 \n" in ComputeCumulativeSumRow_SSE2()
3289 "40: \n" in ComputeCumulativeSumRow_SSE2()
3290 "movdqu (%0),%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3291 "lea 0x10(%0),%0 \n" in ComputeCumulativeSumRow_SSE2()
3292 "movdqa %%xmm2,%%xmm4 \n" in ComputeCumulativeSumRow_SSE2()
3293 "punpcklbw %%xmm1,%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3294 "movdqa %%xmm2,%%xmm3 \n" in ComputeCumulativeSumRow_SSE2()
3295 "punpcklwd %%xmm1,%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3296 "punpckhwd %%xmm1,%%xmm3 \n" in ComputeCumulativeSumRow_SSE2()
3297 "punpckhbw %%xmm1,%%xmm4 \n" in ComputeCumulativeSumRow_SSE2()
3298 "movdqa %%xmm4,%%xmm5 \n" in ComputeCumulativeSumRow_SSE2()
3299 "punpcklwd %%xmm1,%%xmm4 \n" in ComputeCumulativeSumRow_SSE2()
3300 "punpckhwd %%xmm1,%%xmm5 \n" in ComputeCumulativeSumRow_SSE2()
3301 "paddd %%xmm2,%%xmm0 \n" in ComputeCumulativeSumRow_SSE2()
3302 "movdqa (%1,%2,1),%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3303 "paddd %%xmm0,%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3304 "paddd %%xmm3,%%xmm0 \n" in ComputeCumulativeSumRow_SSE2()
3305 "movdqa 0x10(%1,%2,1),%%xmm3 \n" in ComputeCumulativeSumRow_SSE2()
3306 "paddd %%xmm0,%%xmm3 \n" in ComputeCumulativeSumRow_SSE2()
3307 "paddd %%xmm4,%%xmm0 \n" in ComputeCumulativeSumRow_SSE2()
3308 "movdqa 0x20(%1,%2,1),%%xmm4 \n" in ComputeCumulativeSumRow_SSE2()
3309 "paddd %%xmm0,%%xmm4 \n" in ComputeCumulativeSumRow_SSE2()
3310 "paddd %%xmm5,%%xmm0 \n" in ComputeCumulativeSumRow_SSE2()
3311 "movdqa 0x30(%1,%2,1),%%xmm5 \n" in ComputeCumulativeSumRow_SSE2()
3312 "paddd %%xmm0,%%xmm5 \n" in ComputeCumulativeSumRow_SSE2()
3313 "movdqa %%xmm2,(%1) \n" in ComputeCumulativeSumRow_SSE2()
3314 "movdqa %%xmm3,0x10(%1) \n" in ComputeCumulativeSumRow_SSE2()
3315 "movdqa %%xmm4,0x20(%1) \n" in ComputeCumulativeSumRow_SSE2()
3316 "movdqa %%xmm5,0x30(%1) \n" in ComputeCumulativeSumRow_SSE2()
3317 "lea 0x40(%1),%1 \n" in ComputeCumulativeSumRow_SSE2()
3318 "sub $0x4,%3 \n" in ComputeCumulativeSumRow_SSE2()
3319 "jge 40b \n" in ComputeCumulativeSumRow_SSE2()
3321 "49: \n" in ComputeCumulativeSumRow_SSE2()
3322 "add $0x3,%3 \n" in ComputeCumulativeSumRow_SSE2()
3323 "jl 19f \n" in ComputeCumulativeSumRow_SSE2()
3325 // 1 pixel loop \n" in ComputeCumulativeSumRow_SSE2()
3326 ".p2align 2 \n" in ComputeCumulativeSumRow_SSE2()
3327 "10: \n" in ComputeCumulativeSumRow_SSE2()
3328 "movd (%0),%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3329 "lea 0x4(%0),%0 \n" in ComputeCumulativeSumRow_SSE2()
3330 "punpcklbw %%xmm1,%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3331 "punpcklwd %%xmm1,%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3332 "paddd %%xmm2,%%xmm0 \n" in ComputeCumulativeSumRow_SSE2()
3333 "movdqu (%1,%2,1),%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3334 "paddd %%xmm0,%%xmm2 \n" in ComputeCumulativeSumRow_SSE2()
3335 "movdqu %%xmm2,(%1) \n" in ComputeCumulativeSumRow_SSE2()
3336 "lea 0x10(%1),%1 \n" in ComputeCumulativeSumRow_SSE2()
3337 "sub $0x1,%3 \n" in ComputeCumulativeSumRow_SSE2()
3338 "jge 10b \n" in ComputeCumulativeSumRow_SSE2()
3340 "19: \n" in ComputeCumulativeSumRow_SSE2()
3358 "movd %5,%%xmm4 \n" in CumulativeSumToAverage_SSE2()
3359 "cvtdq2ps %%xmm4,%%xmm4 \n" in CumulativeSumToAverage_SSE2()
3360 "rcpss %%xmm4,%%xmm4 \n" in CumulativeSumToAverage_SSE2()
3361 "pshufd $0x0,%%xmm4,%%xmm4 \n" in CumulativeSumToAverage_SSE2()
3362 "sub $0x4,%3 \n" in CumulativeSumToAverage_SSE2()
3363 "jl 49f \n" in CumulativeSumToAverage_SSE2()
3365 // 4 pixel loop \n" in CumulativeSumToAverage_SSE2()
3366 ".p2align 2 \n" in CumulativeSumToAverage_SSE2()
3367 "40: \n" in CumulativeSumToAverage_SSE2()
3368 "movdqa (%0),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3369 "movdqa 0x10(%0),%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3370 "movdqa 0x20(%0),%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3371 "movdqa 0x30(%0),%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3372 "psubd (%0,%4,4),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3373 "psubd 0x10(%0,%4,4),%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3374 "psubd 0x20(%0,%4,4),%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3375 "psubd 0x30(%0,%4,4),%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3376 "lea 0x40(%0),%0 \n" in CumulativeSumToAverage_SSE2()
3377 "psubd (%1),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3378 "psubd 0x10(%1),%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3379 "psubd 0x20(%1),%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3380 "psubd 0x30(%1),%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3381 "paddd (%1,%4,4),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3382 "paddd 0x10(%1,%4,4),%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3383 "paddd 0x20(%1,%4,4),%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3384 "paddd 0x30(%1,%4,4),%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3385 "lea 0x40(%1),%1 \n" in CumulativeSumToAverage_SSE2()
3386 "cvtdq2ps %%xmm0,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3387 "cvtdq2ps %%xmm1,%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3388 "mulps %%xmm4,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3389 "mulps %%xmm4,%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3390 "cvtdq2ps %%xmm2,%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3391 "cvtdq2ps %%xmm3,%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3392 "mulps %%xmm4,%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3393 "mulps %%xmm4,%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3394 "cvtps2dq %%xmm0,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3395 "cvtps2dq %%xmm1,%%xmm1 \n" in CumulativeSumToAverage_SSE2()
3396 "cvtps2dq %%xmm2,%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3397 "cvtps2dq %%xmm3,%%xmm3 \n" in CumulativeSumToAverage_SSE2()
3398 "packssdw %%xmm1,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3399 "packssdw %%xmm3,%%xmm2 \n" in CumulativeSumToAverage_SSE2()
3400 "packuswb %%xmm2,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3401 "movdqu %%xmm0,(%2) \n" in CumulativeSumToAverage_SSE2()
3402 "lea 0x10(%2),%2 \n" in CumulativeSumToAverage_SSE2()
3403 "sub $0x4,%3 \n" in CumulativeSumToAverage_SSE2()
3404 "jge 40b \n" in CumulativeSumToAverage_SSE2()
3406 "49: \n" in CumulativeSumToAverage_SSE2()
3407 "add $0x3,%3 \n" in CumulativeSumToAverage_SSE2()
3408 "jl 19f \n" in CumulativeSumToAverage_SSE2()
3410 // 1 pixel loop \n" in CumulativeSumToAverage_SSE2()
3411 ".p2align 2 \n" in CumulativeSumToAverage_SSE2()
3412 "10: \n" in CumulativeSumToAverage_SSE2()
3413 "movdqa (%0),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3414 "psubd (%0,%4,4),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3415 "lea 0x10(%0),%0 \n" in CumulativeSumToAverage_SSE2()
3416 "psubd (%1),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3417 "paddd (%1,%4,4),%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3418 "lea 0x10(%1),%1 \n" in CumulativeSumToAverage_SSE2()
3419 "cvtdq2ps %%xmm0,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3420 "mulps %%xmm4,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3421 "cvtps2dq %%xmm0,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3422 "packssdw %%xmm0,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3423 "packuswb %%xmm0,%%xmm0 \n" in CumulativeSumToAverage_SSE2()
3424 "movd %%xmm0,(%2) \n" in CumulativeSumToAverage_SSE2()
3425 "lea 0x4(%2),%2 \n" in CumulativeSumToAverage_SSE2()
3426 "sub $0x1,%3 \n" in CumulativeSumToAverage_SSE2()
3427 "jge 10b \n" in CumulativeSumToAverage_SSE2()
3428 "19: \n" in CumulativeSumToAverage_SSE2()
3448 "movd %3,%%xmm2 \n" in ARGBShadeRow_SSE2()
3449 "sub %0,%1 \n" in ARGBShadeRow_SSE2()
3450 "punpcklbw %%xmm2,%%xmm2 \n" in ARGBShadeRow_SSE2()
3451 "punpcklqdq %%xmm2,%%xmm2 \n" in ARGBShadeRow_SSE2()
3454 ".p2align 2 \n" in ARGBShadeRow_SSE2()
3455 "1: \n" in ARGBShadeRow_SSE2()
3456 "movdqa (%0),%%xmm0 \n" in ARGBShadeRow_SSE2()
3457 "movdqa %%xmm0,%%xmm1 \n" in ARGBShadeRow_SSE2()
3458 "punpcklbw %%xmm0,%%xmm0 \n" in ARGBShadeRow_SSE2()
3459 "punpckhbw %%xmm1,%%xmm1 \n" in ARGBShadeRow_SSE2()
3460 "pmulhuw %%xmm2,%%xmm0 \n" in ARGBShadeRow_SSE2()
3461 "pmulhuw %%xmm2,%%xmm1 \n" in ARGBShadeRow_SSE2()
3462 "psrlw $0x8,%%xmm0 \n" in ARGBShadeRow_SSE2()
3463 "psrlw $0x8,%%xmm1 \n" in ARGBShadeRow_SSE2()
3464 "packuswb %%xmm1,%%xmm0 \n" in ARGBShadeRow_SSE2()
3465 "sub $0x4,%2 \n" in ARGBShadeRow_SSE2()
3466 "movdqa %%xmm0,(%0,%1,1) \n" in ARGBShadeRow_SSE2()
3467 "lea 0x10(%0),%0 \n" in ARGBShadeRow_SSE2()
3468 "jg 1b \n" in ARGBShadeRow_SSE2()
3494 "movq (%3),%%xmm2 \n" in ARGBAffineRow_SSE2()
3495 "movq 0x8(%3),%%xmm7 \n" in ARGBAffineRow_SSE2()
3496 "shl $0x10,%1 \n" in ARGBAffineRow_SSE2()
3497 "add $0x4,%1 \n" in ARGBAffineRow_SSE2()
3498 "movd %1,%%xmm5 \n" in ARGBAffineRow_SSE2()
3499 "sub $0x4,%4 \n" in ARGBAffineRow_SSE2()
3500 "jl 49f \n" in ARGBAffineRow_SSE2()
3502 "pshufd $0x44,%%xmm7,%%xmm7 \n" in ARGBAffineRow_SSE2()
3503 "pshufd $0x0,%%xmm5,%%xmm5 \n" in ARGBAffineRow_SSE2()
3504 "movdqa %%xmm2,%%xmm0 \n" in ARGBAffineRow_SSE2()
3505 "addps %%xmm7,%%xmm0 \n" in ARGBAffineRow_SSE2()
3506 "movlhps %%xmm0,%%xmm2 \n" in ARGBAffineRow_SSE2()
3507 "movdqa %%xmm7,%%xmm4 \n" in ARGBAffineRow_SSE2()
3508 "addps %%xmm4,%%xmm4 \n" in ARGBAffineRow_SSE2()
3509 "movdqa %%xmm2,%%xmm3 \n" in ARGBAffineRow_SSE2()
3510 "addps %%xmm4,%%xmm3 \n" in ARGBAffineRow_SSE2()
3511 "addps %%xmm4,%%xmm4 \n" in ARGBAffineRow_SSE2()
3513 // 4 pixel loop \n" in ARGBAffineRow_SSE2()
3514 ".p2align 4 \n" in ARGBAffineRow_SSE2()
3515 "40: \n" in ARGBAffineRow_SSE2()
3516 "cvttps2dq %%xmm2,%%xmm0 \n" in ARGBAffineRow_SSE2()
3517 "cvttps2dq %%xmm3,%%xmm1 \n" in ARGBAffineRow_SSE2()
3518 "packssdw %%xmm1,%%xmm0 \n" in ARGBAffineRow_SSE2()
3519 "pmaddwd %%xmm5,%%xmm0 \n" in ARGBAffineRow_SSE2()
3521 "movd %%xmm0,%1 \n" in ARGBAffineRow_SSE2()
3522 "mov %1,%5 \n" in ARGBAffineRow_SSE2()
3523 "and $0x0fffffff,%1 \n" in ARGBAffineRow_SSE2()
3524 "shr $32,%5 \n" in ARGBAffineRow_SSE2()
3525 "pshufd $0xEE,%%xmm0,%%xmm0 \n" in ARGBAffineRow_SSE2()
3527 "movd %%xmm0,%1 \n" in ARGBAffineRow_SSE2()
3528 "pshufd $0x39,%%xmm0,%%xmm0 \n" in ARGBAffineRow_SSE2()
3529 "movd %%xmm0,%5 \n" in ARGBAffineRow_SSE2()
3530 "pshufd $0x39,%%xmm0,%%xmm0 \n" in ARGBAffineRow_SSE2()
3532 "movd (%0,%1,1),%%xmm1 \n" in ARGBAffineRow_SSE2()
3533 "movd (%0,%5,1),%%xmm6 \n" in ARGBAffineRow_SSE2()
3534 "punpckldq %%xmm6,%%xmm1 \n" in ARGBAffineRow_SSE2()
3535 "addps %%xmm4,%%xmm2 \n" in ARGBAffineRow_SSE2()
3536 "movq %%xmm1,(%2) \n" in ARGBAffineRow_SSE2()
3538 "movd %%xmm0,%1 \n" in ARGBAffineRow_SSE2()
3539 "mov %1,%5 \n" in ARGBAffineRow_SSE2()
3540 "and $0x0fffffff,%1 \n" in ARGBAffineRow_SSE2()
3541 "shr $32,%5 \n" in ARGBAffineRow_SSE2()
3543 "movd %%xmm0,%1 \n" in ARGBAffineRow_SSE2()
3544 "pshufd $0x39,%%xmm0,%%xmm0 \n" in ARGBAffineRow_SSE2()
3545 "movd %%xmm0,%5 \n" in ARGBAffineRow_SSE2()
3547 "movd (%0,%1,1),%%xmm0 \n" in ARGBAffineRow_SSE2()
3548 "movd (%0,%5,1),%%xmm6 \n" in ARGBAffineRow_SSE2()
3549 "punpckldq %%xmm6,%%xmm0 \n" in ARGBAffineRow_SSE2()
3550 "addps %%xmm4,%%xmm3 \n" in ARGBAffineRow_SSE2()
3551 "sub $0x4,%4 \n" in ARGBAffineRow_SSE2()
3552 "movq %%xmm0,0x08(%2) \n" in ARGBAffineRow_SSE2()
3553 "lea 0x10(%2),%2 \n" in ARGBAffineRow_SSE2()
3554 "jge 40b \n" in ARGBAffineRow_SSE2()
3556 "49: \n" in ARGBAffineRow_SSE2()
3557 "add $0x3,%4 \n" in ARGBAffineRow_SSE2()
3558 "jl 19f \n" in ARGBAffineRow_SSE2()
3560 // 1 pixel loop \n" in ARGBAffineRow_SSE2()
3561 ".p2align 4 \n" in ARGBAffineRow_SSE2()
3562 "10: \n" in ARGBAffineRow_SSE2()
3563 "cvttps2dq %%xmm2,%%xmm0 \n" in ARGBAffineRow_SSE2()
3564 "packssdw %%xmm0,%%xmm0 \n" in ARGBAffineRow_SSE2()
3565 "pmaddwd %%xmm5,%%xmm0 \n" in ARGBAffineRow_SSE2()
3566 "addps %%xmm7,%%xmm2 \n" in ARGBAffineRow_SSE2()
3567 "movd %%xmm0,%1 \n" in ARGBAffineRow_SSE2()
3569 "and $0x0fffffff,%1 \n" in ARGBAffineRow_SSE2()
3571 "movd (%0,%1,1),%%xmm0 \n" in ARGBAffineRow_SSE2()
3572 "sub $0x1,%4 \n" in ARGBAffineRow_SSE2()
3573 "movd %%xmm0,(%2) \n" in ARGBAffineRow_SSE2()
3574 "lea 0x4(%2),%2 \n" in ARGBAffineRow_SSE2()
3575 "jge 10b \n" in ARGBAffineRow_SSE2()
3576 "19: \n" in ARGBAffineRow_SSE2()
3597 "sub %1,%0 \n" in ARGBInterpolateRow_SSSE3()
3598 "shr %3 \n" in ARGBInterpolateRow_SSSE3()
3599 "cmp $0x0,%3 \n" in ARGBInterpolateRow_SSSE3()
3600 "je 2f \n" in ARGBInterpolateRow_SSSE3()
3601 "cmp $0x40,%3 \n" in ARGBInterpolateRow_SSSE3()
3602 "je 3f \n" in ARGBInterpolateRow_SSSE3()
3603 "movd %3,%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3604 "neg %3 \n" in ARGBInterpolateRow_SSSE3()
3605 "add $0x80,%3 \n" in ARGBInterpolateRow_SSSE3()
3606 "movd %3,%%xmm5 \n" in ARGBInterpolateRow_SSSE3()
3607 "punpcklbw %%xmm0,%%xmm5 \n" in ARGBInterpolateRow_SSSE3()
3608 "punpcklwd %%xmm5,%%xmm5 \n" in ARGBInterpolateRow_SSSE3()
3609 "pshufd $0x0,%%xmm5,%%xmm5 \n" in ARGBInterpolateRow_SSSE3()
3610 ".p2align 4 \n" in ARGBInterpolateRow_SSSE3()
3611 "1: \n" in ARGBInterpolateRow_SSSE3()
3612 "movdqa (%1),%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3613 "movdqa (%1,%4,1),%%xmm2 \n" in ARGBInterpolateRow_SSSE3()
3614 "movdqa %%xmm0,%%xmm1 \n" in ARGBInterpolateRow_SSSE3()
3615 "punpcklbw %%xmm2,%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3616 "punpckhbw %%xmm2,%%xmm1 \n" in ARGBInterpolateRow_SSSE3()
3617 "pmaddubsw %%xmm5,%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3618 "pmaddubsw %%xmm5,%%xmm1 \n" in ARGBInterpolateRow_SSSE3()
3619 "psrlw $0x7,%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3620 "psrlw $0x7,%%xmm1 \n" in ARGBInterpolateRow_SSSE3()
3621 "packuswb %%xmm1,%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3622 "sub $0x4,%2 \n" in ARGBInterpolateRow_SSSE3()
3623 "movdqa %%xmm0,(%1,%0,1) \n" in ARGBInterpolateRow_SSSE3()
3624 "lea 0x10(%1),%1 \n" in ARGBInterpolateRow_SSSE3()
3625 "jg 1b \n" in ARGBInterpolateRow_SSSE3()
3626 "jmp 4f \n" in ARGBInterpolateRow_SSSE3()
3627 ".p2align 4 \n" in ARGBInterpolateRow_SSSE3()
3628 "2: \n" in ARGBInterpolateRow_SSSE3()
3629 "movdqa (%1),%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3630 "sub $0x4,%2 \n" in ARGBInterpolateRow_SSSE3()
3631 "movdqa %%xmm0,(%1,%0,1) \n" in ARGBInterpolateRow_SSSE3()
3632 "lea 0x10(%1),%1 \n" in ARGBInterpolateRow_SSSE3()
3633 "jg 2b \n" in ARGBInterpolateRow_SSSE3()
3634 "jmp 4f \n" in ARGBInterpolateRow_SSSE3()
3635 ".p2align 4 \n" in ARGBInterpolateRow_SSSE3()
3636 "3: \n" in ARGBInterpolateRow_SSSE3()
3637 "movdqa (%1),%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3638 "pavgb (%1,%4,1),%%xmm0 \n" in ARGBInterpolateRow_SSSE3()
3639 "sub $0x4,%2 \n" in ARGBInterpolateRow_SSSE3()
3640 "movdqa %%xmm0,(%1,%0,1) \n" in ARGBInterpolateRow_SSSE3()
3641 "lea 0x10(%1),%1 \n" in ARGBInterpolateRow_SSSE3()
3642 "jg 3b \n" in ARGBInterpolateRow_SSSE3()
3643 "4: \n" in ARGBInterpolateRow_SSSE3()
3644 ".p2align 4 \n" in ARGBInterpolateRow_SSSE3()