Lines Matching refs:xmm1
139 movdqa xmm1, xmm0 in I400ToARGBRow_SSE2()
141 punpckhwd xmm1, xmm1 in I400ToARGBRow_SSE2()
143 por xmm1, xmm5 in I400ToARGBRow_SSE2()
145 movdqa [edx + 16], xmm1 in I400ToARGBRow_SSE2()
250 movdqu xmm1, [eax + 16] in RGB24ToARGBRow_SSSE3()
254 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} in RGB24ToARGBRow_SSSE3()
257 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} in RGB24ToARGBRow_SSSE3()
261 pshufb xmm1, xmm4 in RGB24ToARGBRow_SSSE3()
263 por xmm1, xmm5 in RGB24ToARGBRow_SSSE3()
266 movdqa [edx + 16], xmm1 in RGB24ToARGBRow_SSSE3()
290 movdqu xmm1, [eax + 16] in RAWToARGBRow_SSSE3()
294 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} in RAWToARGBRow_SSSE3()
297 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} in RAWToARGBRow_SSSE3()
301 pshufb xmm1, xmm4 in RAWToARGBRow_SSSE3()
303 por xmm1, xmm5 in RAWToARGBRow_SSSE3()
306 movdqa [edx + 16], xmm1 in RAWToARGBRow_SSSE3()
350 movdqa xmm1, xmm0 in RGB565ToARGBRow_SSE2()
352 pand xmm1, xmm3 // R in upper 5 bits in RGB565ToARGBRow_SSE2()
354 pmulhuw xmm1, xmm5 // * (256 + 8) in RGB565ToARGBRow_SSE2()
356 psllw xmm1, 8 in RGB565ToARGBRow_SSE2()
357 por xmm1, xmm2 // RB in RGB565ToARGBRow_SSE2()
361 movdqa xmm2, xmm1 in RGB565ToARGBRow_SSE2()
362 punpcklbw xmm1, xmm0 in RGB565ToARGBRow_SSE2()
364 movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB in RGB565ToARGBRow_SSE2()
400 movdqa xmm1, xmm0 in ARGB1555ToARGBRow_SSE2()
402 psllw xmm1, 1 // R in upper 5 bits in ARGB1555ToARGBRow_SSE2()
404 pand xmm1, xmm3 in ARGB1555ToARGBRow_SSE2()
406 pmulhuw xmm1, xmm5 // * (256 + 8) in ARGB1555ToARGBRow_SSE2()
407 psllw xmm1, 8 in ARGB1555ToARGBRow_SSE2()
408 por xmm1, xmm2 // RB in ARGB1555ToARGBRow_SSE2()
415 movdqa xmm2, xmm1 in ARGB1555ToARGBRow_SSE2()
416 punpcklbw xmm1, xmm0 in ARGB1555ToARGBRow_SSE2()
418 movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB in ARGB1555ToARGBRow_SSE2()
449 movdqa xmm1, xmm0 in ARGB4444ToARGBRow_SSE2()
451 psllw xmm1, 4 in ARGB4444ToARGBRow_SSE2()
453 por xmm0, xmm1 in ARGB4444ToARGBRow_SSE2()
455 movdqa xmm1, xmm0 in ARGB4444ToARGBRow_SSE2()
457 punpckhbw xmm1, xmm2 in ARGB4444ToARGBRow_SSE2()
459 movdqa [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB in ARGB4444ToARGBRow_SSE2()
478 movdqa xmm1, [eax + 16] in ARGBToRGB24Row_SSSE3()
483 pshufb xmm1, xmm6 in ARGBToRGB24Row_SSSE3()
486 movdqa xmm4, xmm1 // 4 bytes from 1 for 0 in ARGBToRGB24Row_SSSE3()
487 psrldq xmm1, 4 // 8 bytes from 1 in ARGBToRGB24Row_SSSE3()
493 por xmm1, xmm5 // 8 bytes from 2 for 1 in ARGBToRGB24Row_SSSE3()
497 movdqa [edx + 16], xmm1 // store 1 in ARGBToRGB24Row_SSSE3()
517 movdqa xmm1, [eax + 16] in ARGBToRAWRow_SSSE3()
522 pshufb xmm1, xmm6 in ARGBToRAWRow_SSSE3()
525 movdqa xmm4, xmm1 // 4 bytes from 1 for 0 in ARGBToRAWRow_SSSE3()
526 psrldq xmm1, 4 // 8 bytes from 1 in ARGBToRAWRow_SSSE3()
532 por xmm1, xmm5 // 8 bytes from 2 for 1 in ARGBToRAWRow_SSSE3()
536 movdqa [edx + 16], xmm1 // store 1 in ARGBToRAWRow_SSSE3()
562 movdqa xmm1, xmm0 // B in ARGBToRGB565Row_SSE2()
565 psrld xmm1, 3 // B in ARGBToRGB565Row_SSE2()
568 pand xmm1, xmm3 // B in ARGBToRGB565Row_SSE2()
571 por xmm1, xmm2 // BG in ARGBToRGB565Row_SSE2()
572 por xmm0, xmm1 // BGR in ARGBToRGB565Row_SSE2()
602 movdqa xmm1, xmm0 // B in ARGBToARGB1555Row_SSE2()
606 psrld xmm1, 3 // B in ARGBToARGB1555Row_SSE2()
610 pand xmm1, xmm4 // B in ARGBToARGB1555Row_SSE2()
613 por xmm0, xmm1 // BA in ARGBToARGB1555Row_SSE2()
640 movdqa xmm1, xmm0 in ARGBToARGB4444Row_SSE2()
642 pand xmm1, xmm4 // high nibble in ARGBToARGB4444Row_SSE2()
644 psrl xmm1, 8 in ARGBToARGB4444Row_SSE2()
645 por xmm0, xmm1 in ARGBToARGB4444Row_SSE2()
669 movdqa xmm1, [eax + 16] in ARGBToYRow_SSSE3()
673 pmaddubsw xmm1, xmm4 in ARGBToYRow_SSSE3()
677 phaddw xmm0, xmm1 in ARGBToYRow_SSSE3()
703 movdqu xmm1, [eax + 16] in ARGBToYRow_Unaligned_SSSE3()
707 pmaddubsw xmm1, xmm4 in ARGBToYRow_Unaligned_SSSE3()
711 phaddw xmm0, xmm1 in ARGBToYRow_Unaligned_SSSE3()
737 movdqa xmm1, [eax + 16] in BGRAToYRow_SSSE3()
741 pmaddubsw xmm1, xmm4 in BGRAToYRow_SSSE3()
745 phaddw xmm0, xmm1 in BGRAToYRow_SSSE3()
771 movdqu xmm1, [eax + 16] in BGRAToYRow_Unaligned_SSSE3()
775 pmaddubsw xmm1, xmm4 in BGRAToYRow_Unaligned_SSSE3()
779 phaddw xmm0, xmm1 in BGRAToYRow_Unaligned_SSSE3()
805 movdqa xmm1, [eax + 16] in ABGRToYRow_SSSE3()
809 pmaddubsw xmm1, xmm4 in ABGRToYRow_SSSE3()
813 phaddw xmm0, xmm1 in ABGRToYRow_SSSE3()
839 movdqu xmm1, [eax + 16] in ABGRToYRow_Unaligned_SSSE3()
843 pmaddubsw xmm1, xmm4 in ABGRToYRow_Unaligned_SSSE3()
847 phaddw xmm0, xmm1 in ABGRToYRow_Unaligned_SSSE3()
873 movdqa xmm1, [eax + 16] in RGBAToYRow_SSSE3()
877 pmaddubsw xmm1, xmm4 in RGBAToYRow_SSSE3()
881 phaddw xmm0, xmm1 in RGBAToYRow_SSSE3()
907 movdqu xmm1, [eax + 16] in RGBAToYRow_Unaligned_SSSE3()
911 pmaddubsw xmm1, xmm4 in RGBAToYRow_Unaligned_SSSE3()
915 phaddw xmm0, xmm1 in RGBAToYRow_Unaligned_SSSE3()
949 movdqa xmm1, [eax + 16] in ARGBToUVRow_SSSE3()
953 pavgb xmm1, [eax + esi + 16] in ARGBToUVRow_SSSE3()
958 shufps xmm0, xmm1, 0x88 in ARGBToUVRow_SSSE3()
959 shufps xmm4, xmm1, 0xdd in ARGBToUVRow_SSSE3()
969 movdqa xmm1, xmm0 in ARGBToUVRow_SSSE3()
973 pmaddubsw xmm1, xmm6 // V in ARGBToUVRow_SSSE3()
976 phaddw xmm1, xmm3 in ARGBToUVRow_SSSE3()
978 psraw xmm1, 8 in ARGBToUVRow_SSSE3()
979 packsswb xmm0, xmm1 in ARGBToUVRow_SSSE3()
1015 movdqu xmm1, [eax + 16] in ARGBToUVRow_Unaligned_SSSE3()
1021 pavgb xmm1, xmm4 in ARGBToUVRow_Unaligned_SSSE3()
1028 shufps xmm0, xmm1, 0x88 in ARGBToUVRow_Unaligned_SSSE3()
1029 shufps xmm4, xmm1, 0xdd in ARGBToUVRow_Unaligned_SSSE3()
1039 movdqa xmm1, xmm0 in ARGBToUVRow_Unaligned_SSSE3()
1043 pmaddubsw xmm1, xmm6 // V in ARGBToUVRow_Unaligned_SSSE3()
1046 phaddw xmm1, xmm3 in ARGBToUVRow_Unaligned_SSSE3()
1048 psraw xmm1, 8 in ARGBToUVRow_Unaligned_SSSE3()
1049 packsswb xmm0, xmm1 in ARGBToUVRow_Unaligned_SSSE3()
1085 movdqa xmm1, [eax + 16] in BGRAToUVRow_SSSE3()
1089 pavgb xmm1, [eax + esi + 16] in BGRAToUVRow_SSSE3()
1094 shufps xmm0, xmm1, 0x88 in BGRAToUVRow_SSSE3()
1095 shufps xmm4, xmm1, 0xdd in BGRAToUVRow_SSSE3()
1105 movdqa xmm1, xmm0 in BGRAToUVRow_SSSE3()
1109 pmaddubsw xmm1, xmm6 // V in BGRAToUVRow_SSSE3()
1112 phaddw xmm1, xmm3 in BGRAToUVRow_SSSE3()
1114 psraw xmm1, 8 in BGRAToUVRow_SSSE3()
1115 packsswb xmm0, xmm1 in BGRAToUVRow_SSSE3()
1151 movdqu xmm1, [eax + 16] in BGRAToUVRow_Unaligned_SSSE3()
1157 pavgb xmm1, xmm4 in BGRAToUVRow_Unaligned_SSSE3()
1164 shufps xmm0, xmm1, 0x88 in BGRAToUVRow_Unaligned_SSSE3()
1165 shufps xmm4, xmm1, 0xdd in BGRAToUVRow_Unaligned_SSSE3()
1175 movdqa xmm1, xmm0 in BGRAToUVRow_Unaligned_SSSE3()
1179 pmaddubsw xmm1, xmm6 // V in BGRAToUVRow_Unaligned_SSSE3()
1182 phaddw xmm1, xmm3 in BGRAToUVRow_Unaligned_SSSE3()
1184 psraw xmm1, 8 in BGRAToUVRow_Unaligned_SSSE3()
1185 packsswb xmm0, xmm1 in BGRAToUVRow_Unaligned_SSSE3()
1221 movdqa xmm1, [eax + 16] in ABGRToUVRow_SSSE3()
1225 pavgb xmm1, [eax + esi + 16] in ABGRToUVRow_SSSE3()
1230 shufps xmm0, xmm1, 0x88 in ABGRToUVRow_SSSE3()
1231 shufps xmm4, xmm1, 0xdd in ABGRToUVRow_SSSE3()
1241 movdqa xmm1, xmm0 in ABGRToUVRow_SSSE3()
1245 pmaddubsw xmm1, xmm6 // V in ABGRToUVRow_SSSE3()
1248 phaddw xmm1, xmm3 in ABGRToUVRow_SSSE3()
1250 psraw xmm1, 8 in ABGRToUVRow_SSSE3()
1251 packsswb xmm0, xmm1 in ABGRToUVRow_SSSE3()
1287 movdqu xmm1, [eax + 16] in ABGRToUVRow_Unaligned_SSSE3()
1293 pavgb xmm1, xmm4 in ABGRToUVRow_Unaligned_SSSE3()
1300 shufps xmm0, xmm1, 0x88 in ABGRToUVRow_Unaligned_SSSE3()
1301 shufps xmm4, xmm1, 0xdd in ABGRToUVRow_Unaligned_SSSE3()
1311 movdqa xmm1, xmm0 in ABGRToUVRow_Unaligned_SSSE3()
1315 pmaddubsw xmm1, xmm6 // V in ABGRToUVRow_Unaligned_SSSE3()
1318 phaddw xmm1, xmm3 in ABGRToUVRow_Unaligned_SSSE3()
1320 psraw xmm1, 8 in ABGRToUVRow_Unaligned_SSSE3()
1321 packsswb xmm0, xmm1 in ABGRToUVRow_Unaligned_SSSE3()
1357 movdqa xmm1, [eax + 16] in RGBAToUVRow_SSSE3()
1361 pavgb xmm1, [eax + esi + 16] in RGBAToUVRow_SSSE3()
1366 shufps xmm0, xmm1, 0x88 in RGBAToUVRow_SSSE3()
1367 shufps xmm4, xmm1, 0xdd in RGBAToUVRow_SSSE3()
1377 movdqa xmm1, xmm0 in RGBAToUVRow_SSSE3()
1381 pmaddubsw xmm1, xmm6 // V in RGBAToUVRow_SSSE3()
1384 phaddw xmm1, xmm3 in RGBAToUVRow_SSSE3()
1386 psraw xmm1, 8 in RGBAToUVRow_SSSE3()
1387 packsswb xmm0, xmm1 in RGBAToUVRow_SSSE3()
1423 movdqu xmm1, [eax + 16] in RGBAToUVRow_Unaligned_SSSE3()
1429 pavgb xmm1, xmm4 in RGBAToUVRow_Unaligned_SSSE3()
1436 shufps xmm0, xmm1, 0x88 in RGBAToUVRow_Unaligned_SSSE3()
1437 shufps xmm4, xmm1, 0xdd in RGBAToUVRow_Unaligned_SSSE3()
1447 movdqa xmm1, xmm0 in RGBAToUVRow_Unaligned_SSSE3()
1451 pmaddubsw xmm1, xmm6 // V in RGBAToUVRow_Unaligned_SSSE3()
1454 phaddw xmm1, xmm3 in RGBAToUVRow_Unaligned_SSSE3()
1456 psraw xmm1, 8 in RGBAToUVRow_Unaligned_SSSE3()
1457 packsswb xmm0, xmm1 in RGBAToUVRow_Unaligned_SSSE3()
1527 __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
1529 __asm punpcklbw xmm0, xmm1 /* UV */ \
1535 __asm movd xmm1, [esi + edi] /* V */ \
1537 __asm punpcklbw xmm0, xmm1 /* UV */ \
1544 __asm movd xmm1, [esi + edi] /* V */ \
1546 __asm punpcklbw xmm0, xmm1 /* UV */ \
1561 __asm movdqa xmm1, xmm0 \
1564 __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \
1567 __asm psubw xmm1, kUVBiasG \
1576 __asm paddsw xmm1, xmm3 /* G += Y */ \
1579 __asm psraw xmm1, 6 \
1582 __asm packuswb xmm1, xmm1 /* G */ \
1589 __asm movdqa xmm1, xmm0 \
1592 __asm pmaddubsw xmm1, kVUToG /* scale G UV */ \
1595 __asm psubw xmm1, kUVBiasG \
1604 __asm paddsw xmm1, xmm3 /* G += Y */ \
1607 __asm psraw xmm1, 6 \
1610 __asm packuswb xmm1, xmm1 /* G */ \
1640 punpcklbw xmm0, xmm1 // BG in I444ToARGBRow_SSSE3()
1642 movdqa xmm1, xmm0 in I444ToARGBRow_SSSE3()
1644 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in I444ToARGBRow_SSSE3()
1646 movdqa [edx + 16], xmm1 in I444ToARGBRow_SSSE3()
1683 punpcklbw xmm0, xmm1 // BG in I422ToARGBRow_SSSE3()
1685 movdqa xmm1, xmm0 in I422ToARGBRow_SSSE3()
1687 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in I422ToARGBRow_SSSE3()
1689 movdqa [edx + 16], xmm1 in I422ToARGBRow_SSSE3()
1727 punpcklbw xmm0, xmm1 // BG in I411ToARGBRow_SSSE3()
1729 movdqa xmm1, xmm0 in I411ToARGBRow_SSSE3()
1731 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in I411ToARGBRow_SSSE3()
1733 movdqa [edx + 16], xmm1 in I411ToARGBRow_SSSE3()
1766 punpcklbw xmm0, xmm1 // BG in NV12ToARGBRow_SSSE3()
1768 movdqa xmm1, xmm0 in NV12ToARGBRow_SSSE3()
1770 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in NV12ToARGBRow_SSSE3()
1772 movdqa [edx + 16], xmm1 in NV12ToARGBRow_SSSE3()
1804 punpcklbw xmm0, xmm1 // BG in NV21ToARGBRow_SSSE3()
1806 movdqa xmm1, xmm0 in NV21ToARGBRow_SSSE3()
1808 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in NV21ToARGBRow_SSSE3()
1810 movdqa [edx + 16], xmm1 in NV21ToARGBRow_SSSE3()
1846 punpcklbw xmm0, xmm1 // BG in I444ToARGBRow_Unaligned_SSSE3()
1848 movdqa xmm1, xmm0 in I444ToARGBRow_Unaligned_SSSE3()
1850 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in I444ToARGBRow_Unaligned_SSSE3()
1852 movdqu [edx + 16], xmm1 in I444ToARGBRow_Unaligned_SSSE3()
1889 punpcklbw xmm0, xmm1 // BG in I422ToARGBRow_Unaligned_SSSE3()
1891 movdqa xmm1, xmm0 in I422ToARGBRow_Unaligned_SSSE3()
1893 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in I422ToARGBRow_Unaligned_SSSE3()
1895 movdqu [edx + 16], xmm1 in I422ToARGBRow_Unaligned_SSSE3()
1933 punpcklbw xmm0, xmm1 // BG in I411ToARGBRow_Unaligned_SSSE3()
1935 movdqa xmm1, xmm0 in I411ToARGBRow_Unaligned_SSSE3()
1937 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in I411ToARGBRow_Unaligned_SSSE3()
1939 movdqu [edx + 16], xmm1 in I411ToARGBRow_Unaligned_SSSE3()
1973 punpcklbw xmm0, xmm1 // BG in NV12ToARGBRow_Unaligned_SSSE3()
1975 movdqa xmm1, xmm0 in NV12ToARGBRow_Unaligned_SSSE3()
1977 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in NV12ToARGBRow_Unaligned_SSSE3()
1979 movdqu [edx + 16], xmm1 in NV12ToARGBRow_Unaligned_SSSE3()
2011 punpcklbw xmm0, xmm1 // BG in NV21ToARGBRow_Unaligned_SSSE3()
2013 movdqa xmm1, xmm0 in NV21ToARGBRow_Unaligned_SSSE3()
2015 punpckhwd xmm1, xmm2 // BGRA next 4 pixels in NV21ToARGBRow_Unaligned_SSSE3()
2017 movdqu [edx + 16], xmm1 in NV21ToARGBRow_Unaligned_SSSE3()
2051 punpcklbw xmm1, xmm0 // GB in I422ToBGRARow_SSSE3()
2054 punpcklwd xmm5, xmm1 // BGRA first 4 pixels in I422ToBGRARow_SSSE3()
2055 punpckhwd xmm0, xmm1 // BGRA next 4 pixels in I422ToBGRARow_SSSE3()
2092 punpcklbw xmm1, xmm0 // GB in I422ToBGRARow_Unaligned_SSSE3()
2095 punpcklwd xmm5, xmm1 // BGRA first 4 pixels in I422ToBGRARow_Unaligned_SSSE3()
2096 punpckhwd xmm0, xmm1 // BGRA next 4 pixels in I422ToBGRARow_Unaligned_SSSE3()
2133 punpcklbw xmm2, xmm1 // RG in I422ToABGRRow_SSSE3()
2135 movdqa xmm1, xmm2 in I422ToABGRRow_SSSE3()
2137 punpckhwd xmm1, xmm0 // RGBA next 4 pixels in I422ToABGRRow_SSSE3()
2139 movdqa [edx + 16], xmm1 in I422ToABGRRow_SSSE3()
2174 punpcklbw xmm2, xmm1 // RG in I422ToABGRRow_Unaligned_SSSE3()
2176 movdqa xmm1, xmm2 in I422ToABGRRow_Unaligned_SSSE3()
2178 punpckhwd xmm1, xmm0 // RGBA next 4 pixels in I422ToABGRRow_Unaligned_SSSE3()
2180 movdqu [edx + 16], xmm1 in I422ToABGRRow_Unaligned_SSSE3()
2215 punpcklbw xmm1, xmm2 // GR in I422ToRGBARow_SSSE3()
2218 punpcklwd xmm5, xmm1 // RGBA first 4 pixels in I422ToRGBARow_SSSE3()
2219 punpckhwd xmm0, xmm1 // RGBA next 4 pixels in I422ToRGBARow_SSSE3()
2256 punpcklbw xmm1, xmm2 // GR in I422ToRGBARow_Unaligned_SSSE3()
2259 punpcklwd xmm5, xmm1 // RGBA first 4 pixels in I422ToRGBARow_Unaligned_SSSE3()
2260 punpckhwd xmm0, xmm1 // RGBA next 4 pixels in I422ToRGBARow_Unaligned_SSSE3()
2305 movdqa xmm1, xmm0 in YToARGBRow_SSE2()
2307 punpckhwd xmm1, xmm1 // BGRA next 4 pixels in YToARGBRow_SSE2()
2309 por xmm1, xmm4 in YToARGBRow_SSE2()
2311 movdqa [edx + 16], xmm1 in YToARGBRow_SSE2()
2364 movdqa xmm1, xmm0 // swap bytes in MirrorRow_SSE2()
2366 psrlw xmm1, 8 in MirrorRow_SSE2()
2367 por xmm0, xmm1 in MirrorRow_SSE2()
2395 movdqa xmm1, kShuffleMirrorUV in MirrorRowUV_SSSE3()
2403 pshufb xmm0, xmm1 in MirrorRowUV_SSSE3()
2461 movdqa xmm1, [eax + 16] in SplitUV_SSE2()
2464 movdqa xmm3, xmm1 in SplitUV_SSE2()
2466 pand xmm1, xmm5 in SplitUV_SSE2()
2467 packuswb xmm0, xmm1 in SplitUV_SSE2()
2496 movdqa xmm1, [eax + 16] in CopyRow_SSE2()
2498 movdqa [eax + edx + 16], xmm1 in CopyRow_SSE2()
2587 movdqa xmm1, [eax + 16] in YUY2ToYRow_SSE2()
2590 pand xmm1, xmm5 in YUY2ToYRow_SSE2()
2591 packuswb xmm0, xmm1 in YUY2ToYRow_SSE2()
2618 movdqa xmm1, [eax + 16] in YUY2ToUVRow_SSE2()
2623 pavgb xmm1, xmm3 in YUY2ToUVRow_SSE2()
2625 psrlw xmm1, 8 in YUY2ToUVRow_SSE2()
2626 packuswb xmm0, xmm1 in YUY2ToUVRow_SSE2()
2627 movdqa xmm1, xmm0 in YUY2ToUVRow_SSE2()
2630 psrlw xmm1, 8 // V in YUY2ToUVRow_SSE2()
2631 packuswb xmm1, xmm1 in YUY2ToUVRow_SSE2()
2633 movq qword ptr [edx + edi], xmm1 in YUY2ToUVRow_SSE2()
2660 movdqa xmm1, [eax + 16] in YUY2ToUV422Row_SSE2()
2663 psrlw xmm1, 8 in YUY2ToUV422Row_SSE2()
2664 packuswb xmm0, xmm1 in YUY2ToUV422Row_SSE2()
2665 movdqa xmm1, xmm0 in YUY2ToUV422Row_SSE2()
2668 psrlw xmm1, 8 // V in YUY2ToUV422Row_SSE2()
2669 packuswb xmm1, xmm1 in YUY2ToUV422Row_SSE2()
2671 movq qword ptr [edx + edi], xmm1 in YUY2ToUV422Row_SSE2()
2694 movdqu xmm1, [eax + 16] in YUY2ToYRow_Unaligned_SSE2()
2697 pand xmm1, xmm5 in YUY2ToYRow_Unaligned_SSE2()
2698 packuswb xmm0, xmm1 in YUY2ToYRow_Unaligned_SSE2()
2725 movdqu xmm1, [eax + 16] in YUY2ToUVRow_Unaligned_SSE2()
2730 pavgb xmm1, xmm3 in YUY2ToUVRow_Unaligned_SSE2()
2732 psrlw xmm1, 8 in YUY2ToUVRow_Unaligned_SSE2()
2733 packuswb xmm0, xmm1 in YUY2ToUVRow_Unaligned_SSE2()
2734 movdqa xmm1, xmm0 in YUY2ToUVRow_Unaligned_SSE2()
2737 psrlw xmm1, 8 // V in YUY2ToUVRow_Unaligned_SSE2()
2738 packuswb xmm1, xmm1 in YUY2ToUVRow_Unaligned_SSE2()
2740 movq qword ptr [edx + edi], xmm1 in YUY2ToUVRow_Unaligned_SSE2()
2767 movdqu xmm1, [eax + 16] in YUY2ToUV422Row_Unaligned_SSE2()
2770 psrlw xmm1, 8 in YUY2ToUV422Row_Unaligned_SSE2()
2771 packuswb xmm0, xmm1 in YUY2ToUV422Row_Unaligned_SSE2()
2772 movdqa xmm1, xmm0 in YUY2ToUV422Row_Unaligned_SSE2()
2775 psrlw xmm1, 8 // V in YUY2ToUV422Row_Unaligned_SSE2()
2776 packuswb xmm1, xmm1 in YUY2ToUV422Row_Unaligned_SSE2()
2778 movq qword ptr [edx + edi], xmm1 in YUY2ToUV422Row_Unaligned_SSE2()
2799 movdqa xmm1, [eax + 16] in UYVYToYRow_SSE2()
2802 psrlw xmm1, 8 in UYVYToYRow_SSE2()
2803 packuswb xmm0, xmm1 in UYVYToYRow_SSE2()
2830 movdqa xmm1, [eax + 16] in UYVYToUVRow_SSE2()
2835 pavgb xmm1, xmm3 in UYVYToUVRow_SSE2()
2837 pand xmm1, xmm5 in UYVYToUVRow_SSE2()
2838 packuswb xmm0, xmm1 in UYVYToUVRow_SSE2()
2839 movdqa xmm1, xmm0 in UYVYToUVRow_SSE2()
2842 psrlw xmm1, 8 // V in UYVYToUVRow_SSE2()
2843 packuswb xmm1, xmm1 in UYVYToUVRow_SSE2()
2845 movq qword ptr [edx + edi], xmm1 in UYVYToUVRow_SSE2()
2872 movdqa xmm1, [eax + 16] in UYVYToUV422Row_SSE2()
2875 pand xmm1, xmm5 in UYVYToUV422Row_SSE2()
2876 packuswb xmm0, xmm1 in UYVYToUV422Row_SSE2()
2877 movdqa xmm1, xmm0 in UYVYToUV422Row_SSE2()
2880 psrlw xmm1, 8 // V in UYVYToUV422Row_SSE2()
2881 packuswb xmm1, xmm1 in UYVYToUV422Row_SSE2()
2883 movq qword ptr [edx + edi], xmm1 in UYVYToUV422Row_SSE2()
2904 movdqu xmm1, [eax + 16] in UYVYToYRow_Unaligned_SSE2()
2907 psrlw xmm1, 8 in UYVYToYRow_Unaligned_SSE2()
2908 packuswb xmm0, xmm1 in UYVYToYRow_Unaligned_SSE2()
2935 movdqu xmm1, [eax + 16] in UYVYToUVRow_Unaligned_SSE2()
2940 pavgb xmm1, xmm3 in UYVYToUVRow_Unaligned_SSE2()
2942 pand xmm1, xmm5 in UYVYToUVRow_Unaligned_SSE2()
2943 packuswb xmm0, xmm1 in UYVYToUVRow_Unaligned_SSE2()
2944 movdqa xmm1, xmm0 in UYVYToUVRow_Unaligned_SSE2()
2947 psrlw xmm1, 8 // V in UYVYToUVRow_Unaligned_SSE2()
2948 packuswb xmm1, xmm1 in UYVYToUVRow_Unaligned_SSE2()
2950 movq qword ptr [edx + edi], xmm1 in UYVYToUVRow_Unaligned_SSE2()
2977 movdqu xmm1, [eax + 16] in UYVYToUV422Row_Unaligned_SSE2()
2980 pand xmm1, xmm5 in UYVYToUV422Row_Unaligned_SSE2()
2981 packuswb xmm0, xmm1 in UYVYToUV422Row_Unaligned_SSE2()
2982 movdqa xmm1, xmm0 in UYVYToUV422Row_Unaligned_SSE2()
2985 psrlw xmm1, 8 // V in UYVYToUV422Row_Unaligned_SSE2()
2986 packuswb xmm1, xmm1 in UYVYToUV422Row_Unaligned_SSE2()
2988 movq qword ptr [edx + edi], xmm1 in UYVYToUV422Row_Unaligned_SSE2()
3038 movd xmm1, [esi] // _a_g in ARGBBlendRow_SSE2()
3040 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSE2()
3042 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSE2()
3045 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSE2()
3046 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSE2()
3069 movdqu xmm1, [esi] // _a_g in ARGBBlendRow_SSE2()
3071 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSE2()
3073 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSE2()
3076 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSE2()
3077 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSE2()
3100 movd xmm1, [esi] // _a_g in ARGBBlendRow_SSE2()
3102 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSE2()
3104 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSE2()
3107 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSE2()
3108 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSE2()
3170 movd xmm1, [esi] // _a_g in ARGBBlendRow_SSSE3()
3172 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSSE3()
3174 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSSE3()
3177 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSSE3()
3178 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSSE3()
3204 movdqa xmm1, [esi] // _a_g in ARGBBlendRow_SSSE3()
3206 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSSE3()
3208 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSSE3()
3211 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSSE3()
3212 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSSE3()
3230 movdqu xmm1, [esi] // _a_g in ARGBBlendRow_SSSE3()
3232 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSSE3()
3234 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSSE3()
3237 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSSE3()
3238 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSSE3()
3259 movd xmm1, [esi] // _a_g in ARGBBlendRow_SSSE3()
3261 psrlw xmm1, 8 // _a_g in ARGBBlendRow_SSSE3()
3263 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSSE3()
3266 pand xmm1, xmm5 // a_g_ convert to 8 bits again in ARGBBlendRow_SSSE3()
3267 paddusb xmm0, xmm1 // + src argb in ARGBBlendRow_SSSE3()
3302 movdqa xmm1, [eax] // read 4 pixels in ARGBAttenuateRow_SSE2()
3303 punpckhbw xmm1, xmm1 // next 2 pixels in ARGBAttenuateRow_SSE2()
3304 pshufhw xmm2, xmm1,0FFh // 8 alpha words in ARGBAttenuateRow_SSE2()
3306 pmulhuw xmm1, xmm2 // rgb * a in ARGBAttenuateRow_SSE2()
3310 psrlw xmm1, 8 in ARGBAttenuateRow_SSE2()
3311 packuswb xmm0, xmm1 in ARGBAttenuateRow_SSE2()
3349 movdqa xmm1, [eax] // read 4 pixels in ARGBAttenuateRow_SSSE3()
3350 punpcklbw xmm1, xmm1 // first 2 pixel rgbs in ARGBAttenuateRow_SSSE3()
3351 pmulhuw xmm0, xmm1 // rgb * a in ARGBAttenuateRow_SSSE3()
3352 movdqa xmm1, [eax] // read 4 pixels in ARGBAttenuateRow_SSSE3()
3353 pshufb xmm1, xmm5 // isolate next 2 alphas in ARGBAttenuateRow_SSSE3()
3356 pmulhuw xmm1, xmm2 // rgb * a in ARGBAttenuateRow_SSSE3()
3360 psrlw xmm1, 8 in ARGBAttenuateRow_SSSE3()
3361 packuswb xmm0, xmm1 in ARGBAttenuateRow_SSSE3()
3402 movdqa xmm1, [eax] // read 4 pixels in ARGBUnattenuateRow_SSE2()
3405 punpckhbw xmm1, xmm1 // next 2 in ARGBUnattenuateRow_SSE2()
3411 pmulhuw xmm1, xmm2 // rgb * a in ARGBUnattenuateRow_SSE2()
3415 packuswb xmm0, xmm1 in ARGBUnattenuateRow_SSE2()
3447 movdqa xmm1, [eax + 16] in ARGBGrayRow_SSSE3()
3449 pmaddubsw xmm1, xmm4 in ARGBGrayRow_SSSE3()
3450 phaddw xmm0, xmm1 in ARGBGrayRow_SSSE3()
3462 movdqa xmm1, xmm0 in ARGBGrayRow_SSSE3()
3464 punpckhwd xmm1, xmm3 // GGGA next 4 in ARGBGrayRow_SSSE3()
3467 movdqa [eax + edx + 16], xmm1 in ARGBGrayRow_SSSE3()
3512 movdqa xmm1, [eax + 16] in ARGBSepiaRow_SSSE3()
3514 pmaddubsw xmm1, xmm3 in ARGBSepiaRow_SSSE3()
3515 phaddw xmm5, xmm1 in ARGBSepiaRow_SSSE3()
3520 movdqa xmm1, [eax + 16] in ARGBSepiaRow_SSSE3()
3522 pmaddubsw xmm1, xmm4 in ARGBSepiaRow_SSSE3()
3523 phaddw xmm5, xmm1 in ARGBSepiaRow_SSSE3()
3527 movdqa xmm1, [eax + 16] in ARGBSepiaRow_SSSE3()
3529 psrld xmm1, 24 in ARGBSepiaRow_SSSE3()
3530 packuswb xmm6, xmm1 in ARGBSepiaRow_SSSE3()
3533 movdqa xmm1, xmm0 // Weave BG, RA together in ARGBSepiaRow_SSSE3()
3535 punpckhwd xmm1, xmm5 // BGRA next 4 in ARGBSepiaRow_SSSE3()
3538 movdqa [eax + 16], xmm1 in ARGBSepiaRow_SSSE3()
3572 movdqa xmm1, [eax + 16] in ARGBColorMatrixRow_SSSE3()
3574 pmaddubsw xmm1, xmm3 in ARGBColorMatrixRow_SSSE3()
3576 phaddsw xmm5, xmm1 // G in ARGBColorMatrixRow_SSSE3()
3583 movdqa xmm1, [eax + 16] in ARGBColorMatrixRow_SSSE3()
3585 pmaddubsw xmm1, xmm4 in ARGBColorMatrixRow_SSSE3()
3586 phaddsw xmm5, xmm1 in ARGBColorMatrixRow_SSSE3()
3590 movdqa xmm1, [eax + 16] in ARGBColorMatrixRow_SSSE3()
3592 psrld xmm1, 24 in ARGBColorMatrixRow_SSSE3()
3593 packuswb xmm6, xmm1 in ARGBColorMatrixRow_SSSE3()
3595 movdqa xmm1, xmm0 // Weave BG, RA together in ARGBColorMatrixRow_SSSE3()
3598 punpckhwd xmm1, xmm5 // BGRA next 4 in ARGBColorMatrixRow_SSSE3()
3601 movdqa [eax + 16], xmm1 in ARGBColorMatrixRow_SSSE3()
3684 movdqa xmm1, [eax] // read 4 pixels in ARGBQuantizeRow_SSE2()
3685 punpckhbw xmm1, xmm5 // next 2 pixels in ARGBQuantizeRow_SSE2()
3686 pmulhuw xmm1, xmm2 in ARGBQuantizeRow_SSE2()
3689 pmullw xmm1, xmm3 in ARGBQuantizeRow_SSE2()
3692 paddw xmm1, xmm4 in ARGBQuantizeRow_SSE2()
3693 packuswb xmm0, xmm1 in ARGBQuantizeRow_SSE2()
3738 movdqa xmm1, [eax + 16] in CumulativeSumToAverage_SSE2()
3744 psubd xmm1, [eax + edx * 4 + 16] in CumulativeSumToAverage_SSE2()
3751 psubd xmm1, [esi + 16] in CumulativeSumToAverage_SSE2()
3757 paddd xmm1, [esi + edx * 4 + 16] in CumulativeSumToAverage_SSE2()
3763 cvtdq2ps xmm1, xmm1 in CumulativeSumToAverage_SSE2()
3765 mulps xmm1, xmm4 in CumulativeSumToAverage_SSE2()
3771 cvtps2dq xmm1, xmm1 in CumulativeSumToAverage_SSE2()
3774 packssdw xmm0, xmm1 in CumulativeSumToAverage_SSE2()
3821 pxor xmm1, xmm1 in ComputeCumulativeSumRow_SSE2()
3835 punpcklbw xmm2, xmm1 in ComputeCumulativeSumRow_SSE2()
3837 punpcklwd xmm2, xmm1 in ComputeCumulativeSumRow_SSE2()
3838 punpckhwd xmm3, xmm1 in ComputeCumulativeSumRow_SSE2()
3840 punpckhbw xmm4, xmm1 in ComputeCumulativeSumRow_SSE2()
3842 punpcklwd xmm4, xmm1 in ComputeCumulativeSumRow_SSE2()
3843 punpckhwd xmm5, xmm1 in ComputeCumulativeSumRow_SSE2()
3879 punpcklbw xmm2, xmm1 in ComputeCumulativeSumRow_SSE2()
3880 punpcklwd xmm2, xmm1 in ComputeCumulativeSumRow_SSE2()
3912 movdqa xmm1, xmm0 in ARGBShadeRow_SSE2()
3914 punpckhbw xmm1, xmm1 // next 2 in ARGBShadeRow_SSE2()
3916 pmulhuw xmm1, xmm2 // argb * value in ARGBShadeRow_SSE2()
3918 psrlw xmm1, 8 in ARGBShadeRow_SSE2()
3919 packuswb xmm0, xmm1 in ARGBShadeRow_SSE2()
3968 cvttps2dq xmm1, xmm3 // x, y float to int next 2 in ARGBAffineRow_SSE2()
3969 packssdw xmm0, xmm1 // x, y as 8 shorts in ARGBAffineRow_SSE2()
3975 movd xmm1, [eax + esi] // read pixel 0 in ARGBAffineRow_SSE2()
3977 punpckldq xmm1, xmm6 // combine pixel 0 and 1 in ARGBAffineRow_SSE2()
3979 movq qword ptr [edx], xmm1 in ARGBAffineRow_SSE2()
4048 movdqa xmm1, xmm0 in ARGBInterpolateRow_SSSE3()
4050 punpckhbw xmm1, xmm2 in ARGBInterpolateRow_SSSE3()
4052 pmaddubsw xmm1, xmm5 in ARGBInterpolateRow_SSSE3()
4054 psrlw xmm1, 7 in ARGBInterpolateRow_SSSE3()
4055 packuswb xmm0, xmm1 in ARGBInterpolateRow_SSSE3()