;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; Two packed 16-bit words, each holding the bias (3) that is added to every
; second-pass result before the arithmetic shift right by 3.
%define IWALSH_BIAS_PAIR    30003h

; Distance in bytes between two consecutive 16-bit results in the output.
%define IWALSH_OUT_STEP     32

;-----------------------------------------------------------------------
;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
;
; Syntax: NASM/yasm (Intel operand order).  Arguments are fetched with the
;         arg() macro after SHADOW_ARGS_TO_STACK; sym(), arg() and the
;         SHADOW/UNSHADOW macros are expected to come from the include above.
;
; In:     input   16 signed 16-bit words, read as four quadwords at byte
;                 offsets 0, 8, 16 and 24 (rows ip[0], ip[4], ip[8], ip[12]).
;         output  destination base address.
;
; Out:    Sixteen 16-bit words.  Result k (k = 0..15) is written to
;         output + IWALSH_OUT_STEP*k bytes; no other bytes of the output
;         are written.
;
; Method: pass 1 runs add/subtract butterflies across the four rows, the
;         4x4 result is transposed in registers, pass 2 repeats the same
;         butterflies and finishes each value with (x + 3) >> 3 (arithmetic
;         shift).  All arithmetic is wrapping 16-bit (paddw/psubw).
;
; Clobb:  rax, rcx, rdx, mm0-mm7, flags.  rbp is saved and restored.
;         No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE
sym(vp8_short_inv_walsh4x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    ; end prolog

    mov         rdx, arg(0)             ; rdx = input
    mov         eax, IWALSH_BIAS_PAIR   ; constant fits in 32 bits

    movq        mm0, [rdx + 0]          ;ip[0]
    movq        mm1, [rdx + 8]          ;ip[4]
    ; Only the low dword of mm7 is consumed by the punpcklwd below, so a
    ; 32-bit movd is sufficient and does not depend on 64-bit mode.
    movd        mm7, eax

    movq        mm2, [rdx + 16]         ;ip[8]
    movq        mm3, [rdx + 24]         ;ip[12]
    punpcklwd   mm7, mm7                ;0003000300030003h
    mov         rdx, arg(1)             ; rdx = output (input is fully loaded)

    ; ---- pass 1: butterflies across rows ----
    movq        mm4, mm0
    movq        mm5, mm1

    paddw       mm4, mm3                ;ip[0] + ip[12] aka a1
    paddw       mm5, mm2                ;ip[4] + ip[8] aka b1

    movq        mm6, mm4                ;temp a1
    paddw       mm4, mm5                ;a1 + b1
    psubw       mm6, mm5                ;a1 - b1

    psubw       mm0, mm3                ;ip[0] - ip[12] aka d1
    psubw       mm1, mm2                ;ip[4] - ip[8] aka c1

    movq        mm5, mm0                ;temp d1
    paddw       mm0, mm1                ;d1 + c1
    psubw       mm5, mm1                ;d1 - c1

    ; ---- transpose the 4x4 block of words ----
    ; Pass-1 rows are now in mm4, mm0, mm6, mm5 (row 0..3).  In the
    ; comments below "rc" is row r, column c, most significant word first:
    ; 03 02 01 00
    ; 13 12 11 10
    ; 23 22 21 20
    ; 33 32 31 30

    movq        mm3, mm4                ; 03 02 01 00
    punpcklwd   mm4, mm0                ; 11 01 10 00
    punpckhwd   mm3, mm0                ; 13 03 12 02

    movq        mm1, mm6                ; 23 22 21 20
    punpcklwd   mm6, mm5                ; 31 21 30 20
    punpckhwd   mm1, mm5                ; 33 23 32 22

    movq        mm0, mm4                ; 11 01 10 00
    movq        mm2, mm3                ; 13 03 12 02

    punpckldq   mm0, mm6                ; 30 20 10 00 aka ip[0]
    punpckhdq   mm4, mm6                ; 31 21 11 01 aka ip[4]

    punpckldq   mm2, mm1                ; 32 22 12 02 aka ip[8]
    punpckhdq   mm3, mm1                ; 33 23 13 03 aka ip[12]

    ; ---- pass 2: same butterflies on the transposed data, then (x+3)>>3 ----
    movq        mm1, mm0
    movq        mm5, mm4
    paddw       mm1, mm3                ;ip[0] + ip[12] aka a1
    paddw       mm5, mm2                ;ip[4] + ip[8] aka b1

    movq        mm6, mm1                ;temp a1
    paddw       mm1, mm5                ;a1 + b1
    psubw       mm6, mm5                ;a1 - b1
    paddw       mm1, mm7                ; + 3
    paddw       mm6, mm7                ; + 3
    psraw       mm1, 3
    psraw       mm6, 3

    psubw       mm0, mm3                ;ip[0] - ip[12] aka d1
    psubw       mm4, mm2                ;ip[4] - ip[8] aka c1

    movq        mm5, mm0                ;temp d1
    paddw       mm0, mm4                ;d1 + c1
    psubw       mm5, mm4                ;d1 - c1
    paddw       mm0, mm7                ; + 3
    paddw       mm5, mm7                ; + 3
    psraw       mm0, 3
    psraw       mm5, 3

    ; ---- scatter the 16 results, one word every IWALSH_OUT_STEP bytes ----
    ; Word j of mm1 / mm0 / mm6 / mm5 is result number 4*j + 0 / 1 / 2 / 3.
    ; Each register is drained through a 32-bit GPR: low word, then
    ; shr 16 for the next word, then psrlq 32 to expose the upper dword.

    movd        eax, mm1
    movd        ecx, mm0
    psrlq       mm0, 32
    psrlq       mm1, 32
    mov         word [rdx + IWALSH_OUT_STEP*0], ax
    mov         word [rdx + IWALSH_OUT_STEP*1], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word [rdx + IWALSH_OUT_STEP*4], ax
    mov         word [rdx + IWALSH_OUT_STEP*5], cx
    movd        eax, mm1
    movd        ecx, mm0
    mov         word [rdx + IWALSH_OUT_STEP*8], ax
    mov         word [rdx + IWALSH_OUT_STEP*9], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word [rdx + IWALSH_OUT_STEP*12], ax
    mov         word [rdx + IWALSH_OUT_STEP*13], cx

    movd        eax, mm6
    movd        ecx, mm5
    psrlq       mm5, 32
    psrlq       mm6, 32
    mov         word [rdx + IWALSH_OUT_STEP*2], ax
    mov         word [rdx + IWALSH_OUT_STEP*3], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word [rdx + IWALSH_OUT_STEP*6], ax
    mov         word [rdx + IWALSH_OUT_STEP*7], cx
    movd        eax, mm6
    movd        ecx, mm5
    mov         word [rdx + IWALSH_OUT_STEP*10], ax
    mov         word [rdx + IWALSH_OUT_STEP*11], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word [rdx + IWALSH_OUT_STEP*14], ax
    mov         word [rdx + IWALSH_OUT_STEP*15], cx

    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret

