;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------------
; void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
;                               short *diff, unsigned char *Predictor,
;                               int pitch);
;
; Writes a 4x4 block of 16-bit differences: diff[r][c] = z[r][c] - Predictor[r][c].
;   row r of z         is read at     z         + r * src_stride   (bytes)
;   row r of Predictor is read at     Predictor + r * pitch        (bytes)
;   row r of diff      is written at  diff      + r * pitch        (shorts)
;
; Only SSE2 (xmm0-xmm2) registers are used, so no MMX state is left behind
; for the caller to clear. None of the memory accesses requires alignment.
; Clobbers: rax, rcx, rdx, xmm0, xmm1, xmm2.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_b_sse2_impl) PRIVATE
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi,        arg(2)              ; diff
    mov         rax,        arg(3)              ; Predictor
    mov         rsi,        arg(0)              ; z
    movsxd      rdx,        dword ptr arg(1)    ; src_stride
    movsxd      rcx,        dword ptr arg(4)    ; pitch
    pxor        xmm2,       xmm2                ; zero, used to widen bytes to words

    ; row 0
    movd        xmm0,       [rsi]               ; 4 source pixels
    movd        xmm1,       [rax]               ; 4 predictor pixels
    punpcklbw   xmm0,       xmm2
    punpcklbw   xmm1,       xmm2
    psubw       xmm0,       xmm1
    movq        [rdi],      xmm0                ; 4 shorts

    ; row 1
    movd        xmm0,       [rsi+rdx]
    movd        xmm1,       [rax+rcx]
    punpcklbw   xmm0,       xmm2
    punpcklbw   xmm1,       xmm2
    psubw       xmm0,       xmm1
    movq        [rdi+rcx*2], xmm0               ; diff + 1 * pitch shorts

    ; row 2
    movd        xmm0,       [rsi+rdx*2]
    movd        xmm1,       [rax+rcx*2]
    punpcklbw   xmm0,       xmm2
    punpcklbw   xmm1,       xmm2
    psubw       xmm0,       xmm1
    movq        [rdi+rcx*4], xmm0               ; diff + 2 * pitch shorts

    lea         rsi,        [rsi+rdx*2]         ; z += 2 * src_stride
    lea         rcx,        [rcx+rcx*2]         ; rcx = 3 * pitch

    ; row 3
    movd        xmm0,       [rsi+rdx]           ; z + 3 * src_stride
    movd        xmm1,       [rax+rcx]           ; Predictor + 3 * pitch
    punpcklbw   xmm0,       xmm2
    punpcklbw   xmm1,       xmm2
    psubw       xmm0,       xmm1
    movq        [rdi+rcx*2], xmm0               ; diff + 3 * pitch shorts

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
; void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
;                            unsigned char *pred, int pred_stride)
;
; Writes 16 rows of 16 differences (src - pred) as consecutive shorts starting
; at diff (16 shorts per row, 512 bytes in total). Two rows are processed per
; loop iteration.
;
; All loads and stores use movdqa, so diff, src and pred and both strides must
; keep every accessed address 16-byte aligned.
;
; Widening trick: psubb yields the low byte of each difference; the high byte
; is 0xFF exactly when pred > src (unsigned). pcmpgtb is a signed compare, so
; both operands are XORed with 0x80 first to map unsigned order onto signed
; order. Interleaving low byte and sign byte gives the 16-bit result.
;
; Clobbers: rax, rcx, rdx, xmm0-xmm5. rbx is saved and restored.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mby_sse2) PRIVATE
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi,        arg(0)              ; diff
    mov         rsi,        arg(1)              ; src
    movsxd      rdx,        dword ptr arg(2)    ; src_stride
    mov         rax,        arg(3)              ; pred
    movdqa      xmm4,       [GLOBAL(t80)]       ; load before rbx is reused below
    push        rbx
    mov         rcx,        8                   ; 8 iterations x 2 rows
    movsxd      rbx,        dword ptr arg(4)    ; pred_stride

.submby_loop:
    ; first row of the pair
    movdqa      xmm0,       [rsi]               ; src
    movdqa      xmm1,       [rax]               ; pred

    movdqa      xmm2,       xmm0
    psubb       xmm0,       xmm1                ; low bytes of src - pred

    pxor        xmm1,       xmm4                ; bias to signed range
    pxor        xmm2,       xmm4
    pcmpgtb     xmm1,       xmm2                ; 0xFF where pred > src

    movdqa      xmm2,       xmm0
    punpcklbw   xmm0,       xmm1                ; differences 0..7 as shorts
    punpckhbw   xmm2,       xmm1                ; differences 8..15 as shorts

    ; second row of the pair
    movdqa      xmm3,       [rsi + rdx]         ; src, next row
    movdqa      xmm5,       [rax + rbx]         ; pred, next row

    lea         rsi,        [rsi+rdx*2]
    lea         rax,        [rax+rbx*2]

    movdqa      [rdi],      xmm0
    movdqa      [rdi +16],  xmm2

    movdqa      xmm1,       xmm3
    psubb       xmm3,       xmm5                ; low bytes of src - pred

    pxor        xmm5,       xmm4                ; bias to signed range
    pxor        xmm1,       xmm4
    pcmpgtb     xmm5,       xmm1                ; 0xFF where pred > src

    movdqa      xmm1,       xmm3
    punpcklbw   xmm3,       xmm5                ; differences 0..7 as shorts
    punpckhbw   xmm1,       xmm5                ; differences 8..15 as shorts

    movdqa      [rdi +32],  xmm3
    movdqa      [rdi +48],  xmm1

    add         rdi,        64                  ; two rows of 16 shorts
    dec         rcx
    jnz         .submby_loop

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
; vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                        int src_stride, unsigned char *upred,
;                        unsigned char *vpred, int pred_stride)
;
; Writes two 8x8 difference planes as consecutive shorts: first usrc - upred
; starting at diff + 256 shorts, then vsrc - vpred immediately after it
; (diff + 320 shorts). Both source planes share src_stride and both predictor
; planes share pred_stride. Two 8-pixel rows are packed into one xmm register
; per loop iteration.
;
; Loads use movq and need no alignment; stores use movdqa, so diff must be
; 16-byte aligned. The byte-to-short widening uses the same biased-compare
; sign mask as vp8_subtract_mby_sse2.
;
; Clobbers: rax, rcx, rdx, xmm0-xmm4. rbx is saved and restored.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mbuv_sse2) PRIVATE
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movdqa      xmm4,       [GLOBAL(t80)]       ; load before rbx is reused below
    mov         rdi,        arg(0)              ; diff
    mov         rsi,        arg(1)              ; usrc
    movsxd      rdx,        dword ptr arg(3)    ; src_stride
    mov         rax,        arg(4)              ; upred
    add         rdi,        256*2               ; diff = diff + 256 (shorts)
    mov         rcx,        4                   ; 4 iterations x 2 rows
    push        rbx
    movsxd      rbx,        dword ptr arg(6)    ; pred_stride

    ; u plane
.submbu_loop:
    movq        xmm0,       [rsi]               ; src
    movq        xmm2,       [rsi+rdx]           ; src -- next line
    movq        xmm1,       [rax]               ; pred
    movq        xmm3,       [rax+rbx]           ; pred -- next line
    lea         rsi,        [rsi + rdx*2]
    lea         rax,        [rax + rbx*2]

    punpcklqdq  xmm0,       xmm2                ; two src rows in one register
    punpcklqdq  xmm1,       xmm3                ; two pred rows in one register

    movdqa      xmm2,       xmm0
    psubb       xmm0,       xmm1                ; low bytes of src - pred

    pxor        xmm1,       xmm4                ; bias to signed range
    pxor        xmm2,       xmm4
    pcmpgtb     xmm1,       xmm2                ; 0xFF where pred > src

    movdqa      xmm2,       xmm0
    punpcklbw   xmm0,       xmm1                ; first row as shorts
    punpckhbw   xmm2,       xmm1                ; second row as shorts

    movdqa      [rdi],      xmm0                ; store difference
    movdqa      [rdi +16],  xmm2                ; store difference
    add         rdi,        32
    sub         rcx,        1
    jnz         .submbu_loop

    mov         rsi,        arg(2)              ; vsrc
    mov         rax,        arg(5)              ; vpred
    mov         rcx,        4                   ; 4 iterations x 2 rows

    ; v plane (rdi carries on directly after the u differences)
.submbv_loop:
    movq        xmm0,       [rsi]               ; src
    movq        xmm2,       [rsi+rdx]           ; src -- next line
    movq        xmm1,       [rax]               ; pred
    movq        xmm3,       [rax+rbx]           ; pred -- next line
    lea         rsi,        [rsi + rdx*2]
    lea         rax,        [rax + rbx*2]

    punpcklqdq  xmm0,       xmm2                ; two src rows in one register
    punpcklqdq  xmm1,       xmm3                ; two pred rows in one register

    movdqa      xmm2,       xmm0
    psubb       xmm0,       xmm1                ; low bytes of src - pred

    pxor        xmm1,       xmm4                ; bias to signed range
    pxor        xmm2,       xmm4
    pcmpgtb     xmm1,       xmm2                ; 0xFF where pred > src

    movdqa      xmm2,       xmm0
    punpcklbw   xmm0,       xmm1                ; first row as shorts
    punpckhbw   xmm2,       xmm1                ; second row as shorts

    movdqa      [rdi],      xmm0                ; store difference
    movdqa      [rdi +16],  xmm2                ; store difference
    add         rdi,        32
    sub         rcx,        1
    jnz         .submbv_loop

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
align 16
t80:                                            ; 0x80 bias for unsigned->signed byte compare
    times 16 db 0x80