;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; MMX residual helpers: each routine computes (source - prediction) on
; unsigned bytes and stores the results as signed 16-bit words.
;
; NOTE(review): none of the routines in this file executes emms; presumably
; the callers are responsible for clearing MMX state -- confirm.

;-----------------------------------------------------------------------------
; SUBTRACT_4PX src, pred, dst
;
; Reads 4 bytes from %1 and 4 bytes from %2, widens both to words and writes
; the four word differences (%1 - %2) as one quadword to %3.
; Requires mm7 == 0. Clobbers mm0, mm1.
;-----------------------------------------------------------------------------
%macro SUBTRACT_4PX 3
    movd        mm0, %1
    movd        mm1, %2
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        %3, mm0
%endmacro

;-----------------------------------------------------------------------------
; SUBTRACT_8PX_MBY src, pred, dst_lo, dst_hi
;
; Reads 8 bytes from %1 and 8 bytes from %2, widens both to words and writes
; the differences of bytes 0-3 to %3 and of bytes 4-7 to %4.
; Requires mm0 == 0. Clobbers mm1, mm2, mm3, mm4.
;-----------------------------------------------------------------------------
%macro SUBTRACT_8PX_MBY 4
    movq        mm1, %1
    movq        mm3, %2
    movq        mm2, mm1
    movq        mm4, mm3
    punpcklbw   mm1, mm0
    punpcklbw   mm3, mm0
    punpckhbw   mm2, mm0
    punpckhbw   mm4, mm0
    psubw       mm1, mm3
    psubw       mm2, mm4
    movq        %3, mm1
    movq        %4, mm2
%endmacro

;-----------------------------------------------------------------------------
; SUBTRACT_8X8_PLANE
;
; Subtracts an 8-row by 8-byte prediction from a source plane and appends the
; 64 word differences to the output.
;
; In:   rsi = source,     rdx = source stride (bytes)
;       rax = prediction, rbx = prediction stride (bytes)
;       rdi = output,     mm7 = 0
; Out:  rdi advanced by 128 bytes; rsi and rax advanced by 8 rows.
; Clobbers rcx, mm0, mm1, mm3, mm4.
;-----------------------------------------------------------------------------
%macro SUBTRACT_8X8_PLANE 0
    mov         rcx, 8                      ; rows remaining
%%next_row:
    movq        mm0, [rsi]
    movq        mm1, [rax]
    movq        mm3, mm0
    movq        mm4, mm1
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    punpckhbw   mm3, mm7
    punpckhbw   mm4, mm7
    psubw       mm0, mm1
    psubw       mm3, mm4
    movq        [rdi], mm0
    movq        [rdi+8], mm3
    add         rdi, 16
    add         rsi, rdx
    add         rax, rbx

    dec         rcx
    jnz         %%next_row
%endmacro

;-----------------------------------------------------------------------------
; void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; 4x4 block residual: diff = z - Predictor.
; Source rows are src_stride bytes apart, prediction rows are pitch bytes
; apart, and output rows are pitch shorts (2 * pitch bytes) apart.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_b_mmx_impl) PRIVATE
sym(vp8_subtract_b_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; z
    movsxd      rdx, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; diff
    mov         rax, arg(3)                 ; Predictor
    movsxd      rcx, dword ptr arg(4)       ; pitch
    pxor        mm7, mm7                    ; zero used to widen bytes to words

    ; row 0
    SUBTRACT_4PX [rsi], [rax], [rdi]
    ; row 1
    SUBTRACT_4PX [rsi+rdx], [rax+rcx], [rdi+rcx*2]
    ; row 2
    SUBTRACT_4PX [rsi+rdx*2], [rax+rcx*2], [rdi+rcx*4]

    ; x86 addressing has no *3 scale: step the source pointer down two rows
    ; and turn rcx into 3 * pitch so row 3 can be addressed directly.
    lea         rsi, [rsi+rdx*2]
    lea         rcx, [rcx+rcx*2]

    ; row 3
    SUBTRACT_4PX [rsi+rdx], [rax+rcx], [rdi+rcx*2]

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------------
; void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;                           unsigned char *pred, int pred_stride)
;
; 16x16 block residual: diff = src - pred.
; Output is written contiguously, 16 shorts (32 bytes) per row.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mby_mmx) PRIVATE
sym(vp8_subtract_mby_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    mov         rdi, arg(0)                 ; diff
    mov         rsi, arg(1)                 ; src
    movsxd      rdx, dword ptr arg(2)       ; src_stride
    mov         rax, arg(3)                 ; pred
    movsxd      rbx, dword ptr arg(4)       ; pred_stride

    pxor        mm0, mm0                    ; zero used to widen bytes to words
    mov         rcx, 16                     ; rows remaining

.next_row:
    ; left 8 pixels of the row
    SUBTRACT_8PX_MBY [rsi], [rax], [rdi], [rdi+8]
    ; right 8 pixels of the row
    SUBTRACT_8PX_MBY [rsi+8], [rax+8], [rdi+16], [rdi+24]

    add         rdi, 32
    lea         rax, [rax+rbx]
    lea         rsi, [rsi+rdx]
    dec         rcx
    jnz         .next_row

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
; vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                       int src_stride, unsigned char *upred,
;                       unsigned char *vpred, int pred_stride)
;
; Two 8x8 residuals: (usrc - upred) followed directly by (vsrc - vpred).
; Output starts 256 shorts past diff; each plane contributes 64 shorts.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mbuv_mmx) PRIVATE
sym(vp8_subtract_mbuv_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    mov         rdi, arg(0)                 ; diff
    add         rdi, 256*2                  ; skip the first 256 shorts
    movsxd      rdx, dword ptr arg(3)       ; src_stride
    movsxd      rbx, dword ptr arg(6)       ; pred_stride
    pxor        mm7, mm7                    ; zero used to widen bytes to words

    ; U plane
    mov         rsi, arg(1)                 ; usrc
    mov         rax, arg(4)                 ; upred
    SUBTRACT_8X8_PLANE

    ; V plane; rdi already points just past the U output
    mov         rsi, arg(2)                 ; vsrc
    mov         rax, arg(5)                 ; vpred
    SUBTRACT_8X8_PLANE

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret