;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; MMX block-copy routines: 8x8, 8x4 and 16x16 byte blocks, copied row by row
; between two strided buffers using 8-byte movq loads and stores.
;
; Notes that apply to all three routines:
;  * arg(n), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS, sym() and PRIVATE come from
;    the included x86_abi_support.asm; their definitions are not visible here.
;    Presumably SHADOW_ARGS_TO_STACK 4 makes the four arguments addressable
;    through arg(0)..arg(3) -- confirm against that header.
;  * Both strides are read as 32-bit values and sign-extended (movsxd) before
;    being used in address arithmetic.
;  * rsi and rdi are saved in the prolog and restored in the epilog; rax, rcx
;    and mm0-mm5 are overwritten and not restored.
;  * No emms is executed in this file. NOTE(review): presumably the caller is
;    responsible for clearing MMX state before any x87 code runs -- confirm.
;  * Loads and stores are interleaved in groups of rows. NOTE(review): this
;    looks like it assumes src and dst do not overlap -- confirm with callers.

%include "vpx_ports/x86_abi_support.asm"


; Copies an 8-byte-wide, 8-row block from src to dst.
;
;void vp8_copy_mem8x8_mmx(
;    unsigned char *src,      ; first byte of source row 0
;    int            src_stride, ; byte distance between source rows
;    unsigned char *dst,      ; first byte of destination row 0
;    int            dst_stride  ; byte distance between destination rows
;    )
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0) ;src;
        movq        mm0,        [rsi]               ; load row 0

        movsxd      rax,        dword ptr arg(1) ;src_stride;
        mov         rdi,        arg(2) ;dst;

        movq        mm1,        [rsi+rax]           ; load row 1
        movq        mm2,        [rsi+rax*2]         ; load row 2

        movsxd      rcx,        dword ptr arg(3) ;dst_stride
        lea         rsi,        [rsi+rax*2]         ; rsi = src + 2*src_stride

        movq        [rdi],      mm0                 ; store row 0
        add         rsi,        rax                 ; rsi -> source row 3

        movq        [rdi+rcx],      mm1             ; store row 1
        movq        [rdi+rcx*2],    mm2             ; store row 2


        lea         rdi,        [rdi+rcx*2]         ; rdi = dst + 2*dst_stride
        movq        mm3,        [rsi]               ; load row 3

        add         rdi,        rcx                 ; rdi -> destination row 3
        movq        mm4,        [rsi+rax]           ; load row 4

        movq        mm5,        [rsi+rax*2]         ; load row 5
        movq        [rdi],      mm3                 ; store row 3

        lea         rsi,        [rsi+rax*2]         ; rsi -> source row 5
        movq        [rdi+rcx],  mm4                 ; store row 4

        movq        [rdi+rcx*2],    mm5             ; store row 5
        lea         rdi,        [rdi+rcx*2]         ; rdi -> destination row 5

        movq        mm0,        [rsi+rax]           ; load row 6
        movq        mm1,        [rsi+rax*2]         ; load row 7

        movq        [rdi+rcx],  mm0                 ; store row 6
        movq        [rdi+rcx*2],mm1                 ; store row 7

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


; Copies an 8-byte-wide, 4-row block from src to dst.
;
;void vp8_copy_mem8x4_mmx(
;    unsigned char *src,      ; first byte of source row 0
;    int            src_stride, ; byte distance between source rows
;    unsigned char *dst,      ; first byte of destination row 0
;    int            dst_stride  ; byte distance between destination rows
;    )
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0) ;src;
        movq        mm0,        [rsi]               ; load row 0

        movsxd      rax,        dword ptr arg(1) ;src_stride;
        mov         rdi,        arg(2) ;dst;

        movq        mm1,        [rsi+rax]           ; load row 1
        movq        mm2,        [rsi+rax*2]         ; load row 2

        movsxd      rcx,        dword ptr arg(3) ;dst_stride
        lea         rsi,        [rsi+rax*2]         ; rsi -> source row 2

        movq        [rdi],      mm0                 ; store row 0
        movq        [rdi+rcx],      mm1             ; store row 1

        movq        [rdi+rcx*2],    mm2             ; store row 2
        lea         rdi,        [rdi+rcx*2]         ; rdi -> destination row 2

        movq        mm3,        [rsi+rax]           ; load row 3
        movq        [rdi+rcx],      mm3             ; store row 3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


; Copies a 16-byte-wide, 16-row block from src to dst.
; Each row is moved as two 8-byte halves (offsets +0 and +8). The body is
; fully unrolled: five groups of three rows (rows 0-14), then row 15 alone.
; After each group's loads rsi advances three source rows; after each group's
; stores rdi advances three destination rows.
;
;void vp8_copy_mem16x16_mmx(
;    unsigned char *src,      ; first byte of source row 0
;    int            src_stride, ; byte distance between source rows
;    unsigned char *dst,      ; first byte of destination row 0
;    int            dst_stride  ; byte distance between destination rows
;    )
global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0) ;src;
        movsxd      rax,        dword ptr arg(1) ;src_stride;

        mov         rdi,        arg(2) ;dst;
        movsxd      rcx,        dword ptr arg(3) ;dst_stride

        ; ---- rows 0-2: load ----
        movq        mm0,            [rsi]           ; row 0, bytes 0-7
        movq        mm3,            [rsi+8];        ; row 0, bytes 8-15

        movq        mm1,            [rsi+rax]       ; row 1, bytes 0-7
        movq        mm4,            [rsi+rax+8]     ; row 1, bytes 8-15

        movq        mm2,            [rsi+rax*2]     ; row 2, bytes 0-7
        movq        mm5,            [rsi+rax*2+8]   ; row 2, bytes 8-15

        lea         rsi,            [rsi+rax*2]
        add         rsi,            rax             ; rsi -> source row 3

        ; ---- rows 0-2: store ----
        movq        [rdi],          mm0
        movq        [rdi+8],        mm3

        movq        [rdi+rcx],      mm1
        movq        [rdi+rcx+8],    mm4

        movq        [rdi+rcx*2],    mm2
        movq        [rdi+rcx*2+8],  mm5

        lea         rdi,            [rdi+rcx*2]
        add         rdi,            rcx             ; rdi -> destination row 3

        ; ---- rows 3-5: load ----
        movq        mm0,            [rsi]
        movq        mm3,            [rsi+8];

        movq        mm1,            [rsi+rax]
        movq        mm4,            [rsi+rax+8]

        movq        mm2,            [rsi+rax*2]
        movq        mm5,            [rsi+rax*2+8]

        lea         rsi,            [rsi+rax*2]
        add         rsi,            rax             ; rsi -> source row 6

        ; ---- rows 3-5: store ----
        movq        [rdi],          mm0
        movq        [rdi+8],        mm3

        movq        [rdi+rcx],      mm1
        movq        [rdi+rcx+8],    mm4

        movq        [rdi+rcx*2],    mm2
        movq        [rdi+rcx*2+8],  mm5

        lea         rdi,            [rdi+rcx*2]
        add         rdi,            rcx             ; rdi -> destination row 6

        ; ---- rows 6-8: load ----
        movq        mm0,            [rsi]
        movq        mm3,            [rsi+8];

        movq        mm1,            [rsi+rax]
        movq        mm4,            [rsi+rax+8]

        movq        mm2,            [rsi+rax*2]
        movq        mm5,            [rsi+rax*2+8]

        lea         rsi,            [rsi+rax*2]
        add         rsi,            rax             ; rsi -> source row 9

        ; ---- rows 6-8: store ----
        movq        [rdi],          mm0
        movq        [rdi+8],        mm3

        movq        [rdi+rcx],      mm1
        movq        [rdi+rcx+8],    mm4

        movq        [rdi+rcx*2],    mm2
        movq        [rdi+rcx*2+8],  mm5

        lea         rdi,            [rdi+rcx*2]
        add         rdi,            rcx             ; rdi -> destination row 9

        ; ---- rows 9-11: load ----
        movq        mm0,            [rsi]
        movq        mm3,            [rsi+8];

        movq        mm1,            [rsi+rax]
        movq        mm4,            [rsi+rax+8]

        movq        mm2,            [rsi+rax*2]
        movq        mm5,            [rsi+rax*2+8]

        lea         rsi,            [rsi+rax*2]
        add         rsi,            rax             ; rsi -> source row 12

        ; ---- rows 9-11: store ----
        movq        [rdi],          mm0
        movq        [rdi+8],        mm3

        movq        [rdi+rcx],      mm1
        movq        [rdi+rcx+8],    mm4

        movq        [rdi+rcx*2],    mm2
        movq        [rdi+rcx*2+8],  mm5

        lea         rdi,            [rdi+rcx*2]
        add         rdi,            rcx             ; rdi -> destination row 12

        ; ---- rows 12-14: load ----
        movq        mm0,            [rsi]
        movq        mm3,            [rsi+8];

        movq        mm1,            [rsi+rax]
        movq        mm4,            [rsi+rax+8]

        movq        mm2,            [rsi+rax*2]
        movq        mm5,            [rsi+rax*2+8]

        lea         rsi,            [rsi+rax*2]
        add         rsi,            rax             ; rsi -> source row 15

        ; ---- rows 12-14: store ----
        movq        [rdi],          mm0
        movq        [rdi+8],        mm3

        movq        [rdi+rcx],      mm1
        movq        [rdi+rcx+8],    mm4

        movq        [rdi+rcx*2],    mm2
        movq        [rdi+rcx*2+8],  mm5

        lea         rdi,            [rdi+rcx*2]
        add         rdi,            rcx             ; rdi -> destination row 15

        ; ---- row 15: load and store ----
        movq        mm0,            [rsi]
        movq        mm3,            [rsi+8];

        movq        [rdi],          mm0
        movq        [rdi+8],        mm3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret