;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; COPY_MEM16_3ROWS x0, x1, x2
;   Copies three consecutive 16-byte rows through the given xmm registers.
;     in/out: rsi = source row pointer,      rax = source row pitch
;             rdi = destination row pointer, rcx = destination row pitch
;   All three rows are loaded (unaligned loads) before the first one is
;   stored (aligned stores), then both pointers are moved forward by three
;   rows. "pointer + 3*pitch" is formed as lea (+2*pitch) followed by
;   add (+pitch).
%macro COPY_MEM16_3ROWS 3
    movdqu      %1,             [rsi]
    movdqu      %2,             [rsi+rax]
    movdqu      %3,             [rsi+rax*2]
    lea         rsi,            [rsi+rax*2]
    add         rsi,            rax

    movdqa      [rdi],          %1
    movdqa      [rdi+rcx],      %2
    movdqa      [rdi+rcx*2],    %3
    lea         rdi,            [rdi+rcx*2]
    add         rdi,            rcx
%endmacro

;void vp8_copy_mem16x16_sse2(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride
;    )
;
; Copies 16 rows of 16 bytes each from src to dst. Consecutive rows are
; src_stride bytes apart in the source and dst_stride bytes apart in the
; destination; both strides are read as 32-bit values with movsxd.
;
; Source rows are read with movdqu, so src needs no particular alignment.
; Destination rows are written with movdqa, so every destination row
; address (dst + k*dst_stride, k = 0..15) must be 16-byte aligned.
;
; Rows are moved in groups of three (0-2, 3-5, 6-8, 9-11, 12-14), each
; group fully loaded before it is stored, followed by row 15 on its own.
;
; Clobbers rax, rcx, xmm0-xmm5 and the arithmetic flags. rsi and rdi are
; saved in the prolog and restored in the epilog.
;
; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS come
; from the included ABI support file, which is not visible here. The order
; of the prolog and epilog steps is kept exactly as it was; confirm
; against that file before rearranging them.
global sym(vp8_copy_mem16x16_sse2) PRIVATE
sym(vp8_copy_mem16x16_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; Fetch all four arguments up front.
    mov         rsi,            arg(0)              ; src
    mov         rdi,            arg(2)              ; dst
    movsxd      rax,            dword ptr arg(1)    ; src_stride
    movsxd      rcx,            dword ptr arg(3)    ; dst_stride

    ; Rows 0-14, alternating between two banks of xmm registers.
    COPY_MEM16_3ROWS xmm0, xmm1, xmm2               ; rows  0- 2
    COPY_MEM16_3ROWS xmm3, xmm4, xmm5               ; rows  3- 5
    COPY_MEM16_3ROWS xmm0, xmm1, xmm2               ; rows  6- 8
    COPY_MEM16_3ROWS xmm3, xmm4, xmm5               ; rows  9-11
    COPY_MEM16_3ROWS xmm0, xmm1, xmm2               ; rows 12-14

    ; Row 15: both pointers already address the final row.
    movdqu      xmm3,           [rsi]
    movdqa      [rdi],          xmm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret