;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

SECTION .text

; Symbolic names for the working registers of vp8_copy_mem16x16_sse2.
; They are pure preprocessor aliases and are removed again after the routine.
%define CM16_SRC        rsi             ; current source row pointer
%define CM16_DST        rdi             ; current destination row pointer
%define CM16_SSTRIDE    rax             ; source stride, sign-extended
%define CM16_DSTRIDE    rcx             ; destination stride, sign-extended

;-----------------------------------------------------------------------------
; void vp8_copy_mem16x16_sse2(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
;     )
;
; Copies a block of 16 rows x 16 bytes from src to dst, fully unrolled.
;
; Rows are moved in groups of three (rows 0-2, 3-5, 6-8, 9-11, 12-14) followed
; by the single row 15. Every group is loaded completely before any of its
; rows is stored, and the pointer updates are interleaved with the moves.
;
; Source rows are read with movdqu, so src needs no particular alignment.
; Destination rows are written with movdqa, which requires a 16-byte aligned
; memory operand: dst + n * dst_stride must be 16-byte aligned for n = 0..15.
;
; Both strides are read as 32-bit values and sign-extended (movsxd).
;
; Registers: rsi and rdi are saved in the prolog and restored in the epilog;
;            rax, rcx and xmm0-xmm5 are overwritten.
;
; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and PRIVATE
; are not defined in this file - presumably they come from the included
; vpx_ports/x86_abi_support.asm; confirm there how arguments are mapped.
;-----------------------------------------------------------------------------
global sym(vp8_copy_mem16x16_sse2) PRIVATE
sym(vp8_copy_mem16x16_sse2):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; ---- rows 0-2 --------------------------------------------------------
    mov         CM16_SRC,       arg(0)                          ; src
    movdqu      xmm0,           [CM16_SRC]                      ; load row 0

    movsxd      CM16_SSTRIDE,   dword ptr arg(1)                ; src_stride
    mov         CM16_DST,       arg(2)                          ; dst

    movdqu      xmm1,           [CM16_SRC+CM16_SSTRIDE]         ; load row 1
    movdqu      xmm2,           [CM16_SRC+CM16_SSTRIDE*2]       ; load row 2

    movsxd      CM16_DSTRIDE,   dword ptr arg(3)                ; dst_stride
    lea         CM16_SRC,       [CM16_SRC+CM16_SSTRIDE*2]

    movdqa      [CM16_DST],     xmm0                            ; store row 0
    add         CM16_SRC,       CM16_SSTRIDE                    ; src -> row 3

    movdqa      [CM16_DST+CM16_DSTRIDE],    xmm1                ; store row 1
    movdqa      [CM16_DST+CM16_DSTRIDE*2],  xmm2                ; store row 2

    ; ---- rows 3-5 --------------------------------------------------------
    lea         CM16_DST,       [CM16_DST+CM16_DSTRIDE*2]
    movdqu      xmm3,           [CM16_SRC]                      ; load row 3

    add         CM16_DST,       CM16_DSTRIDE                    ; dst -> row 3
    movdqu      xmm4,           [CM16_SRC+CM16_SSTRIDE]         ; load row 4

    movdqu      xmm5,           [CM16_SRC+CM16_SSTRIDE*2]       ; load row 5
    lea         CM16_SRC,       [CM16_SRC+CM16_SSTRIDE*2]

    movdqa      [CM16_DST],     xmm3                            ; store row 3
    add         CM16_SRC,       CM16_SSTRIDE                    ; src -> row 6

    movdqa      [CM16_DST+CM16_DSTRIDE],    xmm4                ; store row 4
    movdqa      [CM16_DST+CM16_DSTRIDE*2],  xmm5                ; store row 5

    ; ---- rows 6-8 --------------------------------------------------------
    lea         CM16_DST,       [CM16_DST+CM16_DSTRIDE*2]
    movdqu      xmm0,           [CM16_SRC]                      ; load row 6

    add         CM16_DST,       CM16_DSTRIDE                    ; dst -> row 6
    movdqu      xmm1,           [CM16_SRC+CM16_SSTRIDE]         ; load row 7

    movdqu      xmm2,           [CM16_SRC+CM16_SSTRIDE*2]       ; load row 8
    lea         CM16_SRC,       [CM16_SRC+CM16_SSTRIDE*2]

    movdqa      [CM16_DST],     xmm0                            ; store row 6
    add         CM16_SRC,       CM16_SSTRIDE                    ; src -> row 9

    movdqa      [CM16_DST+CM16_DSTRIDE],    xmm1                ; store row 7

    movdqa      [CM16_DST+CM16_DSTRIDE*2],  xmm2                ; store row 8

    ; ---- rows 9-11 -------------------------------------------------------
    movdqu      xmm3,           [CM16_SRC]                      ; load row 9

    movdqu      xmm4,           [CM16_SRC+CM16_SSTRIDE]         ; load row 10
    lea         CM16_DST,       [CM16_DST+CM16_DSTRIDE*2]

    add         CM16_DST,       CM16_DSTRIDE                    ; dst -> row 9
    movdqu      xmm5,           [CM16_SRC+CM16_SSTRIDE*2]       ; load row 11

    lea         CM16_SRC,       [CM16_SRC+CM16_SSTRIDE*2]
    movdqa      [CM16_DST],     xmm3                            ; store row 9

    add         CM16_SRC,       CM16_SSTRIDE                    ; src -> row 12
    movdqa      [CM16_DST+CM16_DSTRIDE],    xmm4                ; store row 10

    movdqa      [CM16_DST+CM16_DSTRIDE*2],  xmm5                ; store row 11

    ; ---- rows 12-14 ------------------------------------------------------
    movdqu      xmm0,           [CM16_SRC]                      ; load row 12

    lea         CM16_DST,       [CM16_DST+CM16_DSTRIDE*2]
    movdqu      xmm1,           [CM16_SRC+CM16_SSTRIDE]         ; load row 13

    add         CM16_DST,       CM16_DSTRIDE                    ; dst -> row 12
    movdqu      xmm2,           [CM16_SRC+CM16_SSTRIDE*2]       ; load row 14

    lea         CM16_SRC,       [CM16_SRC+CM16_SSTRIDE*2]       ; src -> row 14
    movdqa      [CM16_DST],     xmm0                            ; store row 12

    movdqa      [CM16_DST+CM16_DSTRIDE],    xmm1                ; store row 13
    movdqa      [CM16_DST+CM16_DSTRIDE*2],  xmm2                ; store row 14

    ; ---- row 15 ----------------------------------------------------------
    ; Both pointers are left on row 14 and row 15 is addressed as +1 stride,
    ; so no further pointer advance is needed.
    movdqu      xmm3,           [CM16_SRC+CM16_SSTRIDE]         ; load row 15
    lea         CM16_DST,       [CM16_DST+CM16_DSTRIDE*2]       ; dst -> row 14

    movdqa      [CM16_DST+CM16_DSTRIDE],    xmm3                ; store row 15

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

%undef CM16_SRC
%undef CM16_DST
%undef CM16_SSTRIDE
%undef CM16_DSTRIDE