1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12%include "vpx_ports/x86_abi_support.asm"
13
;-----------------------------------------------------------------------
; void vp8_copy_mem16x16_sse2(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
;     )
;
; Copies 16 rows of 16 bytes each from src to dst.  Consecutive source
; rows are src_stride bytes apart, consecutive destination rows are
; dst_stride bytes apart.
;
; Source rows are read with movdqu, so src may have any alignment.
; Destination rows are written with movdqa, so every destination row
; address (dst + n * dst_stride) has to be 16-byte aligned.
;
; Register roles:
;   rsi        current source row pointer      (saved / restored)
;   rdi        current destination row pointer (saved / restored)
;   rax        src_stride, widened with movsxd
;   rcx        dst_stride, widened with movsxd
;   xmm0-xmm5  row data in flight
; Overwritten on return: rax, rcx, xmm0-xmm5, flags.
;
; sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and PRIVATE are
; supplied by the included ABI support header.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem16x16_sse2) PRIVATE
sym(vp8_copy_mem16x16_sse2):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; Fetch all four arguments up front.
    mov         rsi,            arg(0)              ; src
    movsxd      rax,            dword ptr arg(1)    ; src_stride
    mov         rdi,            arg(2)              ; dst
    movsxd      rcx,            dword ptr arg(3)    ; dst_stride

    ; The block is moved three rows at a time: load three rows, store
    ; three rows, then step both pointers forward by three strides
    ; (lea adds 2 * stride, add supplies the third).

    ; ---- rows 0..2 ---------------------------------------------------
    movdqu      xmm0,           [rsi]
    movdqu      xmm1,           [rsi+rax]
    movdqu      xmm2,           [rsi+rax*2]

    movdqa      [rdi],          xmm0
    movdqa      [rdi+rcx],      xmm1
    movdqa      [rdi+rcx*2],    xmm2

    lea         rsi,            [rsi+rax*2]
    add         rsi,            rax                 ; rsi -> source row 3
    lea         rdi,            [rdi+rcx*2]
    add         rdi,            rcx                 ; rdi -> dest row 3

    ; ---- rows 3..5 ---------------------------------------------------
    movdqu      xmm3,           [rsi]
    movdqu      xmm4,           [rsi+rax]
    movdqu      xmm5,           [rsi+rax*2]

    movdqa      [rdi],          xmm3
    movdqa      [rdi+rcx],      xmm4
    movdqa      [rdi+rcx*2],    xmm5

    lea         rsi,            [rsi+rax*2]
    add         rsi,            rax                 ; rsi -> source row 6
    lea         rdi,            [rdi+rcx*2]
    add         rdi,            rcx                 ; rdi -> dest row 6

    ; ---- rows 6..8 ---------------------------------------------------
    movdqu      xmm0,           [rsi]
    movdqu      xmm1,           [rsi+rax]
    movdqu      xmm2,           [rsi+rax*2]

    movdqa      [rdi],          xmm0
    movdqa      [rdi+rcx],      xmm1
    movdqa      [rdi+rcx*2],    xmm2

    lea         rsi,            [rsi+rax*2]
    add         rsi,            rax                 ; rsi -> source row 9
    lea         rdi,            [rdi+rcx*2]
    add         rdi,            rcx                 ; rdi -> dest row 9

    ; ---- rows 9..11 --------------------------------------------------
    movdqu      xmm3,           [rsi]
    movdqu      xmm4,           [rsi+rax]
    movdqu      xmm5,           [rsi+rax*2]

    movdqa      [rdi],          xmm3
    movdqa      [rdi+rcx],      xmm4
    movdqa      [rdi+rcx*2],    xmm5

    lea         rsi,            [rsi+rax*2]
    add         rsi,            rax                 ; rsi -> source row 12
    lea         rdi,            [rdi+rcx*2]
    add         rdi,            rcx                 ; rdi -> dest row 12

    ; ---- rows 12..14 -------------------------------------------------
    movdqu      xmm0,           [rsi]
    movdqu      xmm1,           [rsi+rax]
    movdqu      xmm2,           [rsi+rax*2]

    movdqa      [rdi],          xmm0
    movdqa      [rdi+rcx],      xmm1
    movdqa      [rdi+rcx*2],    xmm2

    ; Only one row is left, so step by two strides and reach the final
    ; row through the +stride displacement below.
    lea         rsi,            [rsi+rax*2]         ; rsi -> source row 14
    lea         rdi,            [rdi+rcx*2]         ; rdi -> dest row 14

    ; ---- row 15 ------------------------------------------------------
    movdqu      xmm3,           [rsi+rax]
    movdqa      [rdi+rcx],      xmm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
117