1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12    EXPORT  |vp8_sad8x8_neon|
13    EXPORT  |vp8_sad8x16_neon|
14    EXPORT  |vp8_sad4x4_neon|
15
16    ARM
17    REQUIRE8
18    PRESERVE8
19
20    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------------
; unsigned int vp8_sad8x8_neon(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute differences (SAD) between the 8x8 byte block at src_ptr
; and the 8x8 byte block at ref_ptr. Same prototype as vp8_sad8x8_c.
;
; In:     r0 = src_ptr, r1 = src_stride, r2 = ref_ptr, r3 = ref_stride
; Out:    r0 = SAD
; Clobb:  r2, d0-d4, d6, d16, d18, d20, d22, q12 (d24-d25)
;
; Reference rows are held in d16/d18/d20/d22. d8-d15 are callee-saved under
; the AAPCS and must not be overwritten without being saved, so they are
; deliberately not used here.
;
; q12 holds eight 16-bit column sums; 8 rows * 255 fits easily in 16 bits.
; Each row's accumulate is issued after the next row's loads so that loads
; and arithmetic are interleaved.
;-----------------------------------------------------------------------------
|vp8_sad8x8_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; acc  = |src0 - ref0| (widened to u16)

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; acc += |src1 - ref1|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20            ; acc += |src2 - ref2|

    vld1.8          {d0}, [r0], r1          ; src row 4
    vld1.8          {d16}, [r2], r3         ; ref row 4

    vabal.u8        q12, d6, d22            ; acc += |src3 - ref3|

    vld1.8          {d2}, [r0], r1          ; src row 5
    vld1.8          {d18}, [r2], r3         ; ref row 5

    vabal.u8        q12, d0, d16            ; acc += |src4 - ref4|

    vld1.8          {d4}, [r0], r1          ; src row 6
    vld1.8          {d20}, [r2], r3         ; ref row 6

    vabal.u8        q12, d2, d18            ; acc += |src5 - ref5|

    vld1.8          {d6}, [r0], r1          ; src row 7
    vld1.8          {d22}, [r2], r3         ; ref row 7

    vabal.u8        q12, d4, d20            ; acc += |src6 - ref6|
    vabal.u8        q12, d6, d22            ; acc += |src7 - ref7|

    ; Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64 -> one 32-bit total.
    vpaddl.u16      q1, q12
    vpaddl.u32      q0, q1
    vadd.u32        d0, d0, d1              ; low word of d0 = total SAD

    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP
76
;-----------------------------------------------------------------------------
; unsigned int vp8_sad8x16_neon(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute differences (SAD) between the 8-wide, 16-high byte block
; at src_ptr and the block of the same size at ref_ptr. Same prototype as
; vp8_sad8x16_c.
;
; In:     r0 = src_ptr, r1 = src_stride, r2 = ref_ptr, r3 = ref_stride
; Out:    r0 = SAD
; Clobb:  r2, d0-d4, d6, d16, d18, d20, d22, q12 (d24-d25)
;
; Reference rows are held in d16/d18/d20/d22. d8-d15 are callee-saved under
; the AAPCS and must not be overwritten without being saved, so they are
; deliberately not used here.
;
; q12 holds eight 16-bit column sums; 16 rows * 255 = 4080 fits in 16 bits.
; Each row's accumulate is issued after the next row's loads so that loads
; and arithmetic are interleaved.
;-----------------------------------------------------------------------------
|vp8_sad8x16_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; acc  = |src0 - ref0| (widened to u16)

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; acc += row 1

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20            ; acc += row 2

    vld1.8          {d0}, [r0], r1          ; src row 4
    vld1.8          {d16}, [r2], r3         ; ref row 4

    vabal.u8        q12, d6, d22            ; acc += row 3

    vld1.8          {d2}, [r0], r1          ; src row 5
    vld1.8          {d18}, [r2], r3         ; ref row 5

    vabal.u8        q12, d0, d16            ; acc += row 4

    vld1.8          {d4}, [r0], r1          ; src row 6
    vld1.8          {d20}, [r2], r3         ; ref row 6

    vabal.u8        q12, d2, d18            ; acc += row 5

    vld1.8          {d6}, [r0], r1          ; src row 7
    vld1.8          {d22}, [r2], r3         ; ref row 7

    vabal.u8        q12, d4, d20            ; acc += row 6

    vld1.8          {d0}, [r0], r1          ; src row 8
    vld1.8          {d16}, [r2], r3         ; ref row 8

    vabal.u8        q12, d6, d22            ; acc += row 7

    vld1.8          {d2}, [r0], r1          ; src row 9
    vld1.8          {d18}, [r2], r3         ; ref row 9

    vabal.u8        q12, d0, d16            ; acc += row 8

    vld1.8          {d4}, [r0], r1          ; src row 10
    vld1.8          {d20}, [r2], r3         ; ref row 10

    vabal.u8        q12, d2, d18            ; acc += row 9

    vld1.8          {d6}, [r0], r1          ; src row 11
    vld1.8          {d22}, [r2], r3         ; ref row 11

    vabal.u8        q12, d4, d20            ; acc += row 10

    vld1.8          {d0}, [r0], r1          ; src row 12
    vld1.8          {d16}, [r2], r3         ; ref row 12

    vabal.u8        q12, d6, d22            ; acc += row 11

    vld1.8          {d2}, [r0], r1          ; src row 13
    vld1.8          {d18}, [r2], r3         ; ref row 13

    vabal.u8        q12, d0, d16            ; acc += row 12

    vld1.8          {d4}, [r0], r1          ; src row 14
    vld1.8          {d20}, [r2], r3         ; ref row 14

    vabal.u8        q12, d2, d18            ; acc += row 13

    vld1.8          {d6}, [r0], r1          ; src row 15
    vld1.8          {d22}, [r2], r3         ; ref row 15

    vabal.u8        q12, d4, d20            ; acc += row 14
    vabal.u8        q12, d6, d22            ; acc += row 15

    ; Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64 -> one 32-bit total.
    vpaddl.u16      q1, q12
    vpaddl.u32      q0, q1
    vadd.u32        d0, d0, d1              ; low word of d0 = total SAD

    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP
173
;-----------------------------------------------------------------------------
; unsigned int vp8_sad4x4_neon(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute differences (SAD) between the 4x4 byte block at src_ptr
; and the 4x4 byte block at ref_ptr. Same prototype as vp8_sad4x4_c.
;
; In:     r0 = src_ptr, r1 = src_stride, r2 = ref_ptr, r3 = ref_stride
; Out:    r0 = SAD
; Clobb:  r2, d0-d2, d4, d6, d16, d18, d20, d22, q12 (d24-d25)
;
; Reference rows are held in d16/d18/d20/d22. d8-d15 are callee-saved under
; the AAPCS and must not be overwritten without being saved, so they are
; deliberately not used here.
;
; NOTE(review): every row is loaded as a full 8-byte D register although only
; the first 4 bytes contribute to the result (the reduction reads d24 only).
; The 4 bytes following each row must therefore be readable memory.
;-----------------------------------------------------------------------------
|vp8_sad4x4_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0 (8 bytes fetched, 4 used)
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; acc  = |src0 - ref0| (widened to u16)

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; acc += row 1

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20            ; acc += row 2
    vabal.u8        q12, d6, d22            ; acc += row 3

    ; Only d24 (columns 0-3) is reduced; d25 (columns 4-7) is discarded.
    vpaddl.u16      d1, d24                 ; 4 x u16 -> 2 x u32
    vpaddl.u32      d0, d1                  ; 2 x u32 -> 1 x u64
    vmov.32         r0, d0[0]               ; return the low 32 bits

    bx              lr

    ENDP
208
209    END
210