1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11    EXPORT |vp8_short_inv_walsh4x4_v6|   ; make the routine's symbol visible to the linker
12
13    ARM                                  ; assemble what follows as ARM-state instructions
14    REQUIRE8                             ; this code requires an 8-byte aligned stack
15    PRESERVE8                            ; this code preserves 8-byte stack alignment
16
17    AREA    |.text|, CODE, READONLY  ; name this block of code
18
;-----------------------------------------------------------------------------
; short vp8_short_inv_walsh4x4_v6(short *input, short *mb_dqcoeff)
;
;   4x4 inverse Walsh transform carried out on packed 16-bit lanes.
;
;   In:    r0 = input       sixteen halfwords, fetched as eight words
;                           (offsets 0..28)
;          r1 = mb_dqcoeff  destination; one halfword is stored, then the
;                           pointer is advanced by 32 bytes, sixteen times
;   Out:   r0 is never written, so it still holds `input` on return.
;          NOTE(review): the `short` return type in the prototype above looks
;          stale for that reason - confirm against the C declaration.
;   Regs:  r2-r9  the 4x4 block, two coefficients per register
;          r10    scratch in pass 1 and early pass 2, then the rounding word
;          r11, r12, lr  scratch
;          r4-r12 and lr are pushed on entry and popped on exit (ten words,
;          so sp keeps its 8-byte alignment).
;
;   All lane arithmetic uses the saturating packed-halfword instructions.
;   Each result has 3 added and is then arithmetically shifted right by 3.
;   Lane pictures in the comments are written [high halfword | low halfword].
;-----------------------------------------------------------------------------

    ; Pass-1 butterfly down two columns at once.
    ; Each argument is one row's register for the same column pair.
    ;   a1 = rowA + rowD      b1 = rowB + rowC
    ;   c1 = rowB - rowC      d1 = rowA - rowD
    ; The rows are overwritten in place. Clobbers r10, r11, r12 and lr.
    MACRO
    IWALSH_COLUMN_PAIR $rowA, $rowB, $rowC, $rowD
    qadd16      r10, $rowA, $rowD       ; a1
    qadd16      r11, $rowB, $rowC       ; b1
    qsub16      r12, $rowB, $rowC       ; c1
    qsub16      lr, $rowA, $rowD        ; d1

    qadd16      $rowA, r10, r11         ; a1 + b1
    qadd16      $rowB, r12, lr          ; c1 + d1
    qsub16      $rowC, r10, r11         ; a1 - b1
    qsub16      $rowD, lr, r12          ; d1 - c1
    MEND

    ; Shift one finished row down by 3 and write its four outputs through r1.
    ;   first argument  = [b2 | c2] with the rounding already added
    ;   second argument = [a2 | d2] with the rounding already added
    ; High lanes are extracted with asr #19 (16 to reach the lane, 3 for the
    ; scaling); low lanes are sign-extended first and then shifted by 3.
    ; Store order is a2, b2, c2, d2. Clobbers r12, lr and both arguments.
    MACRO
    IWALSH_STORE_ROW $bc, $ad
    mov         r12, $ad, asr #19       ; a2 >> 3
    strh        r12, [r1], #32
    mov         lr, $bc, asr #19        ; b2 >> 3
    strh        lr, [r1], #32
    sxth        $bc, $bc                ; isolate c2
    sxth        $ad, $ad                ; isolate d2
    mov         $bc, $bc, asr #3        ; c2 >> 3
    strh        $bc, [r1], #32
    mov         $ad, $ad, asr #3        ; d2 >> 3
    strh        $ad, [r1], #32
    MEND

|vp8_short_inv_walsh4x4_v6| PROC

    stmfd       sp!, {r4 - r12, lr}

    ; Fetch the whole block before anything is stored.
    ; Even registers hold elements [1|0] of a row, odd registers [3|2].
    ldr         r2, [r0, #0]            ; row 0  [ 1 |  0]
    ldr         r3, [r0, #4]            ; row 0  [ 3 |  2]
    ldr         r4, [r0, #8]            ; row 1  [ 5 |  4]
    ldr         r5, [r0, #12]           ; row 1  [ 7 |  6]
    ldr         r6, [r0, #16]           ; row 2  [ 9 |  8]
    ldr         r7, [r0, #20]           ; row 2  [11 | 10]
    ldr         r8, [r0, #24]           ; row 3  [13 | 12]
    ldr         r9, [r0, #28]           ; row 3  [15 | 14]

    ; ---- Pass 1: vertical butterflies --------------------------------------
    ; columns 1 and 0
    IWALSH_COLUMN_PAIR r2, r4, r6, r8
    ; columns 3 and 2
    IWALSH_COLUMN_PAIR r3, r5, r7, r9

    ; ---- Pass 2, rows 0 and 1: horizontal butterflies ----------------------
    ; The exchanging add/subtract forms pair the high lane of one operand
    ; with the low lane of the other, so no separate transpose is needed.
    qsubaddx    r10, r2, r3             ; row 0 [c1 | a1] = [1-2 | 0+3]
    qaddsubx    r11, r2, r3             ; row 0 [b1 | d1] = [1+2 | 0-3]
    qsubaddx    r12, r4, r5             ; row 1 [c1 | a1] = [5-6 | 4+7]
    qaddsubx    lr, r4, r5              ; row 1 [b1 | d1] = [5+6 | 4-7]

    qaddsubx    r2, r10, r11            ; row 0 [b2 | c2] = [c1+d1 | a1-b1]
    qaddsubx    r3, r11, r10            ; row 0 [a2 | d2] = [b1+a1 | d1-c1]
    ldr         r10, c0x00030003        ; r10 = [3 | 3]; its scratch value is dead now
    qaddsubx    r4, r12, lr             ; row 1 [b2 | c2] = [c1+d1 | a1-b1]
    qaddsubx    r5, lr, r12             ; row 1 [a2 | d2] = [b1+a1 | d1-c1]

    qadd16      r2, r2, r10             ; row 0 [b2+3 | c2+3]
    qadd16      r3, r3, r10             ; row 0 [a2+3 | d2+3]
    qadd16      r4, r4, r10             ; row 1 [b2+3 | c2+3]
    qadd16      r5, r5, r10             ; row 1 [a2+3 | d2+3]

    ; outputs 0..3
    IWALSH_STORE_ROW r2, r3
    ; outputs 4..7
    IWALSH_STORE_ROW r4, r5

    ; ---- Pass 2, rows 2 and 3 ----------------------------------------------
    ; r2-r5 have been stored already and are reused as scratch here;
    ; r10 still holds the rounding word.
    qsubaddx    r2, r6, r7              ; row 2 [c1 | a1] = [9-10  | 8+11]
    qaddsubx    r3, r6, r7              ; row 2 [b1 | d1] = [9+10  | 8-11]
    qsubaddx    r4, r8, r9              ; row 3 [c1 | a1] = [13-14 | 12+15]
    qaddsubx    r5, r8, r9              ; row 3 [b1 | d1] = [13+14 | 12-15]

    qaddsubx    r6, r2, r3              ; row 2 [b2 | c2] = [c1+d1 | a1-b1]
    qaddsubx    r7, r3, r2              ; row 2 [a2 | d2] = [b1+a1 | d1-c1]
    qaddsubx    r8, r4, r5              ; row 3 [b2 | c2] = [c1+d1 | a1-b1]
    qaddsubx    r9, r5, r4              ; row 3 [a2 | d2] = [b1+a1 | d1-c1]

    qadd16      r6, r6, r10             ; row 2 [b2+3 | c2+3]
    qadd16      r7, r7, r10             ; row 2 [a2+3 | d2+3]
    qadd16      r8, r8, r10             ; row 3 [b2+3 | c2+3]
    qadd16      r9, r9, r10             ; row 3 [a2+3 | d2+3]

    ; outputs 8..11
    IWALSH_STORE_ROW r6, r7
    ; outputs 12..15
    IWALSH_STORE_ROW r8, r9

    ldmfd       sp!, {r4 - r12, pc}     ; restore saved registers and return
    ENDP        ; |vp8_short_inv_walsh4x4_v6|
132
133
134; Constant Pool
; c0x00030003 carries the value 3 in each 16-bit half of the word. The routine
; above loads it into r10 with a label-relative ldr and adds it to every packed
; result with qadd16 just before the arithmetic shift right by 3.
135c0x00030003 DCD 0x00030003
136    END                               ; end of this source file
137