1@ This file was created from a .asm file
2@  using the ads2gas.pl script.
3	.syntax unified
4@
5@  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
6@
7@  Use of this source code is governed by a BSD-style license and patent
8@  grant that can be found in the LICENSE file in the root of the source
9@  tree. All contributing project authors may be found in the AUTHORS
10@  file in the root of the source tree.
11@
12
13
@ ---------------------------------------------------------------------------
@ Section, instruction set and EABI build attributes for this routine.
@ ---------------------------------------------------------------------------
    .text
    .p2align 2
    .arm
    .eabi_attribute 24, 1                       @ Tag_ABI_align_needed
    .eabi_attribute 25, 1                       @ Tag_ABI_align_preserved

@ Named forms of the literals used by the scalar part of the routine.
    .equ    IDCT4_COSPI_16_64,    0x2d41        @ cospi_16_64 = 11585
    .equ    IDCT4_DCT_CONST_BITS, 14            @ DCT_CONST_BITS
    .equ    IDCT4_DCT_ROUNDING,   0x2000        @ 1 << (DCT_CONST_BITS - 1)
    .equ    IDCT4_OUT_SHIFT,      4             @ shift of ROUND_POWER_OF_TWO(out, 4)
    .equ    IDCT4_OUT_ROUNDING,   8             @ 1 << (4 - 1)

@ ---------------------------------------------------------------------------
@ void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int stride)
@
@ Derives a single value a1 from input[0] and adds it, with saturation to
@ the 0..255 range, to every pixel of the 4x4 block that starts at dest.
@
@ In:      r0 = input   (only input[0] is read)
@          r1 = dest    (top-left byte of the 4x4 block, updated in place)
@          r2 = stride  (byte step from one row of dest to the next)
@ Writes:  r0, r1, r12, q0, d2, d4, d6, d7, q8, q9
@ ---------------------------------------------------------------------------
    .globl  vpx_idct4x4_1_add_neon
    .type   vpx_idct4x4_1_add_neon, %function

_vpx_idct4x4_1_add_neon:
vpx_idct4x4_1_add_neon:                         @ PROC
    ldrsh       r0, [r0]                        @ r0 = input[0], sign-extended halfword
    movw        r12, #IDCT4_COSPI_16_64         @ r12 = cospi_16_64

    @ First pass: out = dct_const_round_shift(input[0] * cospi_16_64)
    mul         r0, r0, r12                     @ product
    add         r0, r0, #IDCT4_DCT_ROUNDING     @ add half of the divisor to round
    asr         r0, r0, #IDCT4_DCT_CONST_BITS   @ arithmetic shift keeps the sign

    @ Second pass: out = dct_const_round_shift(out * cospi_16_64)
    mul         r0, r0, r12                     @ last use of the constant in r12
    mov         r12, r1                         @ r12 = untouched dest for the stores;
                                                @ r1 itself is advanced by the loads
    add         r0, r0, #IDCT4_DCT_ROUNDING
    asr         r0, r0, #IDCT4_DCT_CONST_BITS

    @ a1 = ROUND_POWER_OF_TWO(out, 4)
    add         r0, r0, #IDCT4_OUT_ROUNDING
    asr         r0, r0, #IDCT4_OUT_SHIFT

    vdup.16     q0, r0                          @ a1 replicated into all eight 16-bit lanes

    @ Read four rows of four bytes each, two rows per D register.
    vld1.32     {d2[0]}, [r1], r2               @ row 0, then r1 += stride
    vld1.32     {d2[1]}, [r1], r2               @ row 1, then r1 += stride
    vld1.32     {d4[0]}, [r1], r2               @ row 2, then r1 += stride
    vld1.32     {d4[1]}, [r1]                   @ row 3

    @ dest[x] + a1: each byte is widened to 16 bits before the add.
    vaddw.u8    q8, q0, d2                      @ rows 0 and 1
    vaddw.u8    q9, q0, d4                      @ rows 2 and 3

    @ clip_pixel: signed 16-bit sums are narrowed to unsigned bytes with saturation.
    vqmovun.s16 d6, q8
    vqmovun.s16 d7, q9

    @ Write the rows back through r12, stepping by stride exactly as the loads did.
    vst1.32     {d6[0]}, [r12], r2              @ row 0
    vst1.32     {d6[1]}, [r12], r2              @ row 1
    vst1.32     {d7[0]}, [r12], r2              @ row 2
    vst1.32     {d7[1]}, [r12]                  @ row 3

    bx          lr
    .size   vpx_idct4x4_1_add_neon, . - vpx_idct4x4_1_add_neon  @ ENDP  |vpx_idct4x4_1_add_neon|

    .section    .note.GNU-stack, "", %progbits
73