1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12
13 #include "./vpx_dsp_rtcd.h"
14 #include "vpx_dsp/inv_txfm.h"
15
create_dcd(const int16_t dc)16 static INLINE uint8x8_t create_dcd(const int16_t dc) {
17 int16x8_t t = vdupq_n_s16(dc);
18 return vqmovun_s16(t);
19 }
20
idct8x8_1_add_pos_kernel(uint8_t ** dest,const int stride,const uint8x8_t res)21 static INLINE void idct8x8_1_add_pos_kernel(uint8_t **dest, const int stride,
22 const uint8x8_t res) {
23 const uint8x8_t a = vld1_u8(*dest);
24 const uint8x8_t b = vqadd_u8(a, res);
25 vst1_u8(*dest, b);
26 *dest += stride;
27 }
28
idct8x8_1_add_neg_kernel(uint8_t ** dest,const int stride,const uint8x8_t res)29 static INLINE void idct8x8_1_add_neg_kernel(uint8_t **dest, const int stride,
30 const uint8x8_t res) {
31 const uint8x8_t a = vld1_u8(*dest);
32 const uint8x8_t b = vqsub_u8(a, res);
33 vst1_u8(*dest, b);
34 *dest += stride;
35 }
36
vpx_idct8x8_1_add_neon(const tran_low_t * input,uint8_t * dest,int stride)37 void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest,
38 int stride) {
39 const int16_t out0 =
40 WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64));
41 const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64));
42 const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
43
44 if (a1 >= 0) {
45 const uint8x8_t dc = create_dcd(a1);
46 idct8x8_1_add_pos_kernel(&dest, stride, dc);
47 idct8x8_1_add_pos_kernel(&dest, stride, dc);
48 idct8x8_1_add_pos_kernel(&dest, stride, dc);
49 idct8x8_1_add_pos_kernel(&dest, stride, dc);
50 idct8x8_1_add_pos_kernel(&dest, stride, dc);
51 idct8x8_1_add_pos_kernel(&dest, stride, dc);
52 idct8x8_1_add_pos_kernel(&dest, stride, dc);
53 idct8x8_1_add_pos_kernel(&dest, stride, dc);
54 } else {
55 const uint8x8_t dc = create_dcd(-a1);
56 idct8x8_1_add_neg_kernel(&dest, stride, dc);
57 idct8x8_1_add_neg_kernel(&dest, stride, dc);
58 idct8x8_1_add_neg_kernel(&dest, stride, dc);
59 idct8x8_1_add_neg_kernel(&dest, stride, dc);
60 idct8x8_1_add_neg_kernel(&dest, stride, dc);
61 idct8x8_1_add_neg_kernel(&dest, stride, dc);
62 idct8x8_1_add_neg_kernel(&dest, stride, dc);
63 idct8x8_1_add_neg_kernel(&dest, stride, dc);
64 }
65 }
66