1 /*
2 * Copyright 2017 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/basic_types.h"
12
13 #include "libyuv/compare_row.h"
14 #include "libyuv/row.h"
15
16 // This module is for GCC MSA
17 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
18 #include "libyuv/macros_msa.h"
19
20 #ifdef __cplusplus
21 namespace libyuv {
22 extern "C" {
23 #endif
24
HammingDistance_MSA(const uint8_t * src_a,const uint8_t * src_b,int count)25 uint32_t HammingDistance_MSA(const uint8_t* src_a,
26 const uint8_t* src_b,
27 int count) {
28 uint32_t diff = 0u;
29 int i;
30 v16u8 src0, src1, src2, src3;
31 v2i64 vec0 = {0}, vec1 = {0};
32
33 for (i = 0; i < count; i += 32) {
34 src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
35 src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
36 src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
37 src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
38 src0 ^= src2;
39 src1 ^= src3;
40 vec0 += __msa_pcnt_d((v2i64)src0);
41 vec1 += __msa_pcnt_d((v2i64)src1);
42 src_a += 32;
43 src_b += 32;
44 }
45
46 vec0 += vec1;
47 diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
48 diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
49 return diff;
50 }
51
SumSquareError_MSA(const uint8_t * src_a,const uint8_t * src_b,int count)52 uint32_t SumSquareError_MSA(const uint8_t* src_a,
53 const uint8_t* src_b,
54 int count) {
55 uint32_t sse = 0u;
56 int i;
57 v16u8 src0, src1, src2, src3;
58 v8i16 vec0, vec1, vec2, vec3;
59 v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0};
60 v2i64 tmp0;
61
62 for (i = 0; i < count; i += 32) {
63 src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
64 src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
65 src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
66 src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
67 vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
68 vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
69 vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
70 vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
71 vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
72 vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
73 vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
74 vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
75 reg0 = __msa_dpadd_s_w(reg0, vec0, vec0);
76 reg1 = __msa_dpadd_s_w(reg1, vec1, vec1);
77 reg2 = __msa_dpadd_s_w(reg2, vec2, vec2);
78 reg3 = __msa_dpadd_s_w(reg3, vec3, vec3);
79 src_a += 32;
80 src_b += 32;
81 }
82
83 reg0 += reg1;
84 reg2 += reg3;
85 reg0 += reg2;
86 tmp0 = __msa_hadd_s_d(reg0, reg0);
87 sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
88 sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
89 return sse;
90 }
91
92 #ifdef __cplusplus
93 } // extern "C"
94 } // namespace libyuv
95 #endif
96
97 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
98