1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "gtest/gtest.h"
18
// Declared with C linkage; no definition is visible in this file. The
// EmptyFunc test calls it in a tight loop.
extern "C" void foo();
20
namespace {

// Trivial callees used as the entries of the FuncPtr test's dispatch table.
// Each one returns a distinct constant (1, 2, 3, 4); the test sums the
// returned values.
int f0() { return 1; }
int f1() { return 2; }
int f2() { return 3; }
int f3() { return 4; }

}  // namespace
40
TEST(BerberisPerf, XorLoop) {
  // Repeatedly XOR the accumulator with itself shifted left by one bit.
  //
  // This transformation brings a 32-bit value back to where it started after
  // every 32 applications. The iteration count (1,000,000,000) is a multiple
  // of 32, so the seed must reappear once the loop finishes.
  constexpr unsigned kSeed = 0xdeadbeef;
  constexpr int kIterations = 1000 * 1000 * 1000;

  unsigned value = kSeed;
  for (int iter = 0; iter < kIterations; ++iter) {
    value ^= value << 1;
  }

  EXPECT_EQ(value, kSeed);
}
52
TEST(BerberisPerf, LoopWithCondition) {
  // The loop body below is deliberately split by unconditional branches; it
  // should still execute efficiently, e.g. without ending up as two separate
  // translated regions.
  //
  // A plain if-else is not enough for this purpose: it can be lowered to a
  // conditional MOV instruction, and its single unconditional branch can be
  // merged with the loop's back branch. Hence the if / else-if / else chain,
  // whose shape must be kept as is.
  constexpr unsigned kSeed = 0xf00dfeed;
  constexpr int kIterations = 1000 * 1000 * 1000;

  unsigned acc = kSeed;
  for (int iter = 0; iter < kIterations; ++iter) {
    const int phase = iter % 4;
    if (phase == 0) {
      acc ^= acc << 1;
    } else if (phase == 1) {
      acc ^= acc << 2;
    } else if (phase == 2) {
      acc ^= acc << 3;
    } else {
      acc ^= acc << 4;
    }
  }

  // The accumulator is expected to be back at its starting value.
  EXPECT_EQ(acc, kSeed);
}
77
TEST(BerberisPerf, Pi) {
  // Approximate the area of a circle of radius 10000 by brute force: visit
  // every integer point of the enclosing 20000 x 20000 square and count the
  // ones that lie strictly inside the circle.
  constexpr int kRadius = 10000;
  constexpr int kRadiusSquared = kRadius * kRadius;

  int inside = 0;
  for (int x = -kRadius; x < kRadius; ++x) {
    for (int y = -kRadius; y < kRadius; ++y) {
      const bool hit = (x * x + y * y) < kRadiusSquared;
      // A bool converts to 0 or 1, so this adds one per point inside.
      inside += hit;
    }
  }
  EXPECT_EQ(inside, 314159017);
}
90
TEST(BerberisPerf, FuncPtr) {
  // Make 100 million indirect calls through a four-entry table of function
  // pointers and add up what the callees return.
  using Callee = int (*)();
  static const Callee kTable[4] = {f0, f1, f2, f3};
  constexpr int kCalls = 100 * 1000 * 1000;

  int total = 0;
  for (int n = 0; n < kCalls; ++n) {
    // Only the low four bits of n influence the slot, so the sequence of
    // selected callees repeats with a period of 16.
    const int slot = (n ^ (n >> 2)) & 3;
    total += kTable[slot]();
  }
  EXPECT_EQ(total, 250000000);
}
103
TEST(BerberisPerf, StrlenFruits) {
  // Call strlen roughly 26 million times (the 86-character string below times
  // 300,000 passes) while incrementing the pointer to the string. This way,
  // we get to test different alignments.
  //
  // Dropping "256" below seems to change the characteristics of the
  // test, and the execution time would collapse to 300ms from 4000ms.
  static const char str[256] =
      "banana apple orange strawberry pinapple grape lemon cherry pear melon watermelon peach";
  unsigned result = 0;
  // Length of the whole string; bounds the start offsets used below.
  int e = strlen(str);

  for (int i = 0; i < 300 * 1000; i++) {
    for (int j = 0; j != e; j++) {
      // Measure the suffix starting at offset j and fold it into the result.
      result ^= strlen(str + j);
    }
  }
  // Every pass XORs in the same set of lengths and the number of passes is
  // even, so the accumulated value cancels out to zero.
  EXPECT_EQ(result, 0U);
}
122
TEST(BerberisPerf, StrlenEmpty) {
  // Measure the overhead of the trampoline by calling strlen on an empty
  // string 30 million times.
  //
  // Each call's result is fed back into the next call's input buffer, which
  // keeps the compiler from optimizing the strlen calls away.
  constexpr int kCalls = 30 * 1000 * 1000;

  unsigned length = 0;
  for (int call = 0; call < kCalls; ++call) {
    // One-byte buffer holding the previous length, i.e. just a terminator.
    char buf[1] = {static_cast<char>(length)};
    length = strlen(buf);
  }
  EXPECT_EQ(length, 0U);
}
137
TEST(BerberisPerf, HighRegPres) {
  // High register pressure test: twelve accumulators are kept live across a
  // 100-million-iteration loop.
  //
  // The generated code on ARM has no spill. Twelve variables from v0
  // to vb, "i", SP, LR, and PC use up exactly 16 registers.
  unsigned v0 = 0;
  unsigned v1 = 1;
  unsigned v2 = 2;
  unsigned v3 = 3;
  unsigned v4 = 4;
  unsigned v5 = 5;
  unsigned v6 = 6;
  unsigned v7 = 7;
  unsigned v8 = 8;
  unsigned v9 = 9;
  unsigned va = 10;
  unsigned vb = 11;
  // Always zero and never written; it only exists to be read inside the loop.
  volatile unsigned vol = 0;
  for (size_t i = 0; i < 100 * 1000 * 1000; i++) {
    // Disable the auto vectorization by reading a volatile variable.
    // Since vol stays 0, this does not change the value of i.
    i += vol;

    // Each accumulator adds i XORed with its own small constant (3..14).
    v0 += i ^ 3;
    v1 += i ^ 4;
    v2 += i ^ 5;
    v3 += i ^ 6;
    v4 += i ^ 7;
    v5 += i ^ 8;
    v6 += i ^ 9;
    v7 += i ^ 10;
    v8 += i ^ 11;
    v9 += i ^ 12;
    va += i ^ 13;
    vb += i ^ 14;
  }
  // Combine all accumulators so that every one of them is actually used. The
  // XOR constants only permute i within aligned groups of 16 and the trip
  // count is a multiple of 16, so every accumulator gains the same total; the
  // combined value is expected to cancel out to zero.
  unsigned result = (v0 ^ v1 ^ v2 ^ v3 ^ v4 ^ v5 ^ v6 ^ v7 ^ v8 ^ v9 ^ va ^ vb);
  EXPECT_EQ(result, 0U);
}
176
TEST(BerberisPerf, EmptyFunc) {
  // Measure plain call overhead: invoke foo(), which this test treats as an
  // empty function, 500 million times.
  constexpr size_t kCalls = 500 * 1000 * 1000;
  for (size_t n = 0; n < kCalls; ++n) {
    foo();
  }
  // There is no result to verify; the check below always passes.
  EXPECT_EQ(0, 0);
}
184
TEST(BerberisPerf, ConvertF32I32) {
  // Convert float to int 100 million times, alternating between two inputs.
  // 0.5 truncates to 0 and 1.2 truncates to 1, so exactly half of the
  // conversions contribute 1 to the total.
  static const float kInputs[] = {0.5, 1.2};
  constexpr int kConversions = 100 * 1000 * 1000;

  int total = 0;
  for (int n = 0; n < kConversions; ++n) {
    const float input = kInputs[n & 1];
    total += static_cast<int>(input);
  }
  EXPECT_EQ(total, 50000000);
}
193
194 #if defined __arm__
195
TEST(BerberisPerf, ReadWriteFPSCR) {
  // Measures a back-to-back FPSCR write (vmsr) and read (vmrs), checking on
  // every iteration that the value read back equals the value written.
  //
  // Remember the FPSCR value the test started with. The loop's final write
  // leaves a non-zero value (0xc01f00) in the register; without restoring it,
  // that state would leak into whatever runs next in this process.
  uint32_t saved_fpscr;
  asm volatile("vmrs %0, fpscr\n" : "=r"(saved_fpscr));

  for (int i = 0; i < 0x1ffffff; i++) {
    // Filter-out bits which implementation does not support and exception bits.
    // If we set exception bits then we get FP-exception (correct behavior), but
    // its handling dwarfs the execution time by huge margin thus we couldn't do
    // that in perf test.
    uint32_t fpscr_in = i & 0xc01f00;
    uint32_t fpscr_out;
    // "volatile" is required: writing FPSCR is a side effect that is not
    // expressed through the operand list, so the compiler must not be allowed
    // to drop, merge or hoist this statement.
    asm volatile(
        "vmsr fpscr, %1\n"
        "vmrs %0, fpscr\n"
        : "=r"(fpscr_out)
        : "r"(fpscr_in));
    EXPECT_EQ(fpscr_in, fpscr_out);
  }

  // Put back the FPSCR value that was in effect before the test.
  asm volatile("vmsr fpscr, %0\n" : : "r"(saved_fpscr));
}
211
212 #endif // defined __arm__