1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/time.h>
21 #include <time.h>
22 #include <unistd.h>
23 #include <sched.h>
24 #include <sys/resource.h>
25 #include <sys/syscall.h>
26 #include <sys/types.h>
27 #include <sys/mman.h>
28 
29 #ifdef __ARM_NEON__
30 #include <arm_neon.h>
31 #endif
32 
33 
// Integer type used in this file for timestamps and durations in nanoseconds.
typedef long long nsecs_t;
// Timestamp recorded by startTime(); endTime() subtracts it from the current
// time to obtain the elapsed interval.
static nsecs_t gTime;
// Working buffer that the benchmark kernels read and overwrite in place.
// It has external linkage (not static).
float data_f[1024 * 128];
37 
system_time()38 static nsecs_t system_time()
39 {
40     struct timespec t;
41     t.tv_sec = t.tv_nsec = 0;
42     clock_gettime(CLOCK_MONOTONIC, &t);
43     return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
44 }
45 
startTime()46 static void startTime()
47 {
48     gTime = system_time();
49 }
50 
endTime(const char * str,double ops)51 static void endTime(const char *str, double ops)
52 {
53     nsecs_t t = system_time() - gTime;
54     double ds = ((double)t) / 1e9;
55     printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
56 }
57 
58 
test_mad()59 static void test_mad() {
60     for(int i=0; i<1020; i++) {
61         data_f[i] = i;
62     }
63 
64     startTime();
65 
66     float total = 0;
67     // Do ~1 billion ops
68     for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
69         for (int i=0; i < 1000; i++) {
70             data_f[i] = (data_f[i] * 0.02f +
71                          data_f[i+1] * 0.04f +
72                          data_f[i+2] * 0.05f +
73                          data_f[i+3] * 0.1f +
74                          data_f[i+4] * 0.2f +
75                          data_f[i+5] * 0.2f +
76                          data_f[i+6] * 0.1f +
77                          data_f[i+7] * 0.05f +
78                          data_f[i+8] * 0.04f +
79                          data_f[i+9] * 0.02f + 1.f);
80         }
81     }
82 
83     endTime("scalar mad", 1e9);
84 }
85 
86 
87 #ifdef __ARM_NEON__
88 
test_fma()89 static void test_fma() {
90     for(int i=0; i<1020 * 4; i++) {
91         data_f[i] = i;
92     }
93     float32x4_t c0_02 = vdupq_n_f32(0.02f);
94     float32x4_t c0_04 = vdupq_n_f32(0.04f);
95     float32x4_t c0_05 = vdupq_n_f32(0.05f);
96     float32x4_t c0_10 = vdupq_n_f32(0.1f);
97     float32x4_t c0_20 = vdupq_n_f32(0.2f);
98     float32x4_t c1_00 = vdupq_n_f32(1.0f);
99 
100     startTime();
101 
102     float total = 0;
103     // Do ~1 billion ops
104     for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
105         for (int i=0; i < 1000; i++) {
106             float32x4_t t;
107             t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
108             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
109             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
110             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
111             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
112             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
113             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
114             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
115             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
116             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
117             t = vaddq_f32(t, c1_00);
118             vst1q_f32((float32_t *)&data_f[i], t);
119         }
120     }
121 
122     endTime("neon fma", 1e9);
123 }
124 #endif
125 
fp_test(int argc,char ** argv)126 int fp_test(int argc, char** argv) {
127     test_mad();
128 
129 #ifdef __ARM_NEON__
130     test_fma();
131 #endif
132 
133     return 0;
134 }
135 
136 
137 
138 
139