1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <jni.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include <new>
21
/* Everything from here through qsort_local is copied from the bionic source.
 * The code is duplicated here to remove the dependency on the optimized
 * bionic implementation.
 */
25 static __inline char *med3(char *, char *, char *, int (*)(const void *, const void *));
26 static __inline void swapfunc(char *, char *, int, int);
27
28 #define min(a, b) (a) < (b) ? a : b
29
30 /*
31 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
32 */
/*
 * Exchange n bytes between parmi and parmj, one TYPE-sized unit at a time.
 * The inner loop is a do/while, so one unit is exchanged even when
 * n / sizeof(TYPE) is zero; callers are expected to pass n >= sizeof(TYPE).
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define swapcode(TYPE, parmi, parmj, n) do {    \
    long i = (n) / sizeof (TYPE);               \
    TYPE *pi = (TYPE *) (parmi);                \
    TYPE *pj = (TYPE *) (parmj);                \
    do {                                        \
        TYPE t = *pi;                           \
        *pi++ = *pj;                            \
        *pj++ = t;                              \
    } while (--i > 0);                          \
} while (0)

/*
 * Set the caller's local variable `swaptype`:
 *   2 - base address or element size is not a multiple of sizeof(long),
 *       so elements are exchanged byte by byte;
 *   0 - elements are exactly one long;
 *   1 - elements are a whole number of longs.
 * The address is inspected through uintptr_t rather than by subtracting a
 * null pointer, which is undefined behavior.
 */
#define SWAPINIT(a, es) swaptype =                                      \
    ((uintptr_t)(a) % sizeof(long) != 0 ||                              \
        (es) % sizeof(long) != 0) ? 2 : ((es) == sizeof(long) ? 0 : 1)

/*
 * Exchange the n bytes at a with the n bytes at b.
 * swaptype <= 1 exchanges long-sized units, anything else single bytes.
 */
static __inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
    if (swaptype <= 1) {
        swapcode(long, a, b, n);
    } else {
        swapcode(char, a, b, n);
    }
}
55
/*
 * Exchange the single elements at a and b.  Relies on the locals `swaptype`
 * and `es` of the function it is expanded in.  When swaptype is 0 the element
 * is exactly one long and is exchanged inline; otherwise swapfunc does it.
 * Wrapped in do { } while (0) so the macro is one statement and cannot
 * capture a following `else`.
 */
#define swap(a, b) do {                         \
    if (swaptype == 0) {                        \
        long t = *(long *)(a);                  \
        *(long *)(a) = *(long *)(b);            \
        *(long *)(b) = t;                       \
    } else {                                    \
        swapfunc(a, b, es, swaptype);           \
    }                                           \
} while (0)

/*
 * Exchange the n bytes at a with the n bytes at b; does nothing when n <= 0.
 * Relies on the local `swaptype` of the function it is expanded in.
 */
#define vecswap(a, b, n) do {                   \
    if ((n) > 0)                                \
        swapfunc(a, b, n, swaptype);            \
} while (0)
65
/*
 * Return whichever of a, b and c points at the median element according to
 * cmp.  Uses two or three comparisons.
 */
static __inline char *
med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
{
    if (cmp(a, b) < 0) {
        /* a < b: the median is b unless c is not above b. */
        if (cmp(b, c) < 0) {
            return b;
        }
        return cmp(a, c) < 0 ? c : a;
    }
    /* a >= b: the median is b unless c is not below b. */
    if (cmp(b, c) > 0) {
        return b;
    }
    return cmp(a, c) < 0 ? a : c;
}
73
74 void
qsort_local(void * aa,size_t n,size_t es,int (* cmp)(const void *,const void *))75 qsort_local(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
76 {
77 char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
78 int d, r, swaptype, swap_cnt;
79 char *a = (char*)aa;
80
81 loop: SWAPINIT(a, es);
82 swap_cnt = 0;
83 if (n < 7) {
84 for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
85 for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
86 pl -= es)
87 swap(pl, pl - es);
88 return;
89 }
90 pm = (char *)a + (n / 2) * es;
91 if (n > 7) {
92 pl = (char *)a;
93 pn = (char *)a + (n - 1) * es;
94 if (n > 40) {
95 d = (n / 8) * es;
96 pl = med3(pl, pl + d, pl + 2 * d, cmp);
97 pm = med3(pm - d, pm, pm + d, cmp);
98 pn = med3(pn - 2 * d, pn - d, pn, cmp);
99 }
100 pm = med3(pl, pm, pn, cmp);
101 }
102 swap(a, pm);
103 pa = pb = (char *)a + es;
104
105 pc = pd = (char *)a + (n - 1) * es;
106 for (;;) {
107 while (pb <= pc && (r = cmp(pb, a)) <= 0) {
108 if (r == 0) {
109 swap_cnt = 1;
110 swap(pa, pb);
111 pa += es;
112 }
113 pb += es;
114 }
115 while (pb <= pc && (r = cmp(pc, a)) >= 0) {
116 if (r == 0) {
117 swap_cnt = 1;
118 swap(pc, pd);
119 pd -= es;
120 }
121 pc -= es;
122 }
123 if (pb > pc)
124 break;
125 swap(pb, pc);
126 swap_cnt = 1;
127 pb += es;
128 pc -= es;
129 }
130 if (swap_cnt == 0) { /* Switch to insertion sort */
131 for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
132 for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
133 pl -= es)
134 swap(pl, pl - es);
135 return;
136 }
137
138 pn = (char *)a + n * es;
139 r = min(pa - (char *)a, pb - pa);
140 vecswap(a, pb - r, r);
141 r = min(pd - pc, pn - pd - (int)es);
142 vecswap(pb, pn - r, r);
143 if ((r = pb - pa) > (int)es)
144 qsort_local(a, r / es, es, cmp);
145 if ((r = pd - pc) > (int)es) {
146 /* Iterate rather than recurse to save stack space */
147 a = pn - r;
148 n = r / es;
149 goto loop;
150 }
151 /* qsort(pn - r, r / es, es, cmp); */
152 }
153
154 /* code duplication ends here */
155
/**
 * Time stamp helper.
 *
 * @return the time reported by gettimeofday(), in milliseconds; the
 *         microsecond part of the reading becomes the fractional part.
 *
 * NOTE(review): gettimeofday() is a wall-clock source, so differences between
 * two readings can be affected by clock adjustments.
 */
double currentTimeMillis()
{
    struct timeval now;
    gettimeofday(&now, NULL);
    const double wholeSecondsInMs = now.tv_sec * 1000.0;
    const double microsecondsInMs = now.tv_usec / 1000.0;
    return wholeSecondsInMs + microsecondsInMs;
}
165
/**
 * Fill the first len elements of array with values from rand(), each
 * converted to T.
 *
 * The generator is re-seeded with srand(seed) first, so the same seed always
 * produces the same contents; this also resets the process-wide rand() state.
 * Nothing is written when len is zero or negative.
 */
template <typename T> void randomInitArray(T* array, int len, unsigned int seed)
{
    srand(seed);
    int index = 0;
    while (index < len) {
        array[index] = T(rand());
        ++index;
    }
}
176
/**
 * Comparison function for int, for qsort.
 *
 * @param p1 pointer to the first int
 * @param p2 pointer to the second int
 * @return -1, 0 or 1 when *p1 is less than, equal to or greater than *p2
 *
 * The result is built from two comparisons instead of a subtraction:
 * *p1 - *p2 overflows when the operands have opposite signs and a large
 * magnitude, which yields the wrong sign.
 */
int cmpint(const void* p1, const void* p2)
{
    const int lhs = *(const int*)p1;
    const int rhs = *(const int*)p2;
    return (lhs > rhs) - (lhs < rhs);
}
184
Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv * env,jclass clazz,jint numberElements,jint repetition)185 extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv* env,
186 jclass clazz, jint numberElements, jint repetition)
187 {
188 int* data = new int[numberElements];
189 if (data == NULL) {
190 env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
191 return -1;
192 }
193 double totalTime = 0;
194 for (int i = 0; i < repetition; i++) {
195 randomInitArray<int>(data, numberElements, 0);
196 double start = currentTimeMillis();
197 qsort_local(data, numberElements, sizeof(int), cmpint);
198 double end = currentTimeMillis();
199 totalTime += (end - start);
200 }
201 delete[] data;
202 return totalTime;
203 }
204
205
/**
 * Do matrix multiplication, C = A x B with all matrices having dimension of n x n,
 * stored row by row (element (i, j) is at index i * n + j).
 * The implementation is not the most efficient, but it is good enough for benchmarking purposes.
 *
 * Columns are processed in batches of 8; when n is not a multiple of 8 the remaining
 * columns are computed one at a time, so any n is accepted. Nothing is done for n <= 0.
 *
 * @param A left operand, n * n floats
 * @param B right operand, n * n floats
 * @param C result, n * n floats, every element is overwritten
 * @param n matrix dimension
 */
void doMatrixMultiplication(float* A, float* B, float* C, int n)
{
    // batch size
    enum { M = 8 };
    if (n <= 0) {
        return;
    }
    // Index arithmetic is done in size_t so that i * n + j cannot overflow an int.
    const size_t dim = (size_t)n;
    const size_t batchedColumns = dim - dim % M;
    for (size_t i = 0; i < dim; i++) {
        const float* rowOfA = A + i * dim;
        float* rowOfC = C + i * dim;
        size_t j = 0;
        for (; j < batchedColumns; j += M) {
            float sum[M] = {0};
            // re-use the whole cache line for accessing B.
            // otherwise, the whole line will be read and only one value will be used.
            for (size_t k = 0; k < dim; k++) {
                const float a = rowOfA[k];
                const float* b = B + k * dim + j;
                sum[0] += a * b[0];
                sum[1] += a * b[1];
                sum[2] += a * b[2];
                sum[3] += a * b[3];
                sum[4] += a * b[4];
                sum[5] += a * b[5];
                sum[6] += a * b[6];
                sum[7] += a * b[7];
            }
            for (int m = 0; m < M; m++) {
                rowOfC[j + m] = sum[m];
            }
        }
        // Columns left over when n is not a multiple of the batch size.
        for (; j < dim; j++) {
            float sum = 0;
            for (size_t k = 0; k < dim; k++) {
                sum += rowOfA[k] * B[k * dim + j];
            }
            rowOfC[j] = sum;
        }
    }
}
241
Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(JNIEnv * env,jclass clazz,jint n,jint repetition)242 extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(
243 JNIEnv* env, jclass clazz, jint n, jint repetition)
244 {
245 // C = A x B
246 float* A = new float[n * n];
247 float* B = new float[n * n];
248 float* C = new float[n * n];
249 if ((A == NULL) || (B == NULL) || (C == NULL)) {
250 delete[] A;
251 delete[] B;
252 delete[] C;
253 env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
254 return -1;
255 }
256 double totalTime = 0;
257 for (int i = 0; i < repetition; i++) {
258 randomInitArray<float>(A, n * n, 0);
259 randomInitArray<float>(B, n * n, 1);
260 double start = currentTimeMillis();
261 doMatrixMultiplication(A, B, C, n);
262 double end = currentTimeMillis();
263 totalTime += (end - start);
264 }
265 delete[] A;
266 delete[] B;
267 delete[] C;
268 return totalTime;
269 }
270
271