1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* UT_reduce_backward.java is a much simpler version of this test
18  * case that exercises pragmas after the functions (backward
19  * reference), whereas this test case exercises the pragmas before
20  * the functions (forward reference).
21  */
22 
23 package com.android.rs.test;
24 
25 import android.content.Context;
26 import android.content.res.Resources;
27 import android.renderscript.*;
28 import android.util.Log;
29 import java.lang.Float;
30 import java.lang.Math;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Random;
34 import static junit.framework.Assert.*;
35 
36 public class UT_reduce extends UnitTest {
37     private static final String TAG = "reduce";
38 
    // Forwards the test core, the test name "reduce" and the context to the
    // UnitTest constructor. The res parameter is not used by this constructor.
    protected UT_reduce(RSTestCore rstc, Resources res, Context ctx) {
        super(rstc, "reduce", ctx);
    }
42 
43     private static class timing {
timing(long myJavaStart, long myJavaEnd, long myRsStart, long myCopyStart, long myKernelStart, long myRsEnd, Allocation... myInputs)44         timing(long myJavaStart, long myJavaEnd, long myRsStart,
45                long myCopyStart, long myKernelStart, long myRsEnd,
46                Allocation... myInputs) {
47             javaStart = myJavaStart;
48             javaEnd = myJavaEnd;
49             rsStart = myRsStart;
50             copyStart = myCopyStart;
51             kernelStart = myKernelStart;
52             rsEnd = myRsEnd;
53 
54             inputBytes = 0;
55             for (Allocation input : myInputs)
56                 inputBytes += input.getBytesSize();
57 
58             inputCells = (myInputs.length > 0) ? myInputs[0].getType().getCount() : 0;
59         }
60 
timing(long myInputCells)61         timing(long myInputCells) {
62             inputCells = myInputCells;
63         }
64 
65         private long javaStart = -1;
66         private long javaEnd = -1;
67         private long rsStart = -1;
68         private long copyStart = -1;
69         private long kernelStart = -1;
70         private long rsEnd = -1;
71         private long inputBytes = -1;
72         private long inputCells = -1;
73 
javaTime()74         public long javaTime() { return javaEnd - javaStart; }
rsTime()75         public long rsTime() { return rsEnd - rsStart; }
kernelTime()76         public long kernelTime() { return rsEnd - kernelStart; }
overheadTime()77         public long overheadTime() { return kernelStart - rsStart; }
allocationTime()78         public long allocationTime() { return copyStart - rsStart; }
copyTime()79         public long copyTime() { return kernelStart - copyStart; }
80 
string(long myJavaStart, long myJavaEnd, long myRsStart, long myCopyStart, long myKernelStart, long myRsEnd, Allocation... myInputs)81         public static String string(long myJavaStart, long myJavaEnd, long myRsStart,
82                                     long myCopyStart, long myKernelStart, long myRsEnd,
83                                     Allocation... myInputs) {
84             return (new timing(myJavaStart, myJavaEnd, myRsStart,
85                                myCopyStart, myKernelStart, myRsEnd, myInputs)).string();
86         }
87 
string(long myInputCells)88         public static String string(long myInputCells) {
89             return (new timing(myInputCells)).string();
90         }
91 
string()92         public String string() {
93             String result;
94             if (javaStart >= 0) {
95                 result = "(java " + javaTime() + "ms, rs " + rsTime() + "ms = overhead " +
96                          overheadTime() + "ms (alloc " + allocationTime() + "ms + copy " +
97                          copyTime() + "ms) + kernel+get() " + kernelTime() + "ms)";
98                 if (inputCells > 0)
99                     result += " ";
100             } else {
101                 result = "";
102             }
103             if (inputCells > 0) {
104                 result += "(" + fmt.format(inputCells) + " cells";
105                 if (inputBytes > 0)
106                     result += ", " + fmt.format(inputBytes) + " bytes";
107                 result += ")";
108             }
109             return result;
110         }
111 
112         private static java.text.DecimalFormat fmt;
113         static {
114             fmt = new java.text.DecimalFormat("###,###");
115         }
116     };
117 
createInputArrayByte(int len, int seed)118     private byte[] createInputArrayByte(int len, int seed) {
119         byte[] array = new byte[len];
120         (new Random(seed)).nextBytes(array);
121         return array;
122     }
123 
createInputArrayFloat(int len, int seed)124     private float[] createInputArrayFloat(int len, int seed) {
125         Random rand = new Random(seed);
126         float[] array = new float[len];
127         for (int i = 0; i < len; ++i)
128             array[i] = rand.nextFloat();
129         return array;
130     }
131 
createInputArrayInt(int len, int seed)132     private int[] createInputArrayInt(int len, int seed) {
133         Random rand = new Random(seed);
134         int[] array = new int[len];
135         for (int i = 0; i < len; ++i)
136             array[i] = rand.nextInt();
137         return array;
138     }
139 
createInputArrayInt(int len, int seed, int eltRange)140     private int[] createInputArrayInt(int len, int seed, int eltRange) {
141         Random rand = new Random(seed);
142         int[] array = new int[len];
143         for (int i = 0; i < len; ++i)
144             array[i] = rand.nextInt(eltRange);
145         return array;
146     }
147 
intArrayToLong(final int[] input)148     private long[] intArrayToLong(final int[] input) {
149         final long[] output = new long[input.length];
150 
151         for (int i = 0; i < input.length; ++i)
152             output[i] = input[i];
153 
154         return output;
155     }
156 
result(String testName, final timing t, T javaRslt, T rsRslt)157     private <T extends Number> boolean result(String testName, final timing t,
158                                               T javaRslt, T rsRslt) {
159         final boolean success = javaRslt.equals(rsRslt);
160         String status = (success ? "PASSED" : "FAILED");
161         if (success && (t != null))
162             status += " " + t.string();
163         Log.i(TAG, testName + ": java " + javaRslt + ", rs " + rsRslt + ": " + status);
164         return success;
165     }
166 
result(String testName, final timing t, final float[] javaRslt, final float[] rsRslt)167     private boolean result(String testName, final timing t,
168                            final float[] javaRslt, final float[] rsRslt) {
169         if (javaRslt.length != rsRslt.length) {
170             Log.i(TAG, testName + ": java length " + javaRslt.length +
171                        ", rs length " + rsRslt.length + ": FAILED");
172             return false;
173         }
174         for (int i = 0; i < javaRslt.length; ++i) {
175             if (javaRslt[i] != rsRslt[i]) {
176                 Log.i(TAG, testName + "[" + i + "]: java " + javaRslt[i] +
177                            ", rs " + rsRslt[i] + ": FAILED");
178                 return false;
179             }
180         }
181         String status = "PASSED";
182         if (t != null)
183             status += " " + t.string();
184         Log.i(TAG, testName + ": " + status);
185         return true;
186     }
187 
result(String testName, final timing t, final long[] javaRslt, final long[] rsRslt)188     private boolean result(String testName, final timing t,
189                            final long[] javaRslt, final long[] rsRslt) {
190         if (javaRslt.length != rsRslt.length) {
191             Log.i(TAG, testName + ": java length " + javaRslt.length +
192                        ", rs length " + rsRslt.length + ": FAILED");
193             return false;
194         }
195         for (int i = 0; i < javaRslt.length; ++i) {
196             if (javaRslt[i] != rsRslt[i]) {
197                 Log.i(TAG, testName + "[" + i + "]: java " + javaRslt[i] +
198                            ", rs " + rsRslt[i] + ": FAILED");
199                 return false;
200             }
201         }
202         String status = "PASSED";
203         if (t != null)
204             status += " " + t.string();
205         Log.i(TAG, testName + ": " + status);
206         return true;
207     }
208 
result(String testName, final timing t, final int[] javaRslt, final int[] rsRslt)209     private boolean result(String testName, final timing t,
210                            final int[] javaRslt, final int[] rsRslt) {
211         return result(testName, t, intArrayToLong(javaRslt), intArrayToLong(rsRslt));
212     }
213 
result(String testName, final timing t, Int2 javaRslt, Int2 rsRslt)214     private boolean result(String testName, final timing t, Int2 javaRslt, Int2 rsRslt) {
215         final boolean success = (javaRslt.x == rsRslt.x) && (javaRslt.y == rsRslt.y);
216         String status = (success ? "PASSED" : "FAILED");
217         if (success && (t != null))
218             status += " " + t.string();
219         Log.i(TAG,
220                 testName +
221                 ": java (" + javaRslt.x + ", " + javaRslt.y + ")" +
222                 ", rs (" + rsRslt.x + ", " + rsRslt.y + ")" +
223                 ": " + status);
224         return success;
225     }
226 
result(String testName, final timing t, Float2 javaRslt, Float2 rsRslt)227     private boolean result(String testName, final timing t, Float2 javaRslt, Float2 rsRslt) {
228         final boolean success = (javaRslt.x == rsRslt.x) && (javaRslt.y == rsRslt.y);
229         String status = (success ? "PASSED" : "FAILED");
230         if (success && (t != null))
231             status += " " + t.string();
232         Log.i(TAG,
233                 testName +
234                 ": java (" + javaRslt.x + ", " + javaRslt.y + ")" +
235                 ", rs (" + rsRslt.x + ", " + rsRslt.y + ")" +
236                 ": " + status);
237         return success;
238     }
239 
240     ///////////////////////////////////////////////////////////////////
241 
addint(int[] input)242     private int addint(int[] input) {
243         int rslt = 0;
244         for (int idx = 0; idx < input.length; ++idx)
245             rslt += input[idx];
246         return rslt;
247     }
248 
addint1D_array(RenderScript RS, ScriptC_reduce s, int seed, int[] size)249     private boolean addint1D_array(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
250         final int[] input = createInputArrayInt(size[0], seed, Integer.MAX_VALUE / size[0]);
251 
252         final int javaRslt = addint(input);
253         final int rsRslt = s.reduce_addint(input).get();
254 
255         return result("addint1D_array", new timing(size[0]), javaRslt, rsRslt);
256     }
257 
addint1D(RenderScript RS, ScriptC_reduce s, int seed, int[] size)258     private boolean addint1D(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
259         final int[] inputArray = createInputArrayInt(size[0], seed, Integer.MAX_VALUE / size[0]);
260 
261         final long javaTimeStart = java.lang.System.currentTimeMillis();
262         final int javaRslt = addint(inputArray);
263         final long javaTimeEnd = java.lang.System.currentTimeMillis();
264 
265         final long rsTimeStart = java.lang.System.currentTimeMillis();
266 
267         Allocation inputAllocation = Allocation.createSized(RS, Element.I32(RS), inputArray.length);
268 
269         final long copyTimeStart = java.lang.System.currentTimeMillis();
270 
271         inputAllocation.copyFrom(inputArray);
272 
273         final long kernelTimeStart = java.lang.System.currentTimeMillis();
274         final int rsRslt = s.reduce_addint(inputAllocation).get();
275         final long rsTimeEnd = java.lang.System.currentTimeMillis();
276 
277         final boolean success =
278                 result("addint1D",
279                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart,
280                                    copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation),
281                         javaRslt, rsRslt);
282         inputAllocation.destroy();
283         return success;
284     }
285 
addint2D(RenderScript RS, ScriptC_reduce s, int seed, int[] size)286     private boolean addint2D(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
287         final int dimX = size[0];
288         final int dimY = size[1];
289 
290         final int[] inputArray = createInputArrayInt(dimX * dimY, seed, Integer.MAX_VALUE / (dimX * dimY));
291 
292         final long javaTimeStart = java.lang.System.currentTimeMillis();
293         final int javaRslt = addint(inputArray);
294         final long javaTimeEnd = java.lang.System.currentTimeMillis();
295 
296         final long rsTimeStart = java.lang.System.currentTimeMillis();
297 
298         Type.Builder typeBuilder = new Type.Builder(RS, Element.I32(RS));
299         typeBuilder.setX(dimX).setY(dimY);
300         Allocation inputAllocation = Allocation.createTyped(RS, typeBuilder.create());
301 
302         final long copyTimeStart = java.lang.System.currentTimeMillis();
303 
304         inputAllocation.copy2DRangeFrom(0, 0, dimX, dimY, inputArray);
305 
306         final long kernelTimeStart = java.lang.System.currentTimeMillis();
307         final int rsRslt = s.reduce_addint(inputAllocation).get();
308         final long rsTimeEnd = java.lang.System.currentTimeMillis();
309 
310         final boolean success =
311                 result("addint2D",
312                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart,
313                                    copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation),
314                         javaRslt, rsRslt);
315         inputAllocation.destroy();
316         return success;
317     }
318 
addint3D(RenderScript RS, ScriptC_reduce s, int seed, int[] size)319     private boolean addint3D(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
320         final int dimX = size[0];
321         final int dimY = size[1];
322         final int dimZ = size[2];
323 
324         final int[] inputArray = createInputArrayInt(dimX * dimY * dimZ, seed, Integer.MAX_VALUE / (dimX * dimY * dimZ));
325 
326         final long javaTimeStart = java.lang.System.currentTimeMillis();
327         final int javaRslt = addint(inputArray);
328         final long javaTimeEnd = java.lang.System.currentTimeMillis();
329 
330         final long rsTimeStart = java.lang.System.currentTimeMillis();
331 
332         Type.Builder typeBuilder = new Type.Builder(RS, Element.I32(RS));
333         typeBuilder.setX(dimX).setY(dimY).setZ(dimZ);
334         Allocation inputAllocation = Allocation.createTyped(RS, typeBuilder.create());
335 
336         final long copyTimeStart = java.lang.System.currentTimeMillis();
337 
338         inputAllocation.copy3DRangeFrom(0, 0, 0, dimX, dimY, dimZ, inputArray);
339 
340         final long kernelTimeStart = java.lang.System.currentTimeMillis();
341         final int rsRslt = s.reduce_addint(inputAllocation).get();
342         final long rsTimeEnd = java.lang.System.currentTimeMillis();
343 
344         final boolean success =
345                 result("addint3D",
346                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart,
347                                    copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation),
348                         javaRslt, rsRslt);
349         inputAllocation.destroy();
350         return success;
351     }
352 
353     //-----------------------------------------------------------------
354 
patternInterleavedReduce(RenderScript RS, ScriptC_reduce s)355     private boolean patternInterleavedReduce(RenderScript RS, ScriptC_reduce s) {
356         // Run two reduce operations without forcing completion between them.
357         // We want to ensure that the driver can handle this, and that
358         // temporary Allocations created to run the reduce operations survive
359         // until get().
360 
361         boolean pass = true;
362 
363         final int inputSize = (1 << 18);
364 
365         final int[] input1 = createInputArrayInt(123, Integer.MAX_VALUE / inputSize);
366         final int[] input2 = createInputArrayInt(456, Integer.MAX_VALUE / inputSize);
367 
368         final int javaRslt1 = addint(input1);
369         final int javaRslt2 = addint(input2);
370 
371         final ScriptC_reduce.result_int rsRsltFuture1 = s.reduce_addint(input1);
372         final ScriptC_reduce.result_int rsRsltFuture2 = s.reduce_addint(input2);
373 
374         pass &= result("patternInterleavedReduce (1)", new timing(inputSize),
375                 javaRslt1, rsRsltFuture1.get());
376         pass &= result("patternInterleavedReduce (2)", new timing(inputSize),
377                 javaRslt2, rsRsltFuture2.get());
378 
379         return pass;
380     }
381 
382     //-----------------------------------------------------------------
383 
sillySumIntoDecArray(final int[] input)384     private int[] sillySumIntoDecArray(final int[] input) {
385         final int resultScalar = addint(input);
386         final int[] result = new int[4];
387         for (int i = 0; i < 4; ++i)
388             result[i] = resultScalar/(i+1);
389         return result;
390     }
391 
sillySumIntoIncArray(final int[] input)392     private int[] sillySumIntoIncArray(final int[] input) {
393         final int resultScalar = addint(input);
394         final int[] result = new int[4];
395         for (int i = 0; i < 4; ++i)
396             result[i] = resultScalar/(4-i);
397         return result;
398     }
399 
patternDuplicateAnonymousResult(RenderScript RS, ScriptC_reduce s)400     private boolean patternDuplicateAnonymousResult(RenderScript RS, ScriptC_reduce s) {
401         // Ensure that we can have two kernels with the same anonymous result type.
402 
403         boolean pass = true;
404 
405         final int inputSize = 1000;
406         final int[] input = createInputArrayInt(149, Integer.MAX_VALUE / inputSize);
407 
408         final int[] javaRsltDec = sillySumIntoDecArray(input);
409         final int[] rsRsltDec = s.reduce_sillySumIntoDecArray(input).get();
410         pass &= result("patternDuplicateAnonymousResult (Dec)", new timing(inputSize),
411                 javaRsltDec, rsRsltDec);
412 
413         final int[] javaRsltInc = sillySumIntoIncArray(input);
414         final int[] rsRsltInc = s.reduce_sillySumIntoIncArray(input).get();
415         pass &= result("patternDuplicateAnonymousResult (Inc)", new timing(inputSize),
416                 javaRsltInc, rsRsltInc);
417 
418         return pass;
419     }
420 
421     ///////////////////////////////////////////////////////////////////
422 
findMinAndMax(float[] input)423     private Int2 findMinAndMax(float[] input) {
424         float minVal = Float.POSITIVE_INFINITY;
425         int minIdx = -1;
426         float maxVal = Float.NEGATIVE_INFINITY;
427         int maxIdx = -1;
428 
429         for (int idx = 0; idx < input.length; ++idx) {
430             if (input[idx] < minVal) {
431                 minVal = input[idx];
432                 minIdx = idx;
433             }
434             if (input[idx] > maxVal) {
435                 maxVal = input[idx];
436                 maxIdx = idx;
437             }
438         }
439 
440         return new Int2(minIdx, maxIdx);
441     }
442 
findMinAndMax_array(RenderScript RS, ScriptC_reduce s, int seed, int[] size)443     private boolean findMinAndMax_array(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
444         final float[] input = createInputArrayFloat(size[0], seed);
445 
446         final Int2 javaRslt = findMinAndMax(input);
447         final Int2 rsRslt = s.reduce_findMinAndMax(input).get();
448 
449         // Note that the Java and RenderScript algorithms are not
450         // guaranteed to find the same cells -- but they should
451         // find cells of the same value.
452         final Float2 javaVal = new Float2(input[javaRslt.x], input[javaRslt.y]);
453         final Float2 rsVal = new Float2(input[rsRslt.x], input[rsRslt.y]);
454 
455         return result("findMinAndMax_array", new timing(size[0]), javaVal, rsVal);
456     }
457 
findMinAndMax(RenderScript RS, ScriptC_reduce s, int seed, int[] size)458     private boolean findMinAndMax(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
459         final float[] inputArray = createInputArrayFloat(size[0], seed);
460 
461         final long javaTimeStart = java.lang.System.currentTimeMillis();
462         final Int2 javaRslt = findMinAndMax(inputArray);
463         final long javaTimeEnd = java.lang.System.currentTimeMillis();
464 
465         final long rsTimeStart = java.lang.System.currentTimeMillis();
466 
467         Allocation inputAllocation = Allocation.createSized(RS, Element.F32(RS), inputArray.length);
468 
469         final long copyTimeStart = java.lang.System.currentTimeMillis();
470 
471         inputAllocation.copyFrom(inputArray);
472 
473         final long kernelTimeStart = java.lang.System.currentTimeMillis();
474         final Int2 rsRslt = s.reduce_findMinAndMax(inputAllocation).get();
475         final long rsTimeEnd = java.lang.System.currentTimeMillis();
476 
477         // Note that the Java and RenderScript algorithms are not
478         // guaranteed to find the same cells -- but they should
479         // find cells of the same value.
480         final Float2 javaVal = new Float2(inputArray[javaRslt.x], inputArray[javaRslt.y]);
481         final Float2 rsVal = new Float2(inputArray[rsRslt.x], inputArray[rsRslt.y]);
482 
483         final boolean success =
484                 result("findMinAndMax",
485                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart,
486                                    copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation),
487                         javaVal, rsVal);
488         inputAllocation.destroy();
489         return success;
490     }
491 
492     ///////////////////////////////////////////////////////////////////
493 
494     // Both the input and the result are linearized representations of matSize*matSize matrices.
findMinMat(final float[] inputArray, final int matSize)495     private float[] findMinMat(final float[] inputArray, final int matSize) {
496         final int matSizeSquared = matSize*matSize;
497 
498         float[] result = new float[matSizeSquared];
499         for (int i = 0; i < matSizeSquared; ++i)
500             result[i] = Float.POSITIVE_INFINITY;
501 
502         for (int i = 0; i < inputArray.length; ++i)
503             result[i % matSizeSquared] = Math.min(result[i % matSizeSquared], inputArray[i]);
504 
505         return result;
506     }
507 
508     static interface ReduceFindMinMat {
run(Allocation input)509         float[] run(Allocation input);
510     };
511 
findMinMat(RenderScript RS, int seed, int[] inputSize, int matSize, Element matElement, ReduceFindMinMat reduction)512     private boolean findMinMat(RenderScript RS, int seed, int[] inputSize,
513             int matSize, Element matElement, ReduceFindMinMat reduction) {
514         final int length = inputSize[0];
515         final int matSizeSquared = matSize*matSize;
516 
517         final float[] inputArray = createInputArrayFloat(matSizeSquared * length, seed);
518 
519         final long javaTimeStart = java.lang.System.currentTimeMillis();
520         final float[] javaRslt = findMinMat(inputArray, matSize);
521         final long javaTimeEnd = java.lang.System.currentTimeMillis();
522 
523         final long rsTimeStart = java.lang.System.currentTimeMillis();
524 
525         Allocation inputAllocation = Allocation.createSized(RS, matElement, length);
526 
527         final long copyTimeStart = java.lang.System.currentTimeMillis();
528 
529         inputAllocation.copyFromUnchecked(inputArray);
530 
531         final long kernelTimeStart = java.lang.System.currentTimeMillis();
532         final float[] rsRslt = reduction.run(inputAllocation);
533         final long rsTimeEnd = java.lang.System.currentTimeMillis();
534 
535         final boolean success =
536                 result("findMinMat" + matSize,
537                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart,
538                                    copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation),
539                         javaRslt, rsRslt);
540         inputAllocation.destroy();
541         return success;
542     }
543 
findMinMat2(RenderScript RS, ScriptC_reduce s, int seed, int[] size)544     private boolean findMinMat2(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
545         return findMinMat(RS, seed, size, 2, Element.MATRIX_2X2(RS),
546                 (Allocation input) -> s.reduce_findMinMat2(input).get());
547     }
548 
findMinMat4(RenderScript RS, ScriptC_reduce s, int seed, int[] size)549     private boolean findMinMat4(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
550         return findMinMat(RS, seed, size, 4, Element.MATRIX_4X4(RS),
551                 (Allocation input) -> s.reduce_findMinMat4(input).get());
552     }
553 
554     ///////////////////////////////////////////////////////////////////
555 
fz(final int[] input)556     private int fz(final int[] input) {
557         for (int i = 0; i < input.length; ++i)
558             if (input[i] == 0)
559                 return i;
560         return -1;
561     }
562 
fz_array(RenderScript RS, ScriptC_reduce s, int seed, int size[])563     private boolean fz_array(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
564         final int inputLen = size[0];
565         int[] input = createInputArrayInt(inputLen, seed+0);
566         // just in case we got unlucky
567         input[(new Random(seed+1)).nextInt(inputLen)] = 0;
568 
569         final int rsRslt = s.reduce_fz(input).get();
570 
571         final boolean success = (input[rsRslt] == 0);
572         Log.i(TAG,
573                 "fz_array: input[" + rsRslt + "] == " + input[rsRslt] + ": " +
574                 (success ? "PASSED " + timing.string(size[0]) : "FAILED"));
575         return success;
576     }
577 
fz(RenderScript RS, ScriptC_reduce s, int seed, int size[])578     private boolean fz(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
579         final int inputLen = size[0];
580         int[] inputArray = createInputArrayInt(inputLen, seed+0);
581         // just in case we got unlucky
582         inputArray[(new Random(seed+1)).nextInt(inputLen)] = 0;
583 
584         final long javaTimeStart = java.lang.System.currentTimeMillis();
585         final int javaRslt = fz(inputArray);
586         final long javaTimeEnd = java.lang.System.currentTimeMillis();
587 
588         final long rsTimeStart = java.lang.System.currentTimeMillis();
589 
590         Allocation inputAllocation = Allocation.createSized(RS, Element.I32(RS), inputArray.length);
591 
592         final long copyTimeStart = java.lang.System.currentTimeMillis();
593 
594         inputAllocation.copyFrom(inputArray);
595 
596         final long kernelTimeStart = java.lang.System.currentTimeMillis();
597         final int rsRslt = s.reduce_fz(inputAllocation).get();
598         final long rsTimeEnd = java.lang.System.currentTimeMillis();
599 
600         final boolean success = (inputArray[rsRslt] == 0);
601         String status = (success ? "PASSED" : "FAILED");
602         if (success)
603             status += " " + timing.string(javaTimeStart, javaTimeEnd, rsTimeStart,
604                                           copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation);
605         Log.i(TAG,
606                 "fz: java input[" + javaRslt + "] == " + inputArray[javaRslt] +
607                 ", rs input[" + rsRslt + "] == " + inputArray[javaRslt] + ": " + status);
608         inputAllocation.destroy();
609         return success;
610     }
611 
612     ///////////////////////////////////////////////////////////////////
613 
fz2(RenderScript RS, ScriptC_reduce s, int seed, int size[])614     private boolean fz2(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
615         final int dimX = size[0], dimY = size[1];
616         final int inputLen = dimX * dimY;
617 
618         int[] inputArray = createInputArrayInt(inputLen, seed+0);
619         // just in case we got unlucky
620         inputArray[(new Random(seed+1)).nextInt(inputLen)] = 0;
621 
622         final long javaTimeStart = java.lang.System.currentTimeMillis();
623         final int javaRsltLinear = fz(inputArray);
624         final long javaTimeEnd = java.lang.System.currentTimeMillis();
625 
626         final Int2 javaRslt = new Int2(javaRsltLinear % dimX, javaRsltLinear / dimX);
627         final int javaCellVal = inputArray[javaRslt.x + dimX * javaRslt.y];
628 
629         final long rsTimeStart = java.lang.System.currentTimeMillis();
630 
631         Type.Builder typeBuilder = new Type.Builder(RS, Element.I32(RS));
632         typeBuilder.setX(dimX).setY(dimY);
633         Allocation inputAllocation = Allocation.createTyped(RS, typeBuilder.create());
634 
635         final long copyTimeStart = java.lang.System.currentTimeMillis();
636 
637         inputAllocation.copy2DRangeFrom(0, 0, dimX, dimY, inputArray);
638 
639         final long kernelTimeStart = java.lang.System.currentTimeMillis();
640         final Int2 rsRslt = s.reduce_fz2(inputAllocation).get();
641         final long rsTimeEnd = java.lang.System.currentTimeMillis();
642 
643         final int rsCellVal = inputArray[rsRslt.x + dimX * rsRslt.y];
644         final boolean success = (rsCellVal == 0);
645         String status = (success ? "PASSED" : "FAILED");
646         if (success)
647             status += " " + timing.string(javaTimeStart, javaTimeEnd, rsTimeStart,
648                                           copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation);
649         Log.i(TAG,
650                 "fz2: java input[" + javaRslt.x + ", " + javaRslt.y + "] == " + javaCellVal +
651                 ", rs input[" + rsRslt.x + ", " + rsRslt.y + "] == " + rsCellVal + ": " + status);
652         inputAllocation.destroy();
653         return success;
654     }
655 
656     ///////////////////////////////////////////////////////////////////
657 
fz3(RenderScript RS, ScriptC_reduce s, int seed, int[] size)658     private boolean fz3(RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
659         final int dimX = size[0], dimY = size[1], dimZ = size[2];
660         final int inputLen = dimX * dimY * dimZ;
661 
662         int[] inputArray = createInputArrayInt(inputLen, seed+0);
663         // just in case we got unlucky
664         inputArray[(new Random(seed+1)).nextInt(inputLen)] = 0;
665 
666         final long javaTimeStart = java.lang.System.currentTimeMillis();
667         final int javaRsltLinear = fz(inputArray);
668         final long javaTimeEnd = java.lang.System.currentTimeMillis();
669 
670         final Int3 javaRslt = new Int3(
671             javaRsltLinear % dimX,
672             (javaRsltLinear / dimX) % dimY,
673             javaRsltLinear / (dimX * dimY));
674         final int javaCellVal = inputArray[javaRslt.x + dimX * javaRslt.y + dimX * dimY * javaRslt.z];
675 
676         final long rsTimeStart = java.lang.System.currentTimeMillis();
677 
678         Type.Builder typeBuilder = new Type.Builder(RS, Element.I32(RS));
679         typeBuilder.setX(dimX).setY(dimY).setZ(dimZ);
680         Allocation inputAllocation = Allocation.createTyped(RS, typeBuilder.create());
681 
682         final long copyTimeStart = java.lang.System.currentTimeMillis();
683 
684         inputAllocation.copy3DRangeFrom(0, 0, 0, dimX, dimY, dimZ, inputArray);
685 
686         final long kernelTimeStart = java.lang.System.currentTimeMillis();
687         final Int3 rsRslt = s.reduce_fz3(inputAllocation).get();
688         final long rsTimeEnd = java.lang.System.currentTimeMillis();
689 
690         final int rsCellVal = inputArray[rsRslt.x + dimX * rsRslt.y + dimX * dimY * rsRslt.z];
691         final boolean success = (rsCellVal == 0);
692         String status = (success ? "PASSED" : "FAILED");
693         if (success)
694             status += " " + timing.string(javaTimeStart, javaTimeEnd, rsTimeStart,
695                                           copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation);
696         Log.i(TAG,
697                 "fz3: java input[" + javaRslt.x + ", " + javaRslt.y + ", " + javaRslt.z + "] == " + javaCellVal +
698                 ", rs input[" + rsRslt.x + ", " + rsRslt.y + ", " + rsRslt.z + "] == " + rsCellVal + ": " + status);
699         inputAllocation.destroy();
700         return success;
701     }
702 
703     ///////////////////////////////////////////////////////////////////
704 
    // Number of buckets in every histogram computed by this test: one per
    // possible value of an unsigned 8-bit (U8) input element.
    private static final int histogramBucketCount = 256;
706 
histogram(RenderScript RS, final byte[] inputArray)707     private long[] histogram(RenderScript RS, final byte[] inputArray) {
708         Allocation inputAllocation = Allocation.createSized(RS, Element.U8(RS), inputArray.length);
709         inputAllocation.copyFrom(inputArray);
710 
711         Allocation outputAllocation = Allocation.createSized(RS, Element.U32(RS), histogramBucketCount);
712 
713         ScriptIntrinsicHistogram scriptHsg = ScriptIntrinsicHistogram.create(RS, Element.U8(RS));
714         scriptHsg.setOutput(outputAllocation);
715         scriptHsg.forEach(inputAllocation);
716 
717         int[] outputArrayMistyped = new int[histogramBucketCount];
718         outputAllocation.copyTo(outputArrayMistyped);
719 
720         long[] outputArray = new long[histogramBucketCount];
721         for (int i = 0; i < histogramBucketCount; ++i)
722             outputArray[i] = outputArrayMistyped[i] & (long)0xffffffff;
723 
724         inputAllocation.destroy();
725         outputAllocation.destroy();
726 
727         return outputArray;
728     }
729 
histogram_array(RenderScript RS, ScriptC_reduce s, int seed, int size[])730     private boolean histogram_array(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
731         final byte[] inputArray = createInputArrayByte(size[0], seed);
732 
733         final long[] javaRslt = histogram(RS, inputArray);
734         assertEquals("javaRslt length", histogramBucketCount, javaRslt.length);
735         final long[] rsRslt = s.reduce_histogram(inputArray).get();
736         assertEquals("rsRslt length", histogramBucketCount, rsRslt.length);
737 
738         return result("histogram_array", new timing(size[0]), javaRslt, rsRslt);
739     }
740 
histogram(RenderScript RS, ScriptC_reduce s, int seed, int size[])741     private boolean histogram(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
742         final byte[] inputArray = createInputArrayByte(size[0], seed);
743 
744         final long javaTimeStart = java.lang.System.currentTimeMillis();
745         final long[] javaRslt = histogram(RS, inputArray);
746         final long javaTimeEnd = java.lang.System.currentTimeMillis();
747         assertEquals("javaRslt length", histogramBucketCount, javaRslt.length);
748 
749         final long rsTimeStart = java.lang.System.currentTimeMillis();
750 
751         Allocation inputAllocation = Allocation.createSized(RS, Element.U8(RS), inputArray.length);
752 
753         final long copyTimeStart = java.lang.System.currentTimeMillis();
754 
755         inputAllocation.copyFrom(inputArray);
756 
757         final long kernelTimeStart = java.lang.System.currentTimeMillis();
758         final long[] rsRslt = s.reduce_histogram(inputAllocation).get();
759         final long rsTimeEnd = java.lang.System.currentTimeMillis();
760         assertEquals("rsRslt length", histogramBucketCount, rsRslt.length);
761 
762         // NOTE: The "java time" is actually for the RenderScript histogram intrinsic
763         final boolean success =
764                 result("histogram",
765                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart,
766                                    copyTimeStart, kernelTimeStart, rsTimeEnd, inputAllocation),
767                         javaRslt, rsRslt);
768         inputAllocation.destroy();
769         return success;
770     }
771 
772     //-----------------------------------------------------------------
773 
patternRedundantGet(RenderScript RS, ScriptC_reduce s)774     private boolean patternRedundantGet(RenderScript RS, ScriptC_reduce s) {
775         // Ensure that get() can be called multiple times on the same
776         // result, and returns the same object each time.
777 
778         boolean pass = true;
779 
780         final int inputLength = 1 << 18;
781         final byte[] inputArray = createInputArrayByte(inputLength, 789);
782 
783         final long[] javaRslt = histogram(RS, inputArray);
784         assertEquals("javaRslt length", histogramBucketCount, javaRslt.length);
785 
786         final ScriptC_reduce.resultArray256_uint rsRsltFuture = s.reduce_histogram(inputArray);
787         final long[] rsRslt1 = rsRsltFuture.get();
788         assertEquals("rsRslt1 length", histogramBucketCount, rsRslt1.length);
789         pass &= result("patternRedundantGet (1)", new timing(inputLength), javaRslt, rsRslt1);
790 
791         final long[] rsRslt2 = rsRsltFuture.get();
792         pass &= result("patternRedundantGet (2)", new timing(inputLength), javaRslt, rsRslt2);
793 
794         final boolean success = (rsRslt1 == rsRslt2);
795         Log.i(TAG, "patternRedundantGet (object equality): " + (success ? "PASSED" : "FAILED"));
796         pass &= success;
797 
798         return pass;
799     }
800 
801     //-----------------------------------------------------------------
802 
mode(RenderScript RS, final byte[] inputArray)803     private Int2 mode(RenderScript RS, final byte[] inputArray) {
804         long[] hsg = histogram(RS, inputArray);
805 
806         int modeIdx = 0;
807         for (int i = 1; i < hsg.length; ++i)
808             if (hsg[i] > hsg[modeIdx]) modeIdx =i;
809         return new Int2(modeIdx, (int)hsg[modeIdx]);
810     }
811 
mode_array(RenderScript RS, ScriptC_reduce s, int seed, int size[])812     private boolean mode_array(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
813         final byte[] inputArray = createInputArrayByte(size[0], seed);
814 
815         final Int2 javaRslt = mode(RS, inputArray);
816         final Int2 rsRslt = s.reduce_mode(inputArray).get();
817 
818         return result("mode", new timing(size[0]), javaRslt, rsRslt);
819     }
820 
821     ///////////////////////////////////////////////////////////////////
822 
sumgcd(final int in1[], final int in2[])823     private long sumgcd(final int in1[], final int in2[]) {
824         assertEquals("sumgcd input lengths", in1.length, in2.length);
825 
826         long sum = 0;
827         for (int i = 0; i < in1.length; ++i) {
828             int a = in1[i], b = in2[i];
829 
830             while (b != 0) {
831                 final int aNew = b;
832                 final int bNew = a % b;
833 
834                 a = aNew;
835                 b = bNew;
836             }
837 
838             sum += a;
839         }
840         return sum;
841     }
842 
sumgcd(RenderScript RS, ScriptC_reduce s, int seed, int size[])843     private boolean sumgcd(RenderScript RS, ScriptC_reduce s, int seed, int size[]) {
844         final int len = size[0];
845 
846         final int[] inputArrayA = createInputArrayInt(len, seed+0);
847         final int[] inputArrayB = createInputArrayInt(len, seed+1);
848 
849         final long javaTimeStart = java.lang.System.currentTimeMillis();
850         final long javaRslt = sumgcd(inputArrayA, inputArrayB);
851         final long javaTimeEnd = java.lang.System.currentTimeMillis();
852 
853         final long rsTimeStart = java.lang.System.currentTimeMillis();
854 
855         Allocation inputAllocationA = Allocation.createSized(RS, Element.I32(RS), len);
856         Allocation inputAllocationB = Allocation.createSized(RS, Element.I32(RS), len);
857 
858         final long copyTimeStart = java.lang.System.currentTimeMillis();
859 
860         inputAllocationA.copyFrom(inputArrayA);
861         inputAllocationB.copyFrom(inputArrayB);
862 
863         final long kernelTimeStart = java.lang.System.currentTimeMillis();
864         final long rsRslt = s.reduce_sumgcd(inputAllocationA, inputAllocationB).get();
865         final long rsTimeEnd = java.lang.System.currentTimeMillis();
866 
867         final boolean success =
868                 result("sumgcd",
869                         new timing(javaTimeStart, javaTimeEnd, rsTimeStart, copyTimeStart, kernelTimeStart, rsTimeEnd,
870                                    inputAllocationA, inputAllocationB),
871                         javaRslt, rsRslt);
872         inputAllocationA.destroy();
873         inputAllocationB.destroy();
874         return success;
875     }
876 
877     ///////////////////////////////////////////////////////////////////
878 
879     // Return an array of sparse integer values from 0 to maxVal inclusive.
880     // The array consists of all values k*sparseness (k a nonnegative integer)
881     // that are less than maxVal, and maxVal itself.  For example, if maxVal
882     // is 20 and sparseness is 6, then the result is { 0, 6, 12, 18, 20 };
883     // and if maxVal is 20 and sparseness is 10, then the result is { 0, 10, 20 }.
884     //
885     // The elements of the array are sorted in increasing order.
886     //
887     // maxVal     -- must be nonnegative
888     // sparseness -- must be positive
computeSizePoints(int maxVal, int sparseness)889     private static int[] computeSizePoints(int maxVal, int sparseness) {
890         assertTrue((maxVal >= 0) && (sparseness > 0));
891 
892         final boolean maxValIsExtra = ((maxVal % sparseness) != 0);
893         int[] result = new int[1 + maxVal/sparseness + (maxValIsExtra ? 1 : 0)];
894 
895         for (int i = 0; i * sparseness <= maxVal; ++i)
896             result[i] = i * sparseness;
897         if (maxValIsExtra)
898             result[result.length - 1] = maxVal;
899 
900         return result;
901     }
902 
    // Number of consecutive seeds set aside for a single test execution; a
    // test may use the seeds seed..seed+maxSeedsPerTest-1 (see Test).
    private static final int maxSeedsPerTest = 10;
904 
905     static interface Test {
906         // A test execution is characterized by two properties: A seed
907         // and a size.
908         //
909         // The seed is used for generating pseudorandom input data.
910         // Ideally, we use different seeds for different tests and for
911         // different executions of the same test at different sizes.
912         // A test with multiple blocks of input data (i.e., for a
913         // reduction with multiple inputs) may want multiple seeds; it
914         // may use the seeds seed..seed+maxSeedsPerTest-1.
915         //
916         // The size indicates the amount of input data.  It is the number
917         // of cells in a particular dimension of the iteration space.
run(RenderScript RS, ScriptC_reduce s, int seed, int[] size)918         boolean run(RenderScript RS, ScriptC_reduce s, int seed, int[] size);
919     };
920 
921     static class TestDescription {
TestDescription(String myTestName, Test myTest, int mySeed, int[] myDefSize, int myLog2MaxSize, int mySparseness)922         public TestDescription(String myTestName, Test myTest, int mySeed, int[] myDefSize,
923                                int myLog2MaxSize, int mySparseness) {
924             testName     = myTestName;
925             test         = myTest;
926             seed         = mySeed;
927             defSize      = myDefSize;
928             log2MaxSize  = myLog2MaxSize;
929             sparseness   = mySparseness;
930         };
931 
TestDescription(String myTestName, Test myTest, int mySeed, int[] myDefSize, int myLog2MaxSize)932         public TestDescription(String myTestName, Test myTest, int mySeed, int[] myDefSize, int myLog2MaxSize) {
933             testName    = myTestName;
934             test        = myTest;
935             seed        = mySeed;
936             defSize     = myDefSize;
937             log2MaxSize = myLog2MaxSize;
938             sparseness  = 1;
939         };
940 
TestDescription(String myTestName, Test myTest, int mySeed, int[] myDefSize)941         public TestDescription(String myTestName, Test myTest, int mySeed, int[] myDefSize) {
942             testName    = myTestName;
943             test        = myTest;
944             seed        = mySeed;
945             defSize     = myDefSize;
946             log2MaxSize = -1;
947             sparseness  = 1;
948         };
949 
950         public final String testName;
951 
952         public final Test test;
953 
954         // When executing the test, scale this up by maxSeedsPerTest.
955         public final int seed;
956 
957         // If we're only going to run the test once, what size should
958         // we use?  The length of the array is the number of
959         // dimensions of the input data.
960         public final int[] defSize;
961 
962         // If we're going to run the test over a range of sizes, what
963         // is the maximum size to use?  (This constrains the number of
964         // cells of the input data, not the number of cells ALONG A
965         // PARTICULAR DIMENSION of the input data.)
966         public final int log2MaxSize;
967 
968         // If we're going to run the test "exhaustively" over a range
969         // of sizes, what is the size of a step through the range?
970         //
971         // For 1D, must be 1.
972         public final int sparseness;
973     };
974 
run(TestDescription td, RenderScript RS, ScriptC_reduce s, int seed, int[] size)975     private boolean run(TestDescription td, RenderScript RS, ScriptC_reduce s, int seed, int[] size) {
976         String arrayContent = "";
977         for (int i = 0; i < size.length; ++i) {
978             if (i != 0)
979                 arrayContent += ", ";
980             arrayContent += size[i];
981         }
982         Log.i(TAG, "Running " + td.testName + "(seed = " + seed + ", size[] = {" + arrayContent + "})");
983         return td.test.run(RS, s, seed, size);
984     }
985 
    // The tests executed by runCorrectnessQuick() and runCorrectness().
    //
    // Each entry gives, in order: the test name, the test method, the base
    // seed, the default size (one element per dimension), the base-2
    // logarithm of the maximum size for the full run, and (only for the
    // multidimensional tests) the sparseness.
    private final TestDescription[] correctnessTests = {
        // alloc and array variants of the same test will use the same
        // seed, in case results need to be compared.

        new TestDescription("addint1D", this::addint1D, 0, new int[]{100000}, 20),
        new TestDescription("addint1D_array", this::addint1D_array, 0, new int[]{100000}, 20),
        new TestDescription("addint2D", this::addint2D, 1, new int[]{450, 225}, 20, 5),
        new TestDescription("addint3D", this::addint3D, 2, new int[]{37, 48, 49}, 20, 7),
        new TestDescription("findMinAndMax", this::findMinAndMax, 3, new int[]{100000}, 20),
        new TestDescription("findMinAndMax_array", this::findMinAndMax_array, 3, new int[]{100000}, 20),
        new TestDescription("findMinMat2", this::findMinMat2, 4, new int[]{25000}, 17),
        new TestDescription("findMinMat4", this::findMinMat4, 5, new int[]{10000}, 15),
        new TestDescription("fz", this::fz, 6, new int[]{100000}, 20),
        new TestDescription("fz_array", this::fz_array, 6, new int[]{100000}, 20),
        new TestDescription("fz2", this::fz2, 7, new int[]{225, 450}, 20, 5),
        new TestDescription("fz3", this::fz3, 8, new int[]{59, 48, 37}, 20, 7),
        new TestDescription("histogram", this::histogram, 9, new int[]{100000}, 20),
        new TestDescription("histogram_array", this::histogram_array, 9, new int[]{100000}, 20),
        // There is no allocation-input "mode" test yet; seed 10 is the one
        // it would share with mode_array.
        // might want to add: new TestDescription("mode", this::mode, 10, new int[]{100000}, 20),
        new TestDescription("mode_array", this::mode_array, 10, new int[]{100000}, 20),
        new TestDescription("sumgcd", this::sumgcd, 11, new int[]{1 << 16}, 20)
    };
1008 
runCorrectnessQuick(RenderScript RS, ScriptC_reduce s)1009     private boolean runCorrectnessQuick(RenderScript RS, ScriptC_reduce s) {
1010         boolean pass = true;
1011 
1012         for (TestDescription td : correctnessTests) {
1013             pass &= run(td, RS, s, maxSeedsPerTest * td.seed, td.defSize);
1014         }
1015 
1016         return pass;
1017     }
1018 
    // NOTE: Each test execution gets maxSeedsPerTest seeds, and there are
    // up to 3 + 5*log2MaxSize test executions in the full (as opposed
    // to quick) correctness run of a particular test description, and
    // we need an additional seed for pseudorandom size generation.
    // Assuming log2MaxSize does not exceed 32, it is sufficient to
    // reserve 1 + (3+5*32)*maxSeedsPerTest seeds per TestDescription.
    //
    // See runCorrectness1D().
    private static final int seedsPerTestDescriptionCorrectness1D = 1 + (3+5*32)*maxSeedsPerTest;

    // NOTE: Each test execution gets maxSeedsPerTest seeds, and there are
    // about 11*((log2MaxSize+1)**2) test executions in the full (as
    // opposed to quick) correctness run of a particular test
    // description, and we need an additional seed for pseudorandom size
    // generation.  Assuming log2MaxSize does not exceed 32, it is
    // sufficient to reserve 1 + 11*(33**2)*maxSeedsPerTest, i.e.,
    // 1 + 11*1089*maxSeedsPerTest, seeds per TestDescription.
    //
    // See runCorrectness2D().
    private static final int seedsPerTestDescriptionCorrectness2D = 1 + (11*1089)*maxSeedsPerTest;

    // NOTE: Each test execution gets maxSeedsPerTest seeds, and there are
    // about 27*((log2MaxSize+1)**3) + 6*((log2MaxSize+1)**2) test
    // executions in the full (as opposed to quick) correctness run of
    // a particular test description, and we need an additional seed for
    // pseudorandom size generation.  Assuming log2MaxSize does not
    // exceed 32, it is sufficient to reserve
    // 1 + (27*(33**3) + 6*(33**2))*maxSeedsPerTest seeds per
    // TestDescription, which can be simplified upwards to
    // 1 + (28*(33**3))*maxSeedsPerTest, i.e., 1 + 28*35937*maxSeedsPerTest.
    //
    // See runCorrectness3D().
    private static final int seedsPerTestDescriptionCorrectness3D = 1 + (28*35937)*maxSeedsPerTest;

    // Each test execution gets a certain number of seeds, and a full
    // (as opposed to quick) correctness run of a particular
    // TestDescription consists of some number of executions (each of
    // which needs up to maxSeedsPerTest) and may require some
    // additional seeds.
    //
    // This is the largest of the per-dimensionality budgets above, so
    // that one stride can be used for every TestDescription regardless
    // of its number of dimensions.
    private static final int seedsPerTestDescriptionCorrectness =
            Math.max(seedsPerTestDescriptionCorrectness1D,
                     Math.max(seedsPerTestDescriptionCorrectness2D,
                              seedsPerTestDescriptionCorrectness3D));
1060 
runCorrectness(RenderScript RS, ScriptC_reduce s)1061     private boolean runCorrectness(RenderScript RS, ScriptC_reduce s) {
1062         boolean pass = true;
1063 
1064         for (TestDescription td : correctnessTests) {
1065             switch (td.defSize.length) {
1066                 case 1:
1067                     pass &= runCorrectness1D(td, RS, s);
1068                     break;
1069                 case 2:
1070                     pass &= runCorrectness2D(td, RS, s);
1071                     break;
1072                 case 3:
1073                     pass &= runCorrectness3D(td, RS, s);
1074                     break;
1075                 default:
1076                     assertTrue("unexpected defSize.length " + td.defSize.length, false);
1077                     pass &= false;
1078                     break;
1079             }
1080         }
1081 
1082         return pass;
1083     }
1084 
runCorrectness1D(TestDescription td, RenderScript RS, ScriptC_reduce s)1085     private boolean runCorrectness1D(TestDescription td, RenderScript RS, ScriptC_reduce s) {
1086         assertEquals(1, td.sparseness);
1087         final int log2MaxSize = td.log2MaxSize;
1088         assertTrue(log2MaxSize >= 0);
1089 
1090         boolean pass = true;
1091 
1092         // We will execute the test with the following sizes:
1093         // (a) Each power of 2 from zero (2**0) up to log2MaxSize (2**log2MaxSize)
1094         // (b) Each size from (a) +/-1
1095         // (c) 2 random sizes between each pair of adjacent points in (a)
1096         int[] testSizes = new int[
1097             /* a */ (1 + log2MaxSize) +
1098             /* b */ 2*(1 + log2MaxSize) +
1099             /* c */ 2*log2MaxSize];
1100         // See seedsPerTestDescriptionCorrectness1D
1101 
1102         final int seedForPickingTestSizes = td.seed * seedsPerTestDescriptionCorrectness;
1103 
1104         int nextTestIdx = 0;
1105 
1106         // Fill in (a) and (b)
1107         for (int i = 0; i <= log2MaxSize; ++i) {
1108             final int pwrOf2 = 1 << i;
1109             testSizes[nextTestIdx++] = pwrOf2;      /* a */
1110             testSizes[nextTestIdx++] = pwrOf2 - 1;  /* b */
1111             testSizes[nextTestIdx++] = pwrOf2 + 1;  /* b */
1112         }
1113 
1114         // Fill in (c)
1115         Random r = new Random(seedForPickingTestSizes);
1116         for (int i = 0; i < log2MaxSize; ++i) {
1117             final int lo = (1 << i) + 1;
1118             final int hi = 1 << (i + 1);
1119 
1120             if (lo < hi) {
1121                 for (int j = 0; j < 2; ++j) {
1122                     testSizes[nextTestIdx++] = r.nextInt(hi - lo) + lo;
1123                 }
1124             }
1125         }
1126 
1127         Arrays.sort(testSizes);
1128 
1129         int[] lastTestSizeArg = new int[]{-1};
1130         for (int i = 0; i < testSizes.length; ++i) {
1131             if ((testSizes[i] > 0) && (testSizes[i] != lastTestSizeArg[0])) {
1132                 lastTestSizeArg[0] = testSizes[i];
1133                 final int seedForTestExecution = seedForPickingTestSizes + 1 + i*maxSeedsPerTest;
1134                 pass &= run(td, RS, s, seedForTestExecution, lastTestSizeArg);
1135             }
1136         }
1137 
1138         return pass;
1139     }
1140 
runCorrectness2D(TestDescription td, RenderScript RS, ScriptC_reduce s)1141     private boolean runCorrectness2D(TestDescription td, RenderScript RS, ScriptC_reduce s) {
1142         final int log2MaxSize = td.log2MaxSize, maxSize = 1 << log2MaxSize, sparseness = td.sparseness;
1143         assertTrue((log2MaxSize >= 0) && (sparseness >= 1));
1144 
1145         boolean pass = true;
1146 
1147         final int[] sizePoints = computeSizePoints(log2MaxSize, sparseness);
1148 
1149         // We will execute the test with the following sizes:
1150         // (a) Each dimension at a power of 2 from sizePoints[]
1151         ///    such that the sum of the exponents does not exceed
1152         //     log2MaxSize
1153         // (b) Each size from (a) with one or both dimensions +/-1,
1154         //     except where this would exceed 2**log2MaxSize
1155         // (c) Approximately 2*(sizePoints.length**2) random sizes
1156         ArrayList<int[]> testSizesList = new ArrayList<int[]>();
1157         // See seedsPerTestDescriptionCorrectness2D
1158 
1159         final int seedForPickingTestSizes = td.seed * seedsPerTestDescriptionCorrectness;
1160 
1161         // Fill in (a) and (b)
1162         for (int i : sizePoints) {
1163             final int iPwrOf2 = 1 << i;
1164             for (int iDelta = -1; iDelta <= 1; ++iDelta) {
1165                 final int iSize = iPwrOf2 + iDelta;
1166                 for (int j : sizePoints) {
1167                     final int jPwrOf2 = 1 << j;
1168                     for (int jDelta = -1; jDelta <= 1; ++jDelta) {
1169                         final int jSize = jPwrOf2 + jDelta;
1170                         if ((long)iSize * (long)jSize <= maxSize)
1171                             testSizesList.add(new int[]{iSize, jSize});
1172                     }
1173                 }
1174             }
1175         }
1176 
1177         // Fill in (c)
1178         Random r = new Random(seedForPickingTestSizes);
1179         for (int i : sizePoints) {
1180             for (int j : sizePoints) {
1181                 final int size0 = 1 + r.nextInt(1 << i);
1182                 final int size1 = 1 + r.nextInt(maxSize / size0);
1183 
1184                 testSizesList.add(new int[]{size0, size1});
1185                 testSizesList.add(new int[]{size1, size0});
1186             }
1187         }
1188 
1189         int[][] testSizes = testSizesList.toArray(new int[0][]);
1190         Arrays.sort(testSizes,
1191                 (a, b) -> {
1192                     final int comp0 = ((Integer)a[0]).compareTo(b[0]);
1193                     return (comp0 != 0 ? comp0 : ((Integer)a[1]).compareTo(b[1]));
1194                 });
1195 
1196         int[] lastTestSizeArg = null;
1197         for (int i = 0; i < testSizes.length; ++i) {
1198             if ((testSizes[i][0] <= 0) || (testSizes[i][1] <= 0))
1199                 continue;
1200             if ((lastTestSizeArg != null) &&
1201                 (testSizes[i][0] == lastTestSizeArg[0]) &&
1202                 (testSizes[i][1] == lastTestSizeArg[1]))
1203                 continue;
1204             lastTestSizeArg = testSizes[i];
1205             final int seedForTestExecution = seedForPickingTestSizes + 1 + i*maxSeedsPerTest;
1206             pass &= run(td, RS, s, seedForTestExecution, lastTestSizeArg);
1207         }
1208 
1209         return pass;
1210     }
1211 
runCorrectness3D(TestDescription td, RenderScript RS, ScriptC_reduce s)1212     private boolean runCorrectness3D(TestDescription td, RenderScript RS, ScriptC_reduce s) {
1213         final int log2MaxSize = td.log2MaxSize, maxSize = 1 << log2MaxSize, sparseness = td.sparseness;
1214         assertTrue((log2MaxSize >= 0) && (sparseness >= 1));
1215 
1216         boolean pass = true;
1217 
1218         final int[] sizePoints = computeSizePoints(log2MaxSize, sparseness);
1219 
1220         // We will execute the test with the following sizes:
1221         // (a) Each dimension at a power of 2 from sizePoints[]
1222         ///    such that the sum of the exponents does not exceed
1223         //     log2MaxSize
1224         // (b) Each size from (a) with one or both dimensions +/-1,
1225         //     except where this would exceed 2**log2MaxSize
1226         // (c) Approximately 6*(sizePoints.length**2) random sizes
1227         ArrayList<int[]> testSizesList = new ArrayList<int[]>();
1228         // See seedsPerTestDescriptionCorrectness3D
1229 
1230         final int seedForPickingTestSizes = td.seed * seedsPerTestDescriptionCorrectness;
1231 
1232         // Fill in (a) and (b)
1233         for (int i : sizePoints) {
1234             final int iPwrOf2 = 1 << i;
1235             for (int iDelta = -1; iDelta <= 1; ++iDelta) {
1236                 final int iSize = iPwrOf2 + iDelta;
1237                 for (int j : sizePoints) {
1238                     final int jPwrOf2 = 1 << j;
1239                     for (int jDelta = -1; jDelta <= 1; ++jDelta) {
1240                         final int jSize = jPwrOf2 + jDelta;
1241                         for (int k : sizePoints) {
1242                             final int kPwrOf2 = 1 << k;
1243                             for (int kDelta = -1; kDelta <= 1; ++kDelta) {
1244                                 final int kSize = kPwrOf2 + kDelta;
1245                                 if ((long)iSize * (long)jSize * (long)kSize <= maxSize)
1246                                     testSizesList.add(new int[]{iSize, jSize, kSize});
1247                             }
1248                         }
1249                     }
1250                 }
1251             }
1252         }
1253 
1254         // Fill in (c)
1255         Random r = new Random(seedForPickingTestSizes);
1256         for (int i : sizePoints) {
1257             for (int j : sizePoints) {
1258                 final int size0 = 1 + r.nextInt(1 << i);
1259                 final int size1 = 1 + r.nextInt(Math.min(1 << j, maxSize / size0));
1260                 final int size2 = 1 + r.nextInt(maxSize / (size0*size1));
1261 
1262                 testSizesList.add(new int[]{size0, size1, size2});
1263                 testSizesList.add(new int[]{size0, size2, size1});
1264                 testSizesList.add(new int[]{size1, size0, size2});
1265                 testSizesList.add(new int[]{size1, size2, size0});
1266                 testSizesList.add(new int[]{size2, size0, size1});
1267                 testSizesList.add(new int[]{size2, size1, size0});
1268             }
1269         }
1270 
1271         int[][] testSizes = testSizesList.toArray(new int[0][]);
1272         Arrays.sort(testSizes,
1273                 (a, b) -> {
1274                     int comp = ((Integer)a[0]).compareTo(b[0]);
1275                     if (comp == 0)
1276                         comp = ((Integer)a[1]).compareTo(b[1]);
1277                     if (comp == 0)
1278                         comp = ((Integer)a[2]).compareTo(b[2]);
1279                     return comp;
1280                 });
1281 
1282         int[] lastTestSizeArg = null;
1283         for (int i = 0; i < testSizes.length; ++i) {
1284             if ((testSizes[i][0] <= 0) || (testSizes[i][1] <= 0) || (testSizes[i][2] <= 0))
1285                 continue;
1286             if ((lastTestSizeArg != null) &&
1287                 (testSizes[i][0] == lastTestSizeArg[0]) &&
1288                 (testSizes[i][1] == lastTestSizeArg[1]) &&
1289                 (testSizes[i][2] == lastTestSizeArg[2]))
1290                 continue;
1291 
1292             // Apply Z-dimension limiting.
1293             //
1294             // The Z dimension is always handled specially by GPU
1295             // drivers, and a high value for this dimension can have
1296             // serious performance implications.  For example, Cuda
1297             // and OpenCL encourage Z to be the smallest dimension.
1298             if (testSizes[i][2] > 1024)
1299                 continue;
1300 
1301             lastTestSizeArg = testSizes[i];
1302             final int seedForTestExecution = seedForPickingTestSizes + 1 + i*maxSeedsPerTest;
1303             pass &= run(td, RS, s, seedForTestExecution, lastTestSizeArg);
1304         }
1305 
1306         return pass;
1307     }
1308 
    // The tests executed by runPerformanceQuick().
    //
    // Each entry gives the test name, the test method, the base seed and
    // the (single) size to run at, one element per dimension.  No
    // log2MaxSize or sparseness is given, because these tests are only
    // ever run at that one size.
    private final TestDescription[] performanceTests = {
        new TestDescription("addint1D", this::addint1D, 0, new int[]{100000 << 10}),
        new TestDescription("addint2D", this::addint2D, 1, new int[]{450 << 5, 225 << 5}),
        new TestDescription("addint3D", this::addint3D, 2, new int[]{37 << 3, 48 << 3, 49 << 3}),
        new TestDescription("findMinAndMax", this::findMinAndMax, 3, new int[]{100000 << 9}),
        new TestDescription("fz", this::fz, 4, new int[]{100000 << 10}),
        new TestDescription("fz2", this::fz2, 5, new int[]{225 << 5, 450 << 5}),
        new TestDescription("fz3", this::fz3, 6, new int[]{59 << 3, 48 << 3, 37 << 3}),
        new TestDescription("histogram", this::histogram, 7, new int[]{100000 << 10}),
        // There is no allocation-input "mode" test yet; seed 8 is set
        // aside for it.
        // might want to add: new TestDescription("mode", this::mode, 8, new int[]{100000}),
        new TestDescription("sumgcd", this::sumgcd, 9, new int[]{1 << 21})
    };
1321 
runPerformanceQuick(RenderScript RS, ScriptC_reduce s)1322     private boolean runPerformanceQuick(RenderScript RS, ScriptC_reduce s) {
1323         boolean pass = true;
1324 
1325         for (TestDescription td : performanceTests) {
1326             pass &= run(td, RS, s, maxSeedsPerTest * td.seed, td.defSize);
1327         }
1328 
1329         return pass;
1330     }
1331 
runCorrectnessPatterns(RenderScript RS, ScriptC_reduce s)1332     private boolean runCorrectnessPatterns(RenderScript RS, ScriptC_reduce s) {
1333         // Test some very specific usage patterns.
1334         boolean pass = true;
1335 
1336         pass &= patternDuplicateAnonymousResult(RS, s);
1337         pass &= patternInterleavedReduce(RS, s);
1338         pass &= patternRedundantGet(RS, s);
1339 
1340         return pass;
1341     }
1342 
run()1343     public void run() {
1344         RenderScript pRS = RenderScript.create(mCtx);
1345         ScriptC_reduce s = new ScriptC_reduce(pRS);
1346         s.set_negInf(Float.NEGATIVE_INFINITY);
1347         s.set_posInf(Float.POSITIVE_INFINITY);
1348 
1349         boolean pass = true;
1350 
1351         pass &= runCorrectnessPatterns(pRS, s);
1352         pass &= runCorrectnessQuick(pRS, s);
1353         pass &= runCorrectness(pRS, s);
1354         // pass &= runPerformanceQuick(pRS, s);
1355 
1356         pRS.finish();
1357         pRS.destroy();
1358 
1359         Log.i(TAG, pass ? "PASSED" : "FAILED");
1360         if (pass)
1361             passTest();
1362         else
1363             failTest();
1364     }
1365 }
1366 
1367 // TODO: Add machinery for easily running fuller (i.e., non-sparse) testing.
1368