1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
19 
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "utils/Compat.h"
24 #include "memtest.h"
25 
26 // Bandwidth Class definitions.
27 class BandwidthBenchmark {
28 public:
BandwidthBenchmark()29     BandwidthBenchmark()
30         : _size(0),
31           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
32           _num_loops(DEFAULT_NUM_LOOPS) {}
~BandwidthBenchmark()33     virtual ~BandwidthBenchmark() {}
34 
run()35     bool run() {
36         if (_size == 0) {
37             return false;
38         }
39         if (!canRun()) {
40             return false;
41         }
42 
43         bench(_num_warm_loops);
44 
45         nsecs_t t = system_time();
46         bench(_num_loops);
47         t = system_time() - t;
48 
49         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
50 
51         return true;
52     }
53 
canRun()54     bool canRun() { return !usesNeon() || isNeonSupported(); }
55 
56     virtual bool setSize(size_t size) = 0;
57 
58     virtual const char *getName() = 0;
59 
60     virtual bool verify() = 0;
61 
usesNeon()62     virtual bool usesNeon() { return false; }
63 
isNeonSupported()64     bool isNeonSupported() {
65 #if defined(__ARM_NEON__)
66         return true;
67 #else
68         return false;
69 #endif
70     }
71 
72     // Accessors/mutators.
mb_per_sec()73     double mb_per_sec() { return _mb_per_sec; }
num_warm_loops()74     size_t num_warm_loops() { return _num_warm_loops; }
num_loops()75     size_t num_loops() { return _num_loops; }
size()76     size_t size() { return _size; }
77 
set_num_warm_loops(size_t num_warm_loops)78     void set_num_warm_loops(size_t num_warm_loops) {
79         _num_warm_loops = num_warm_loops;
80     }
set_num_loops(size_t num_loops)81     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
82 
83     // Static constants
84     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
85     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
86 
87 protected:
88     virtual void bench(size_t num_loops) = 0;
89 
90     double _mb_per_sec;
91     size_t _size;
92     size_t _num_warm_loops;
93     size_t _num_loops;
94 
95 private:
96     // Static constants
97     static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
98     static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
99 };
100 
101 class CopyBandwidthBenchmark : public BandwidthBenchmark {
102 public:
CopyBandwidthBenchmark()103     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
104 
setSize(size_t size)105     bool setSize(size_t size) {
106         if (_src) {
107            free(_src);
108            _src = NULL;
109         }
110         if (_dst) {
111             free(_dst);
112             _dst = NULL;
113         }
114 
115         if (size == 0) {
116             _size = DEFAULT_COPY_SIZE;
117         } else {
118             _size = size;
119         }
120 
121         _src = reinterpret_cast<char*>(memalign(64, _size));
122         if (!_src) {
123             perror("Failed to allocate memory for test.");
124             return false;
125         }
126         _dst = reinterpret_cast<char*>(memalign(64, _size));
127         if (!_dst) {
128             perror("Failed to allocate memory for test.");
129             return false;
130         }
131 
132         return true;
133     }
~CopyBandwidthBenchmark()134     virtual ~CopyBandwidthBenchmark() {
135         if (_src) {
136             free(_src);
137             _src = NULL;
138         }
139         if (_dst) {
140             free(_dst);
141             _dst = NULL;
142         }
143     }
144 
verify()145     bool verify() {
146         memset(_src, 0x23, _size);
147         memset(_dst, 0, _size);
148         bench(1);
149         if (memcmp(_src, _dst, _size) != 0) {
150             printf("Buffers failed to compare after one loop.\n");
151             return false;
152         }
153 
154         memset(_src, 0x23, _size);
155         memset(_dst, 0, _size);
156         _num_loops = 2;
157         bench(2);
158         if (memcmp(_src, _dst, _size) != 0) {
159             printf("Buffers failed to compare after two loops.\n");
160             return false;
161         }
162 
163         return true;
164     }
165 
166 protected:
167     char *_src;
168     char *_dst;
169 
170     static const unsigned int DEFAULT_COPY_SIZE = 8000;
171 };
172 
173 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
174 public:
CopyLdrdStrdBenchmark()175     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
~CopyLdrdStrdBenchmark()176     virtual ~CopyLdrdStrdBenchmark() {}
177 
getName()178     const char *getName() { return "ldrd/strd"; }
179 
180 protected:
181     // Copy using ldrd/strd instructions.
bench(size_t num_loops)182     void bench(size_t num_loops) {
183         asm volatile(
184             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
185 
186             "mov r0, %0\n"
187             "mov r1, %1\n"
188             "mov r2, %2\n"
189             "mov r3, %3\n"
190 
191             "0:\n"
192             "mov r4, r2, lsr #6\n"
193 
194             "1:\n"
195             "ldrd r6, r7, [r0]\n"
196             "strd r6, r7, [r1]\n"
197             "ldrd r6, r7, [r0, #8]\n"
198             "strd r6, r7, [r1, #8]\n"
199             "ldrd r6, r7, [r0, #16]\n"
200             "strd r6, r7, [r1, #16]\n"
201             "ldrd r6, r7, [r0, #24]\n"
202             "strd r6, r7, [r1, #24]\n"
203             "ldrd r6, r7, [r0, #32]\n"
204             "strd r6, r7, [r1, #32]\n"
205             "ldrd r6, r7, [r0, #40]\n"
206             "strd r6, r7, [r1, #40]\n"
207             "ldrd r6, r7, [r0, #48]\n"
208             "strd r6, r7, [r1, #48]\n"
209             "ldrd r6, r7, [r0, #56]\n"
210             "strd r6, r7, [r1, #56]\n"
211 
212             "add  r0, r0, #64\n"
213             "add  r1, r1, #64\n"
214             "subs r4, r4, #1\n"
215             "bgt 1b\n"
216 
217             "sub r0, r0, r2\n"
218             "sub r1, r1, r2\n"
219             "subs r3, r3, #1\n"
220             "bgt 0b\n"
221 
222             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
223         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
224     }
225 };
226 
227 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
228 public:
CopyLdmiaStmiaBenchmark()229     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
~CopyLdmiaStmiaBenchmark()230     virtual ~CopyLdmiaStmiaBenchmark() {}
231 
getName()232     const char *getName() { return "ldmia/stmia"; }
233 
234 protected:
235     // Copy using ldmia/stmia instructions.
bench(size_t num_loops)236     void bench(size_t num_loops) {
237         asm volatile(
238             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
239 
240             "mov r0, %0\n"
241             "mov r1, %1\n"
242             "mov r2, %2\n"
243             "mov r3, %3\n"
244 
245             "0:\n"
246             "mov r4, r2, lsr #6\n"
247 
248             "1:\n"
249             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
250             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
251             "subs r4, r4, #1\n"
252             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
253             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
254             "bgt 1b\n"
255 
256             "sub r0, r0, r2\n"
257             "sub r1, r1, r2\n"
258             "subs r3, r3, #1\n"
259             "bgt 0b\n"
260 
261             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
262         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
263     }
264 };
265 
266 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
267 public:
CopyVld1Vst1Benchmark()268     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
~CopyVld1Vst1Benchmark()269     virtual ~CopyVld1Vst1Benchmark() {}
270 
getName()271     const char *getName() { return "vld1/vst1"; }
272 
usesNeon()273     bool usesNeon() { return true; }
274 
275 protected:
276     // Copy using vld1/vst1 instructions.
277 #if defined(__ARM_NEON__)
bench(size_t num_loops)278     void bench(size_t num_loops) {
279         asm volatile(
280             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
281 
282             "mov r0, %0\n"
283             "mov r1, %1\n"
284             "mov r2, %2\n"
285             "mov r3, %3\n"
286 
287             "0:\n"
288             "mov r4, r2, lsr #6\n"
289 
290             "1:\n"
291             "vld1.8 {d0-d3}, [r0]!\n"
292             "vld1.8 {d4-d7}, [r0]!\n"
293             "subs r4, r4, #1\n"
294             "vst1.8 {d0-d3}, [r1:128]!\n"
295             "vst1.8 {d4-d7}, [r1:128]!\n"
296             "bgt 1b\n"
297 
298             "sub r0, r0, r2\n"
299             "sub r1, r1, r2\n"
300             "subs r3, r3, #1\n"
301             "bgt 0b\n"
302 
303             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
304         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
305 #else
306     void bench(size_t) {
307 #endif
308     }
309 };
310 
311 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
312 public:
CopyVldrVstrBenchmark()313     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
~CopyVldrVstrBenchmark()314     virtual ~CopyVldrVstrBenchmark() {}
315 
getName()316     const char *getName() { return "vldr/vstr"; }
317 
usesNeon()318     bool usesNeon() { return true; }
319 
320 protected:
321     // Copy using vldr/vstr instructions.
322 #if defined(__ARM_NEON__)
bench(size_t num_loops)323     void bench(size_t num_loops) {
324         asm volatile(
325             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
326 
327             "mov r0, %0\n"
328             "mov r1, %1\n"
329             "mov r2, %2\n"
330             "mov r3, %3\n"
331 
332             "0:\n"
333             "mov r4, r2, lsr #6\n"
334 
335             "1:\n"
336             "vldr d0, [r0, #0]\n"
337             "subs r4, r4, #1\n"
338             "vldr d1, [r0, #8]\n"
339             "vstr d0, [r1, #0]\n"
340             "vldr d0, [r0, #16]\n"
341             "vstr d1, [r1, #8]\n"
342             "vldr d1, [r0, #24]\n"
343             "vstr d0, [r1, #16]\n"
344             "vldr d0, [r0, #32]\n"
345             "vstr d1, [r1, #24]\n"
346             "vldr d1, [r0, #40]\n"
347             "vstr d0, [r1, #32]\n"
348             "vldr d0, [r0, #48]\n"
349             "vstr d1, [r1, #40]\n"
350             "vldr d1, [r0, #56]\n"
351             "vstr d0, [r1, #48]\n"
352             "add r0, r0, #64\n"
353             "vstr d1, [r1, #56]\n"
354             "add r1, r1, #64\n"
355             "bgt 1b\n"
356 
357             "sub r0, r0, r2\n"
358             "sub r1, r1, r2\n"
359             "subs r3, r3, #1\n"
360             "bgt 0b\n"
361 
362             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
363         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
364 #else
365     void bench(size_t) {
366 #endif
367     }
368 };
369 
370 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
371 public:
CopyVldmiaVstmiaBenchmark()372     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
~CopyVldmiaVstmiaBenchmark()373     virtual ~CopyVldmiaVstmiaBenchmark() {}
374 
getName()375     const char *getName() { return "vldmia/vstmia"; }
376 
usesNeon()377     bool usesNeon() { return true; }
378 
379 protected:
380     // Copy using vldmia/vstmia instructions.
381 #if defined(__ARM_NEON__)
bench(size_t num_loops)382     void bench(size_t num_loops) {
383         asm volatile(
384             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
385 
386             "mov r0, %0\n"
387             "mov r1, %1\n"
388             "mov r2, %2\n"
389             "mov r3, %3\n"
390 
391             "0:\n"
392             "mov r4, r2, lsr #6\n"
393 
394             "1:\n"
395             "vldmia r0!, {d0-d7}\n"
396             "subs r4, r4, #1\n"
397             "vstmia r1!, {d0-d7}\n"
398             "bgt 1b\n"
399 
400             "sub r0, r0, r2\n"
401             "sub r1, r1, r2\n"
402             "subs r3, r3, #1\n"
403             "bgt 0b\n"
404 
405             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
406         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
407 #else
408     void bench(size_t) {
409 #endif
410     }
411 };
412 
413 class MemcpyBenchmark : public CopyBandwidthBenchmark {
414 public:
MemcpyBenchmark()415     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
~MemcpyBenchmark()416     virtual ~MemcpyBenchmark() {}
417 
getName()418     const char *getName() { return "memcpy"; }
419 
420 protected:
bench(size_t num_loops)421     void bench(size_t num_loops) {
422         for (size_t i = 0; i < num_loops; i++) {
423             memcpy(_dst, _src, _size);
424         }
425     }
426 };
427 
428 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
429 public:
SingleBufferBandwidthBenchmark()430     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
~SingleBufferBandwidthBenchmark()431     virtual ~SingleBufferBandwidthBenchmark() {
432         if (_buffer) {
433             free(_buffer);
434             _buffer = NULL;
435         }
436     }
437 
setSize(size_t size)438     bool setSize(size_t size) {
439         if (_buffer) {
440             free(_buffer);
441             _buffer = NULL;
442         }
443 
444         if (size == 0) {
445             _size = DEFAULT_SINGLE_BUFFER_SIZE;
446         } else {
447             _size = size;
448         }
449 
450         _buffer = reinterpret_cast<char*>(memalign(64, _size));
451         if (!_buffer) {
452             perror("Failed to allocate memory for test.");
453             return false;
454         }
455         memset(_buffer, 0, _size);
456 
457         return true;
458     }
459 
verify()460     bool verify() { return true; }
461 
462 protected:
463     char *_buffer;
464 
465     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
466 };
467 
468 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
469 public:
WriteBandwidthBenchmark()470     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
~WriteBandwidthBenchmark()471     virtual ~WriteBandwidthBenchmark() { }
472 
verify()473     bool verify() {
474         memset(_buffer, 0, _size);
475         bench(1);
476         for (size_t i = 0; i < _size; i++) {
477             if (_buffer[i] != 1) {
478                 printf("Buffer failed to compare after one loop.\n");
479                 return false;
480             }
481         }
482 
483         memset(_buffer, 0, _size);
484         bench(2);
485         for (size_t i = 0; i < _size; i++) {
486             if (_buffer[i] != 2) {
487                 printf("Buffer failed to compare after two loops.\n");
488                 return false;
489             }
490         }
491 
492         return true;
493     }
494 };
495 
496 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
497 public:
WriteStrdBenchmark()498     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
~WriteStrdBenchmark()499     virtual ~WriteStrdBenchmark() {}
500 
getName()501     const char *getName() { return "strd"; }
502 
503 protected:
504     // Write a given value using strd.
bench(size_t num_loops)505     void bench(size_t num_loops) {
506         asm volatile(
507             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
508 
509             "mov r0, %0\n"
510             "mov r1, %1\n"
511             "mov r2, %2\n"
512 
513             "mov r4, #0\n"
514             "mov r5, #0\n"
515 
516             "0:\n"
517             "mov r3, r1, lsr #5\n"
518 
519             "add r4, r4, #0x01010101\n"
520             "mov r5, r4\n"
521 
522             "1:\n"
523             "subs r3, r3, #1\n"
524             "strd r4, r5, [r0]\n"
525             "strd r4, r5, [r0, #8]\n"
526             "strd r4, r5, [r0, #16]\n"
527             "strd r4, r5, [r0, #24]\n"
528             "add  r0, r0, #32\n"
529             "bgt 1b\n"
530 
531             "sub r0, r0, r1\n"
532             "subs r2, r2, #1\n"
533             "bgt 0b\n"
534 
535             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
536           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
537     }
538 };
539 
540 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
541 public:
WriteStmiaBenchmark()542     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
~WriteStmiaBenchmark()543     virtual ~WriteStmiaBenchmark() {}
544 
getName()545     const char *getName() { return "stmia"; }
546 
547 protected:
548       // Write a given value using stmia.
bench(size_t num_loops)549       void bench(size_t num_loops) {
550           asm volatile(
551               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
552 
553               "mov r0, %0\n"
554               "mov r1, %1\n"
555               "mov r2, %2\n"
556 
557               "mov r4, #0\n"
558 
559               "0:\n"
560               "mov r3, r1, lsr #5\n"
561 
562               "add r4, r4, #0x01010101\n"
563               "mov r5, r4\n"
564               "mov r6, r4\n"
565               "mov r7, r4\n"
566               "mov r8, r4\n"
567               "mov r9, r4\n"
568               "mov r10, r4\n"
569               "mov r11, r4\n"
570 
571               "1:\n"
572               "subs r3, r3, #1\n"
573               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
574               "bgt 1b\n"
575 
576               "sub r0, r0, r1\n"
577               "subs r2, r2, #1\n"
578               "bgt 0b\n"
579 
580               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
581         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
582     }
583 };
584 
585 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
586 public:
WriteVst1Benchmark()587     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
~WriteVst1Benchmark()588     virtual ~WriteVst1Benchmark() {}
589 
getName()590     const char *getName() { return "vst1"; }
591 
usesNeon()592     bool usesNeon() { return true; }
593 
594 protected:
595     // Write a given value using vst.
596 #if defined(__ARM_NEON__)
bench(size_t num_loops)597     void bench(size_t num_loops) {
598         asm volatile(
599             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
600 
601             "mov r0, %0\n"
602             "mov r1, %1\n"
603             "mov r2, %2\n"
604             "mov r4, #0\n"
605 
606             "0:\n"
607             "mov r3, r1, lsr #5\n"
608 
609             "add r4, r4, #1\n"
610             "vdup.8 d0, r4\n"
611             "vmov d1, d0\n"
612             "vmov d2, d0\n"
613             "vmov d3, d0\n"
614 
615             "1:\n"
616             "subs r3, r3, #1\n"
617             "vst1.8 {d0-d3}, [r0:128]!\n"
618             "bgt 1b\n"
619 
620             "sub r0, r0, r1\n"
621             "subs r2, r2, #1\n"
622             "bgt 0b\n"
623 
624             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
625         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
626 #else
627     void bench(size_t) {
628 #endif
629     }
630 };
631 
632 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
633 public:
WriteVstrBenchmark()634     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
~WriteVstrBenchmark()635     virtual ~WriteVstrBenchmark() {}
636 
getName()637     const char *getName() { return "vstr"; }
638 
usesNeon()639     bool usesNeon() { return true; }
640 
641 protected:
642     // Write a given value using vst.
643 #if defined(__ARM_NEON__)
bench(size_t num_loops)644     void bench(size_t num_loops) {
645         asm volatile(
646             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
647 
648             "mov r0, %0\n"
649             "mov r1, %1\n"
650             "mov r2, %2\n"
651             "mov r4, #0\n"
652 
653             "0:\n"
654             "mov r3, r1, lsr #5\n"
655 
656             "add r4, r4, #1\n"
657             "vdup.8 d0, r4\n"
658             "vmov d1, d0\n"
659             "vmov d2, d0\n"
660             "vmov d3, d0\n"
661 
662             "1:\n"
663             "vstr d0, [r0, #0]\n"
664             "subs r3, r3, #1\n"
665             "vstr d1, [r0, #8]\n"
666             "vstr d0, [r0, #16]\n"
667             "vstr d1, [r0, #24]\n"
668             "add r0, r0, #32\n"
669             "bgt 1b\n"
670 
671             "sub r0, r0, r1\n"
672             "subs r2, r2, #1\n"
673             "bgt 0b\n"
674 
675             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
676         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
677 #else
678     void bench(size_t) {
679 #endif
680     }
681 };
682 
683 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
684 public:
WriteVstmiaBenchmark()685     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
~WriteVstmiaBenchmark()686     virtual ~WriteVstmiaBenchmark() {}
687 
getName()688     const char *getName() { return "vstmia"; }
689 
usesNeon()690     bool usesNeon() { return true; }
691 
692 protected:
693     // Write a given value using vstmia.
694 #if defined(__ARM_NEON__)
bench(size_t num_loops)695     void bench(size_t num_loops) {
696         asm volatile(
697             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
698 
699             "mov r0, %0\n"
700             "mov r1, %1\n"
701             "mov r2, %2\n"
702             "mov r4, #0\n"
703 
704             "0:\n"
705             "mov r3, r1, lsr #5\n"
706 
707             "add r4, r4, #1\n"
708             "vdup.8 d0, r4\n"
709             "vmov d1, d0\n"
710             "vmov d2, d0\n"
711             "vmov d3, d0\n"
712 
713             "1:\n"
714             "subs r3, r3, #1\n"
715             "vstmia r0!, {d0-d3}\n"
716             "bgt 1b\n"
717 
718             "sub r0, r0, r1\n"
719             "subs r2, r2, #1\n"
720             "bgt 0b\n"
721 
722             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
723         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
724 #else
725     void bench(size_t) {
726 #endif
727     }
728 };
729 
730 class MemsetBenchmark : public WriteBandwidthBenchmark {
731 public:
MemsetBenchmark()732     MemsetBenchmark() : WriteBandwidthBenchmark() { }
~MemsetBenchmark()733     virtual ~MemsetBenchmark() {}
734 
getName()735     const char *getName() { return "memset"; }
736 
737 protected:
bench(size_t num_loops)738     void bench(size_t num_loops) {
739         for (size_t i = 0; i < num_loops; i++) {
740             memset(_buffer, (i % 255) + 1, _size);
741         }
742     }
743 };
744 
745 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
746 public:
ReadLdrdBenchmark()747     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadLdrdBenchmark()748     virtual ~ReadLdrdBenchmark() {}
749 
getName()750     const char *getName() { return "ldrd"; }
751 
752 protected:
753     // Write a given value using strd.
bench(size_t num_loops)754     void bench(size_t num_loops) {
755         asm volatile(
756             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
757 
758             "mov r0, %0\n"
759             "mov r1, %1\n"
760             "mov r2, %2\n"
761 
762             "0:\n"
763             "mov r3, r1, lsr #5\n"
764 
765             "1:\n"
766             "subs r3, r3, #1\n"
767             "ldrd r4, r5, [r0]\n"
768             "ldrd r4, r5, [r0, #8]\n"
769             "ldrd r4, r5, [r0, #16]\n"
770             "ldrd r4, r5, [r0, #24]\n"
771             "add  r0, r0, #32\n"
772             "bgt 1b\n"
773 
774             "sub r0, r0, r1\n"
775             "subs r2, r2, #1\n"
776             "bgt 0b\n"
777 
778             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
779           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
780     }
781 };
782 
783 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
784 public:
ReadLdmiaBenchmark()785     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadLdmiaBenchmark()786     virtual ~ReadLdmiaBenchmark() {}
787 
getName()788     const char *getName() { return "ldmia"; }
789 
790 protected:
791       // Write a given value using stmia.
bench(size_t num_loops)792       void bench(size_t num_loops) {
793           asm volatile(
794               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
795 
796               "mov r0, %0\n"
797               "mov r1, %1\n"
798               "mov r2, %2\n"
799 
800               "0:\n"
801               "mov r3, r1, lsr #5\n"
802 
803               "1:\n"
804               "subs r3, r3, #1\n"
805               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
806               "bgt 1b\n"
807 
808               "sub r0, r0, r1\n"
809               "subs r2, r2, #1\n"
810               "bgt 0b\n"
811 
812               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
813         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
814     }
815 };
816 
817 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
818 public:
ReadVld1Benchmark()819     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVld1Benchmark()820     virtual ~ReadVld1Benchmark() {}
821 
getName()822     const char *getName() { return "vld1"; }
823 
usesNeon()824     bool usesNeon() { return true; }
825 
826 protected:
827     // Write a given value using vst.
828 #if defined(__ARM_NEON__)
bench(size_t num_loops)829     void bench(size_t num_loops) {
830         asm volatile(
831             "stmfd sp!, {r0,r1,r2,r3}\n"
832 
833             "mov r0, %0\n"
834             "mov r1, %1\n"
835             "mov r2, %2\n"
836 
837             "0:\n"
838             "mov r3, r1, lsr #5\n"
839 
840             "1:\n"
841             "subs r3, r3, #1\n"
842             "vld1.8 {d0-d3}, [r0:128]!\n"
843             "bgt 1b\n"
844 
845             "sub r0, r0, r1\n"
846             "subs r2, r2, #1\n"
847             "bgt 0b\n"
848 
849             "ldmfd sp!, {r0,r1,r2,r3}\n"
850         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
851 #else
852     void bench(size_t) {
853 #endif
854     }
855 };
856 
857 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
858 public:
ReadVldrBenchmark()859     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVldrBenchmark()860     virtual ~ReadVldrBenchmark() {}
861 
getName()862     const char *getName() { return "vldr"; }
863 
usesNeon()864     bool usesNeon() { return true; }
865 
866 protected:
867     // Write a given value using vst.
868 #if defined(__ARM_NEON__)
bench(size_t num_loops)869     void bench(size_t num_loops) {
870         asm volatile(
871             "stmfd sp!, {r0,r1,r2,r3}\n"
872 
873             "mov r0, %0\n"
874             "mov r1, %1\n"
875             "mov r2, %2\n"
876 
877             "0:\n"
878             "mov r3, r1, lsr #5\n"
879 
880             "1:\n"
881             "vldr d0, [r0, #0]\n"
882             "subs r3, r3, #1\n"
883             "vldr d1, [r0, #8]\n"
884             "vldr d0, [r0, #16]\n"
885             "vldr d1, [r0, #24]\n"
886             "add r0, r0, #32\n"
887             "bgt 1b\n"
888 
889             "sub r0, r0, r1\n"
890             "subs r2, r2, #1\n"
891             "bgt 0b\n"
892 
893             "ldmfd sp!, {r0,r1,r2,r3}\n"
894         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
895 #else
896     void bench(size_t) {
897 #endif
898     }
899 };
900 
901 
902 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
903 public:
ReadVldmiaBenchmark()904     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVldmiaBenchmark()905     virtual ~ReadVldmiaBenchmark() {}
906 
getName()907     const char *getName() { return "vldmia"; }
908 
usesNeon()909     bool usesNeon() { return true; }
910 
911 protected:
912     // Write a given value using vstmia.
913 #if defined(__ARM_NEON__)
bench(size_t num_loops)914     void bench(size_t num_loops) {
915         asm volatile(
916             "stmfd sp!, {r0,r1,r2,r3}\n"
917 
918             "mov r0, %0\n"
919             "mov r1, %1\n"
920             "mov r2, %2\n"
921 
922             "0:\n"
923             "mov r3, r1, lsr #5\n"
924 
925             "1:\n"
926             "subs r3, r3, #1\n"
927             "vldmia r0!, {d0-d3}\n"
928             "bgt 1b\n"
929 
930             "sub r0, r0, r1\n"
931             "subs r2, r2, #1\n"
932             "bgt 0b\n"
933 
934             "ldmfd sp!, {r0,r1,r2,r3}\n"
935         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
936 #else
937     void bench(size_t) {
938 #endif
939     }
940 };
941 
942 #endif  // __BANDWIDTH_H__
943