1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
19 
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "utils/Compat.h"
24 #include "memtest.h"
25 
26 // Bandwidth Class definitions.
27 class BandwidthBenchmark {
28 public:
BandwidthBenchmark()29     BandwidthBenchmark()
30         : _size(0),
31           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
32           _num_loops(DEFAULT_NUM_LOOPS) {}
~BandwidthBenchmark()33     virtual ~BandwidthBenchmark() {}
34 
run()35     bool run() {
36         if (_size == 0) {
37             return false;
38         }
39         if (!canRun()) {
40             return false;
41         }
42 
43         bench(_num_warm_loops);
44 
45         nsecs_t t = system_time();
46         bench(_num_loops);
47         t = system_time() - t;
48 
49         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
50 
51         return true;
52     }
53 
canRun()54     bool canRun() { return !usesNeon() || isNeonSupported(); }
55 
56     virtual bool setSize(size_t size) = 0;
57 
58     virtual const char *getName() = 0;
59 
60     virtual bool verify() = 0;
61 
usesNeon()62     virtual bool usesNeon() { return false; }
63 
isNeonSupported()64     bool isNeonSupported() {
65 #if defined(__ARM_NEON__)
66         return true;
67 #else
68         return false;
69 #endif
70     }
71 
72     // Accessors/mutators.
mb_per_sec()73     double mb_per_sec() { return _mb_per_sec; }
num_warm_loops()74     size_t num_warm_loops() { return _num_warm_loops; }
num_loops()75     size_t num_loops() { return _num_loops; }
size()76     size_t size() { return _size; }
77 
set_num_warm_loops(size_t num_warm_loops)78     void set_num_warm_loops(size_t num_warm_loops) {
79         _num_warm_loops = num_warm_loops;
80     }
set_num_loops(size_t num_loops)81     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
82 
83     // Static constants
84     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
85     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
86 
87 protected:
88     virtual void bench(size_t num_loops) = 0;
89 
90     double _mb_per_sec;
91     size_t _size;
92     size_t _num_warm_loops;
93     size_t _num_loops;
94 
95 private:
96     // Static constants
97     static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
98     static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
99 };
100 
101 class CopyBandwidthBenchmark : public BandwidthBenchmark {
102 public:
CopyBandwidthBenchmark()103     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
104 
setSize(size_t size)105     bool setSize(size_t size) {
106         if (_src) {
107            free(_src);
108         }
109         if (_dst) {
110             free(_dst);
111         }
112 
113         if (size == 0) {
114             _size = DEFAULT_COPY_SIZE;
115         } else {
116             _size = size;
117         }
118 
119         _src = reinterpret_cast<char*>(memalign(64, _size));
120         if (!_src) {
121             perror("Failed to allocate memory for test.");
122             return false;
123         }
124         _dst = reinterpret_cast<char*>(memalign(64, _size));
125         if (!_dst) {
126             perror("Failed to allocate memory for test.");
127             return false;
128         }
129 
130         return true;
131     }
~CopyBandwidthBenchmark()132     virtual ~CopyBandwidthBenchmark() {
133         if (_src) {
134             free(_src);
135             _src = NULL;
136         }
137         if (_dst) {
138             free(_dst);
139             _dst = NULL;
140         }
141     }
142 
verify()143     bool verify() {
144         memset(_src, 0x23, _size);
145         memset(_dst, 0, _size);
146         bench(1);
147         if (memcmp(_src, _dst, _size) != 0) {
148             printf("Buffers failed to compare after one loop.\n");
149             return false;
150         }
151 
152         memset(_src, 0x23, _size);
153         memset(_dst, 0, _size);
154         _num_loops = 2;
155         bench(2);
156         if (memcmp(_src, _dst, _size) != 0) {
157             printf("Buffers failed to compare after two loops.\n");
158             return false;
159         }
160 
161         return true;
162     }
163 
164 protected:
165     char *_src;
166     char *_dst;
167 
168     static const unsigned int DEFAULT_COPY_SIZE = 8000;
169 };
170 
171 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
172 public:
CopyLdrdStrdBenchmark()173     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
~CopyLdrdStrdBenchmark()174     virtual ~CopyLdrdStrdBenchmark() {}
175 
getName()176     const char *getName() { return "ldrd/strd"; }
177 
178 protected:
179     // Copy using ldrd/strd instructions.
bench(size_t num_loops)180     void bench(size_t num_loops) {
181         asm volatile(
182             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
183 
184             "mov r0, %0\n"
185             "mov r1, %1\n"
186             "mov r2, %2\n"
187             "mov r3, %3\n"
188 
189             "0:\n"
190             "mov r4, r2, lsr #6\n"
191 
192             "1:\n"
193             "ldrd r6, r7, [r0]\n"
194             "strd r6, r7, [r1]\n"
195             "ldrd r6, r7, [r0, #8]\n"
196             "strd r6, r7, [r1, #8]\n"
197             "ldrd r6, r7, [r0, #16]\n"
198             "strd r6, r7, [r1, #16]\n"
199             "ldrd r6, r7, [r0, #24]\n"
200             "strd r6, r7, [r1, #24]\n"
201             "ldrd r6, r7, [r0, #32]\n"
202             "strd r6, r7, [r1, #32]\n"
203             "ldrd r6, r7, [r0, #40]\n"
204             "strd r6, r7, [r1, #40]\n"
205             "ldrd r6, r7, [r0, #48]\n"
206             "strd r6, r7, [r1, #48]\n"
207             "ldrd r6, r7, [r0, #56]\n"
208             "strd r6, r7, [r1, #56]\n"
209 
210             "add  r0, r0, #64\n"
211             "add  r1, r1, #64\n"
212             "subs r4, r4, #1\n"
213             "bgt 1b\n"
214 
215             "sub r0, r0, r2\n"
216             "sub r1, r1, r2\n"
217             "subs r3, r3, #1\n"
218             "bgt 0b\n"
219 
220             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
221         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
222     }
223 };
224 
225 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
226 public:
CopyLdmiaStmiaBenchmark()227     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
~CopyLdmiaStmiaBenchmark()228     virtual ~CopyLdmiaStmiaBenchmark() {}
229 
getName()230     const char *getName() { return "ldmia/stmia"; }
231 
232 protected:
233     // Copy using ldmia/stmia instructions.
bench(size_t num_loops)234     void bench(size_t num_loops) {
235         asm volatile(
236             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
237 
238             "mov r0, %0\n"
239             "mov r1, %1\n"
240             "mov r2, %2\n"
241             "mov r3, %3\n"
242 
243             "0:\n"
244             "mov r4, r2, lsr #6\n"
245 
246             "1:\n"
247             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
248             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
249             "subs r4, r4, #1\n"
250             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
251             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
252             "bgt 1b\n"
253 
254             "sub r0, r0, r2\n"
255             "sub r1, r1, r2\n"
256             "subs r3, r3, #1\n"
257             "bgt 0b\n"
258 
259             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
260         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
261     }
262 };
263 
264 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
265 public:
CopyVld1Vst1Benchmark()266     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
~CopyVld1Vst1Benchmark()267     virtual ~CopyVld1Vst1Benchmark() {}
268 
getName()269     const char *getName() { return "vld1/vst1"; }
270 
usesNeon()271     bool usesNeon() { return true; }
272 
273 protected:
274     // Copy using vld1/vst1 instructions.
bench(size_t num_loops)275     void bench(size_t num_loops) {
276 #if defined(__ARM_NEON__)
277         asm volatile(
278             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
279 
280             "mov r0, %0\n"
281             "mov r1, %1\n"
282             "mov r2, %2\n"
283             "mov r3, %3\n"
284 
285             "0:\n"
286             "mov r4, r2, lsr #6\n"
287 
288             "1:\n"
289             "vld1.8 {d0-d3}, [r0]!\n"
290             "vld1.8 {d4-d7}, [r0]!\n"
291             "subs r4, r4, #1\n"
292             "vst1.8 {d0-d3}, [r1:128]!\n"
293             "vst1.8 {d4-d7}, [r1:128]!\n"
294             "bgt 1b\n"
295 
296             "sub r0, r0, r2\n"
297             "sub r1, r1, r2\n"
298             "subs r3, r3, #1\n"
299             "bgt 0b\n"
300 
301             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
302         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
303 #endif
304     }
305 };
306 
307 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
308 public:
CopyVldrVstrBenchmark()309     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
~CopyVldrVstrBenchmark()310     virtual ~CopyVldrVstrBenchmark() {}
311 
getName()312     const char *getName() { return "vldr/vstr"; }
313 
usesNeon()314     bool usesNeon() { return true; }
315 
316 protected:
317     // Copy using vldr/vstr instructions.
bench(size_t num_loops)318     void bench(size_t num_loops) {
319 #if defined(__ARM_NEON__)
320         asm volatile(
321             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
322 
323             "mov r0, %0\n"
324             "mov r1, %1\n"
325             "mov r2, %2\n"
326             "mov r3, %3\n"
327 
328             "0:\n"
329             "mov r4, r2, lsr #6\n"
330 
331             "1:\n"
332             "vldr d0, [r0, #0]\n"
333             "subs r4, r4, #1\n"
334             "vldr d1, [r0, #8]\n"
335             "vstr d0, [r1, #0]\n"
336             "vldr d0, [r0, #16]\n"
337             "vstr d1, [r1, #8]\n"
338             "vldr d1, [r0, #24]\n"
339             "vstr d0, [r1, #16]\n"
340             "vldr d0, [r0, #32]\n"
341             "vstr d1, [r1, #24]\n"
342             "vldr d1, [r0, #40]\n"
343             "vstr d0, [r1, #32]\n"
344             "vldr d0, [r0, #48]\n"
345             "vstr d1, [r1, #40]\n"
346             "vldr d1, [r0, #56]\n"
347             "vstr d0, [r1, #48]\n"
348             "add r0, r0, #64\n"
349             "vstr d1, [r1, #56]\n"
350             "add r1, r1, #64\n"
351             "bgt 1b\n"
352 
353             "sub r0, r0, r2\n"
354             "sub r1, r1, r2\n"
355             "subs r3, r3, #1\n"
356             "bgt 0b\n"
357 
358             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
359         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
360 #endif
361     }
362 };
363 
364 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
365 public:
CopyVldmiaVstmiaBenchmark()366     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
~CopyVldmiaVstmiaBenchmark()367     virtual ~CopyVldmiaVstmiaBenchmark() {}
368 
getName()369     const char *getName() { return "vldmia/vstmia"; }
370 
usesNeon()371     bool usesNeon() { return true; }
372 
373 protected:
374     // Copy using vldmia/vstmia instructions.
bench(size_t num_loops)375     void bench(size_t num_loops) {
376 #if defined(__ARM_NEON__)
377         asm volatile(
378             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
379 
380             "mov r0, %0\n"
381             "mov r1, %1\n"
382             "mov r2, %2\n"
383             "mov r3, %3\n"
384 
385             "0:\n"
386             "mov r4, r2, lsr #6\n"
387 
388             "1:\n"
389             "vldmia r0!, {d0-d7}\n"
390             "subs r4, r4, #1\n"
391             "vstmia r1!, {d0-d7}\n"
392             "bgt 1b\n"
393 
394             "sub r0, r0, r2\n"
395             "sub r1, r1, r2\n"
396             "subs r3, r3, #1\n"
397             "bgt 0b\n"
398 
399             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
400         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
401 #endif
402     }
403 };
404 
405 class MemcpyBenchmark : public CopyBandwidthBenchmark {
406 public:
MemcpyBenchmark()407     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
~MemcpyBenchmark()408     virtual ~MemcpyBenchmark() {}
409 
getName()410     const char *getName() { return "memcpy"; }
411 
412 protected:
bench(size_t num_loops)413     void bench(size_t num_loops) {
414         for (size_t i = 0; i < num_loops; i++) {
415             memcpy(_dst, _src, _size);
416         }
417     }
418 };
419 
420 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
421 public:
SingleBufferBandwidthBenchmark()422     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
~SingleBufferBandwidthBenchmark()423     virtual ~SingleBufferBandwidthBenchmark() {
424         if (_buffer) {
425             free(_buffer);
426             _buffer = NULL;
427         }
428     }
429 
setSize(size_t size)430     bool setSize(size_t size) {
431         if (_buffer) {
432             free(_buffer);
433             _buffer = NULL;
434         }
435 
436         if (_size == 0) {
437             _size = DEFAULT_SINGLE_BUFFER_SIZE;
438         } else {
439             _size = size;
440         }
441 
442         _buffer = reinterpret_cast<char*>(memalign(64, _size));
443         if (!_buffer) {
444             perror("Failed to allocate memory for test.");
445             return false;
446         }
447         memset(_buffer, 0, _size);
448 
449         return true;
450     }
451 
verify()452     bool verify() { return true; }
453 
454 protected:
455     char *_buffer;
456 
457     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
458 };
459 
460 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
461 public:
WriteBandwidthBenchmark()462     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
~WriteBandwidthBenchmark()463     virtual ~WriteBandwidthBenchmark() { }
464 
verify()465     bool verify() {
466         memset(_buffer, 0, _size);
467         bench(1);
468         for (size_t i = 0; i < _size; i++) {
469             if (_buffer[i] != 1) {
470                 printf("Buffer failed to compare after one loop.\n");
471                 return false;
472             }
473         }
474 
475         memset(_buffer, 0, _size);
476         bench(2);
477         for (size_t i = 0; i < _size; i++) {
478             if (_buffer[i] != 2) {
479                 printf("Buffer failed to compare after two loops.\n");
480                 return false;
481             }
482         }
483 
484         return true;
485     }
486 };
487 
488 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
489 public:
WriteStrdBenchmark()490     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
~WriteStrdBenchmark()491     virtual ~WriteStrdBenchmark() {}
492 
getName()493     const char *getName() { return "strd"; }
494 
495 protected:
496     // Write a given value using strd.
bench(size_t num_loops)497     void bench(size_t num_loops) {
498         asm volatile(
499             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
500 
501             "mov r0, %0\n"
502             "mov r1, %1\n"
503             "mov r2, %2\n"
504 
505             "mov r4, #0\n"
506             "mov r5, #0\n"
507 
508             "0:\n"
509             "mov r3, r1, lsr #5\n"
510 
511             "add r4, r4, #0x01010101\n"
512             "mov r5, r4\n"
513 
514             "1:\n"
515             "subs r3, r3, #1\n"
516             "strd r4, r5, [r0]\n"
517             "strd r4, r5, [r0, #8]\n"
518             "strd r4, r5, [r0, #16]\n"
519             "strd r4, r5, [r0, #24]\n"
520             "add  r0, r0, #32\n"
521             "bgt 1b\n"
522 
523             "sub r0, r0, r1\n"
524             "subs r2, r2, #1\n"
525             "bgt 0b\n"
526 
527             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
528           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
529     }
530 };
531 
532 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
533 public:
WriteStmiaBenchmark()534     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
~WriteStmiaBenchmark()535     virtual ~WriteStmiaBenchmark() {}
536 
getName()537     const char *getName() { return "stmia"; }
538 
539 protected:
540       // Write a given value using stmia.
bench(size_t num_loops)541       void bench(size_t num_loops) {
542           asm volatile(
543               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
544 
545               "mov r0, %0\n"
546               "mov r1, %1\n"
547               "mov r2, %2\n"
548 
549               "mov r4, #0\n"
550 
551               "0:\n"
552               "mov r3, r1, lsr #5\n"
553 
554               "add r4, r4, #0x01010101\n"
555               "mov r5, r4\n"
556               "mov r6, r4\n"
557               "mov r7, r4\n"
558               "mov r8, r4\n"
559               "mov r9, r4\n"
560               "mov r10, r4\n"
561               "mov r11, r4\n"
562 
563               "1:\n"
564               "subs r3, r3, #1\n"
565               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
566               "bgt 1b\n"
567 
568               "sub r0, r0, r1\n"
569               "subs r2, r2, #1\n"
570               "bgt 0b\n"
571 
572               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
573         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
574     }
575 };
576 
577 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
578 public:
WriteVst1Benchmark()579     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
~WriteVst1Benchmark()580     virtual ~WriteVst1Benchmark() {}
581 
getName()582     const char *getName() { return "vst1"; }
583 
usesNeon()584     bool usesNeon() { return true; }
585 
586 protected:
587     // Write a given value using vst.
bench(size_t num_loops)588     void bench(size_t num_loops) {
589 #if defined(__ARM_NEON__)
590         asm volatile(
591             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
592 
593             "mov r0, %0\n"
594             "mov r1, %1\n"
595             "mov r2, %2\n"
596             "mov r4, #0\n"
597 
598             "0:\n"
599             "mov r3, r1, lsr #5\n"
600 
601             "add r4, r4, #1\n"
602             "vdup.8 d0, r4\n"
603             "vmov d1, d0\n"
604             "vmov d2, d0\n"
605             "vmov d3, d0\n"
606 
607             "1:\n"
608             "subs r3, r3, #1\n"
609             "vst1.8 {d0-d3}, [r0:128]!\n"
610             "bgt 1b\n"
611 
612             "sub r0, r0, r1\n"
613             "subs r2, r2, #1\n"
614             "bgt 0b\n"
615 
616             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
617         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
618 #endif
619     }
620 };
621 
622 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
623 public:
WriteVstrBenchmark()624     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
~WriteVstrBenchmark()625     virtual ~WriteVstrBenchmark() {}
626 
getName()627     const char *getName() { return "vstr"; }
628 
usesNeon()629     bool usesNeon() { return true; }
630 
631 protected:
632     // Write a given value using vst.
bench(size_t num_loops)633     void bench(size_t num_loops) {
634 #if defined(__ARM_NEON__)
635         asm volatile(
636             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
637 
638             "mov r0, %0\n"
639             "mov r1, %1\n"
640             "mov r2, %2\n"
641             "mov r4, #0\n"
642 
643             "0:\n"
644             "mov r3, r1, lsr #5\n"
645 
646             "add r4, r4, #1\n"
647             "vdup.8 d0, r4\n"
648             "vmov d1, d0\n"
649             "vmov d2, d0\n"
650             "vmov d3, d0\n"
651 
652             "1:\n"
653             "vstr d0, [r0, #0]\n"
654             "subs r3, r3, #1\n"
655             "vstr d1, [r0, #8]\n"
656             "vstr d0, [r0, #16]\n"
657             "vstr d1, [r0, #24]\n"
658             "add r0, r0, #32\n"
659             "bgt 1b\n"
660 
661             "sub r0, r0, r1\n"
662             "subs r2, r2, #1\n"
663             "bgt 0b\n"
664 
665             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
666         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
667 #endif
668     }
669 };
670 
671 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
672 public:
WriteVstmiaBenchmark()673     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
~WriteVstmiaBenchmark()674     virtual ~WriteVstmiaBenchmark() {}
675 
getName()676     const char *getName() { return "vstmia"; }
677 
usesNeon()678     bool usesNeon() { return true; }
679 
680 protected:
681     // Write a given value using vstmia.
bench(size_t num_loops)682     void bench(size_t num_loops) {
683 #if defined(__ARM_NEON__)
684         asm volatile(
685             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
686 
687             "mov r0, %0\n"
688             "mov r1, %1\n"
689             "mov r2, %2\n"
690             "mov r4, #0\n"
691 
692             "0:\n"
693             "mov r3, r1, lsr #5\n"
694 
695             "add r4, r4, #1\n"
696             "vdup.8 d0, r4\n"
697             "vmov d1, d0\n"
698             "vmov d2, d0\n"
699             "vmov d3, d0\n"
700 
701             "1:\n"
702             "subs r3, r3, #1\n"
703             "vstmia r0!, {d0-d3}\n"
704             "bgt 1b\n"
705 
706             "sub r0, r0, r1\n"
707             "subs r2, r2, #1\n"
708             "bgt 0b\n"
709 
710             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
711         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
712 #endif
713     }
714 };
715 
716 class MemsetBenchmark : public WriteBandwidthBenchmark {
717 public:
MemsetBenchmark()718     MemsetBenchmark() : WriteBandwidthBenchmark() { }
~MemsetBenchmark()719     virtual ~MemsetBenchmark() {}
720 
getName()721     const char *getName() { return "memset"; }
722 
723 protected:
bench(size_t num_loops)724     void bench(size_t num_loops) {
725         for (size_t i = 0; i < num_loops; i++) {
726             memset(_buffer, (i % 255) + 1, _size);
727         }
728     }
729 };
730 
731 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
732 public:
ReadLdrdBenchmark()733     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadLdrdBenchmark()734     virtual ~ReadLdrdBenchmark() {}
735 
getName()736     const char *getName() { return "ldrd"; }
737 
738 protected:
739     // Write a given value using strd.
bench(size_t num_loops)740     void bench(size_t num_loops) {
741         asm volatile(
742             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
743 
744             "mov r0, %0\n"
745             "mov r1, %1\n"
746             "mov r2, %2\n"
747 
748             "0:\n"
749             "mov r3, r1, lsr #5\n"
750 
751             "1:\n"
752             "subs r3, r3, #1\n"
753             "ldrd r4, r5, [r0]\n"
754             "ldrd r4, r5, [r0, #8]\n"
755             "ldrd r4, r5, [r0, #16]\n"
756             "ldrd r4, r5, [r0, #24]\n"
757             "add  r0, r0, #32\n"
758             "bgt 1b\n"
759 
760             "sub r0, r0, r1\n"
761             "subs r2, r2, #1\n"
762             "bgt 0b\n"
763 
764             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
765           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
766     }
767 };
768 
769 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
770 public:
ReadLdmiaBenchmark()771     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadLdmiaBenchmark()772     virtual ~ReadLdmiaBenchmark() {}
773 
getName()774     const char *getName() { return "ldmia"; }
775 
776 protected:
777       // Write a given value using stmia.
bench(size_t num_loops)778       void bench(size_t num_loops) {
779           asm volatile(
780               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
781 
782               "mov r0, %0\n"
783               "mov r1, %1\n"
784               "mov r2, %2\n"
785 
786               "0:\n"
787               "mov r3, r1, lsr #5\n"
788 
789               "1:\n"
790               "subs r3, r3, #1\n"
791               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
792               "bgt 1b\n"
793 
794               "sub r0, r0, r1\n"
795               "subs r2, r2, #1\n"
796               "bgt 0b\n"
797 
798               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
799         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
800     }
801 };
802 
803 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
804 public:
ReadVld1Benchmark()805     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVld1Benchmark()806     virtual ~ReadVld1Benchmark() {}
807 
getName()808     const char *getName() { return "vld1"; }
809 
usesNeon()810     bool usesNeon() { return true; }
811 
812 protected:
813     // Write a given value using vst.
bench(size_t num_loops)814     void bench(size_t num_loops) {
815 #if defined(__ARM_NEON__)
816         asm volatile(
817             "stmfd sp!, {r0,r1,r2,r3}\n"
818 
819             "mov r0, %0\n"
820             "mov r1, %1\n"
821             "mov r2, %2\n"
822 
823             "0:\n"
824             "mov r3, r1, lsr #5\n"
825 
826             "1:\n"
827             "subs r3, r3, #1\n"
828             "vld1.8 {d0-d3}, [r0:128]!\n"
829             "bgt 1b\n"
830 
831             "sub r0, r0, r1\n"
832             "subs r2, r2, #1\n"
833             "bgt 0b\n"
834 
835             "ldmfd sp!, {r0,r1,r2,r3}\n"
836         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
837 #endif
838     }
839 };
840 
841 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
842 public:
ReadVldrBenchmark()843     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVldrBenchmark()844     virtual ~ReadVldrBenchmark() {}
845 
getName()846     const char *getName() { return "vldr"; }
847 
usesNeon()848     bool usesNeon() { return true; }
849 
850 protected:
851     // Write a given value using vst.
bench(size_t num_loops)852     void bench(size_t num_loops) {
853 #if defined(__ARM_NEON__)
854         asm volatile(
855             "stmfd sp!, {r0,r1,r2,r3}\n"
856 
857             "mov r0, %0\n"
858             "mov r1, %1\n"
859             "mov r2, %2\n"
860 
861             "0:\n"
862             "mov r3, r1, lsr #5\n"
863 
864             "1:\n"
865             "vldr d0, [r0, #0]\n"
866             "subs r3, r3, #1\n"
867             "vldr d1, [r0, #8]\n"
868             "vldr d0, [r0, #16]\n"
869             "vldr d1, [r0, #24]\n"
870             "add r0, r0, #32\n"
871             "bgt 1b\n"
872 
873             "sub r0, r0, r1\n"
874             "subs r2, r2, #1\n"
875             "bgt 0b\n"
876 
877             "ldmfd sp!, {r0,r1,r2,r3}\n"
878         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
879 #endif
880     }
881 };
882 
883 
884 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
885 public:
ReadVldmiaBenchmark()886     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVldmiaBenchmark()887     virtual ~ReadVldmiaBenchmark() {}
888 
getName()889     const char *getName() { return "vldmia"; }
890 
usesNeon()891     bool usesNeon() { return true; }
892 
893 protected:
894     // Write a given value using vstmia.
bench(size_t num_loops)895     void bench(size_t num_loops) {
896 #if defined(__ARM_NEON__)
897         asm volatile(
898             "stmfd sp!, {r0,r1,r2,r3}\n"
899 
900             "mov r0, %0\n"
901             "mov r1, %1\n"
902             "mov r2, %2\n"
903 
904             "0:\n"
905             "mov r3, r1, lsr #5\n"
906 
907             "1:\n"
908             "subs r3, r3, #1\n"
909             "vldmia r0!, {d0-d3}\n"
910             "bgt 1b\n"
911 
912             "sub r0, r0, r1\n"
913             "subs r2, r2, #1\n"
914             "bgt 0b\n"
915 
916             "ldmfd sp!, {r0,r1,r2,r3}\n"
917         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
918 #endif
919     }
920 };
921 
922 #endif  // __BANDWIDTH_H__
923