1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "examples.h"
28 
// Number of elements in a statically sized array. The argument must be a real
// array (not a pointer), because the count is derived from sizeof.
#define ARRAY_SIZE(Array) (sizeof(Array) / sizeof((Array)[0]))
// Number of `byte` elements in the code buffer that main() hands to the
// MacroAssembler.
#define BUF_SIZE (4096)
// Assembler shorthand: `__ Foo(...)` expands to `masm->Foo(...)`, so any
// function using it needs a pointer named `masm` in scope.
#define __ masm->
33 
34 /*
35  * This example adds two vectors with 1-byte elements using NEON instructions,
36  * and returns the results in the first vector.
37  */
GenerateAdd2Vectors(MacroAssembler * masm)38 void GenerateAdd2Vectors(MacroAssembler* masm) {
39   // void add2_vectors(uint8_t *vec*, const uint8_t *vecB, unsigned size)
40   // Argument locations:
41   //    vecA (pointer) -> x0
42   //    vecB (pointer) -> x1
43   //    size (integer) -> w2
44   // Result returned in vecA.
45 
46   Label loop16, loopr, end;
47 
48   // Loop to add vector elements in 16-byte chunks.
49   __ Bind(&loop16);
50 
51   // Handle vectors smaller than 16-bytes in the remainder loop.
52   __ Cmp(w2, 16);
53   __ B(lo, &loopr);
54   __ Sub(w2, w2, 16);
55 
56   // Add vectors in 16-byte chunks.
57   __ Ld1(v0.V16B(), MemOperand(x0));
58   __ Ld1(v1.V16B(), MemOperand(x1, 16, PostIndex));
59   __ Add(v0.V16B(), v0.V16B(), v1.V16B());
60   __ St1(v0.V16B(), MemOperand(x0, 16, PostIndex));
61 
62   __ B(&loop16);
63 
64   // Loop to add the remaining vector elements.
65   __ Bind(&loopr);
66 
67   // If there are no more vector elements to process, then exit.
68   __ Cbz(w2, &end);
69   __ Sub(w2, w2, 1);
70 
71   // Add remaining vector elements in 1-byte chunks.
72   __ Ldrb(w5, MemOperand(x0));
73   __ Ldrb(w6, MemOperand(x1, 1, PostIndex));
74   __ Add(w5, w5, w6);
75   __ Strb(w5, MemOperand(x0, 1, PostIndex));
76 
77   __ B(&loopr);
78 
79   __ Bind(&end);
80 
81   __ Ret();
82 }
83 
84 
// Prints the first `num` bytes of `vec` to stdout as decimal numbers, in the
// form "( a, b, c )" followed by a newline. Nothing is read from `vec` when
// `num` is zero; only the surrounding parentheses are printed.
void PrintVector(const uint8_t *vec, unsigned num) {
  printf("( ");
  // Empty before the first element, ", " before every later one.
  const char* separator = "";
  for (unsigned idx = 0; idx < num; ++idx) {
    printf("%s%d", separator, vec[idx]);
    separator = ", ";
  }
  printf(" )\n");
}
96 
97 
98 #ifndef TEST_EXAMPLES
main(void)99 int main(void) {
100   // Create and initialize the assembler.
101   byte assm_buf[BUF_SIZE];
102   MacroAssembler masm(assm_buf, BUF_SIZE);
103 
104   // Generate native code for the example function.
105   Label add2_vectors;
106   masm.Bind(&add2_vectors);
107   GenerateAdd2Vectors(&masm);
108   masm.FinalizeCode();
109 
110   // Initialize input data for the example function.
111   uint8_t vecA[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
112                     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
113                     16, 17, 18, 19, 20};
114   uint8_t vecB[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
115                     30, 31,
116                     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
117                     30, 31,
118                     32, 33, 34, 35, 36};
119   uint8_t vecC[ARRAY_SIZE(vecA)];
120 
121   // Check whether the number of elements in both vectors match.
122   assert(ARRAY_SIZE(vecA) == ARRAY_SIZE(vecB));
123 
124   // Compute the result in C.
125   for (unsigned i = 0; i < ARRAY_SIZE(vecA); i++) {
126     vecC[i] = vecA[i] + vecB[i];
127   }
128 
129 #ifdef USE_SIMULATOR
130   uintptr_t vecA_addr = reinterpret_cast<uintptr_t>(vecA);
131   uintptr_t vecB_addr = reinterpret_cast<uintptr_t>(vecB);
132 
133   // Configure register environment in the simulator.
134   Decoder decoder;
135   Simulator simulator(&decoder);
136   simulator.set_xreg(0, vecA_addr);
137   simulator.set_xreg(1, vecB_addr);
138   simulator.set_xreg(2, ARRAY_SIZE(vecA));
139   PrintVector(vecA, ARRAY_SIZE(vecA));
140   printf(" +\n");
141   PrintVector(vecB, ARRAY_SIZE(vecB));
142 
143   // Run the example function in the simulator.
144   simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&add2_vectors));
145   printf(" =\n");
146   PrintVector(vecA, ARRAY_SIZE(vecA));
147 #else
148   // Placeholder to run test natively.
149   printf("Running tests natively is not supported yet.\n");
150   return 0;
151 #endif  // USE_SIMULATOR
152   // Check that the computed value in NEON matches the C version.
153   for (unsigned i = 0; i < ARRAY_SIZE(vecA); i++) {
154     assert(vecC[i] == vecA[i]);
155   }
156 
157   return 0;
158 }
159 #endif  // TEST_EXAMPLES
160