#include <stdio.h>

/* Number of elements in the test data arrays. main() also uses it as the
   exclusive upper bound of its loop counters. */
#define N 256

/* Deterministic test input, filled by init_reg_val_double(). */
unsigned long long reg_val_double[N];

/* Fill reg_val_double[] with a fixed, reproducible sequence.

   Element 0 is the seed 19650218; every following element is derived from
   its predecessor as

      x[i] = 1812433253 * (x[i - 1] ^ (x[i - 1] >> 30)) + i

   The expression is evaluated in unsigned long long arithmetic, so it wraps
   around on overflow instead of invoking undefined behaviour. */
void init_reg_val_double(void)
{
   const unsigned long long seed = 19650218ULL;
   int i;

   reg_val_double[0] = seed & 0xffffffffULL;
   for (i = 1; i < N; i++) {
      const unsigned long long prev = reg_val_double[i - 1];

      reg_val_double[i] = 1812433253ULL * (prev ^ (prev >> 30)) + i;
   }
}

19 /* Make a copy of original array to prevent the unexpected changes by Atomic Add
20    Instructions */
21 unsigned long long reg_val_double_copy[N];
22 
copy_reg_val_double()23 void copy_reg_val_double()
24 {
25    int i;
26    for (i = 0; i < N; i++) {
27       reg_val_double_copy[i] = reg_val_double[i];
28    }
29 }
30 
/* TEST1_32/64 macros are used for the load atomic increment/decrement/set/clear
   instructions. After executing the instruction both the destination register
   and the memory location are printed.

   1: Move arguments (base address and offset) to registers
   2: Add offset and base address to make the absolute address
   3: Execute instruction
   4: Move result from register ($t3)
   5: Load memory data ('lw' for 32bit instructions and 'ld' for 64bit ones)

   instruction must be a string literal (it is pasted into the asm template),
   offset is a byte offset and mem is the base address.

   The asm body overwrites $t0, $t1 and $t3. The clobber list names registers
   by number; with the n32/n64 register naming these are $12, $13 and $15, so
   all three are listed to keep the compiler from holding live values there. */
#define TEST1_32(instruction, offset, mem)                    \
do {                                                          \
   unsigned long out = 0;                                     \
   unsigned long res_mem = 0;                                 \
   __asm__ volatile(                                          \
     "move         $t0, %2"        "\n\t"                     \
     "move         $t1, %3"        "\n\t"                     \
     "daddu        $t0, $t1, $t0"  "\n\t"                     \
     instruction " $t3, ($t0)"     "\n\t"                     \
     "move         %0,  $t3"       "\n\t"                     \
     "lw           %1,  0($t0)"    "\n\t"                     \
     : "=&r" (out), "=&r" (res_mem)                           \
     : "r" (mem), "r" (offset)                                \
     : "$12", "$13", "$15", "cc", "memory"                    \
     );                                                       \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",   \
          instruction, (unsigned int) (offset), out, res_mem); \
} while (0)

/* Doubleword variant of the load atomic increment/decrement/set/clear test:
   compute base + offset in $t0, execute "instruction $t3, ($t0)", copy $t3
   to 'out' and read the memory location back with 'ld'.

   instruction must be a string literal, offset is a byte offset and mem is
   the base address. $t0, $t1 and $t3 are overwritten; with the n32/n64
   register naming they are $12, $13 and $15 in the clobber list. */
#define TEST1_64(instruction, offset, mem)                     \
do {                                                           \
   unsigned long out = 0;                                      \
   unsigned long res_mem = 0;                                  \
   __asm__ volatile(                                           \
     "move         $t0, %2"        "\n\t"                      \
     "move         $t1, %3"        "\n\t"                      \
     "daddu        $t0, $t1, $t0"  "\n\t"                      \
     instruction " $t3, ($t0)"     "\n\t"                      \
     "move         %0,  $t3"       "\n\t"                      \
     "ld           %1,  0($t0)"    "\n\t"                      \
     : "=&r" (out), "=&r" (res_mem)                            \
     : "r" (mem), "r" (offset)                                 \
     : "$12", "$13", "$15", "cc", "memory"                     \
     );                                                        \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",   \
          instruction, (unsigned int) (offset), out, res_mem); \
} while (0)

/* TEST2 macro is used for the pop/dpop/baddu instructions. The macro performs
   the following operations:

   1: Move arguments to registers ($t1 and $t2)
   2: Execute instruction
   3: Move result from register ($t3)

   instruction must be a string literal holding the complete instruction,
   operands included. RSVal and RTVal are the two source values.

   The asm body overwrites $t1, $t2 and $t3; with the n32/n64 register naming
   these are $13, $14 and $15. $12 ($t0) is kept in the clobber list as well,
   as in the original macro, in case an instruction string refers to it. */
#define TEST2(instruction, RSVal, RTVal)                            \
do {                                                                \
   unsigned long out;                                               \
   __asm__ volatile(                                                \
      "move $t1, %1"  "\n\t"                                        \
      "move $t2, %2"  "\n\t"                                        \
      instruction     "\n\t"                                        \
      "move %0, $t3"  "\n\t"                                        \
      : "=&r" (out)                                                 \
      : "r" (RSVal), "r" (RTVal)                                    \
      : "$12", "$13", "$14", "$15", "cc", "memory"                  \
        );                                                          \
   printf("%s :: rd 0x%lx, rs 0x%llx, rt 0x%llx\n",                 \
          instruction, out, (unsigned long long) (RSVal),           \
          (unsigned long long) (RTVal));                            \
} while (0)

/* TEST3 macro is used for the store atomic add word and store atomic add
   doubleword instructions. The following operations are performed:

   1: Move arguments (base address and offset) to registers
   2: Add offset and base address to make the absolute address
   3: Load memory data before the instruction ('ld', printed as memPre)
   4: Move the value operand to $t2
   5: Execute instruction
   6: Load memory data after the instruction ('ld', printed as mem)

   instruction must be a string literal, offset is a byte offset, mem is the
   base address and value is the operand handed to the instruction. Both
   memory reads use 'ld', whichever instruction is tested.

   The asm body overwrites $t0, $t1 and $t2, listed as $12, $13 and $14
   (n32/n64 register naming) in the clobber list. */
#define TEST3(instruction, offset, mem, value)                   \
do {                                                             \
   unsigned long out = 0;                                        \
   unsigned long outPre = 0;                                     \
   __asm__ volatile(                                             \
     "move         $t0, %2"        "\n\t"                        \
     "move         $t1, %3"        "\n\t"                        \
     "daddu        $t0, $t1, $t0"  "\n\t"                        \
     "ld           %1,  0($t0)"    "\n\t"                        \
     "move         $t2, %4"        "\n\t"                        \
     instruction " $t2, ($t0)"     "\n\t"                        \
     "ld           %0,  0($t0)"    "\n\t"                        \
     : "=&r" (out), "=&r" (outPre)                               \
     : "r" (mem), "r" (offset), "r" (value)                      \
     : "$12", "$13", "$14", "cc", "memory"                       \
     );                                                          \
   printf("%s :: value: 0x%llx, memPre: 0x%lx, mem: 0x%lx\n",    \
          instruction, (unsigned long long) (value), outPre,     \
          out);                                                  \
} while (0)

/* TEST4_32/64 macros are used for the load atomic add/swap instructions. The
   following operations are performed:

   1: Move arguments (base address and offset) to registers.
   2: Add offset and base address to make the absolute address.
   3: Execute instruction; the offset left in $t1 is its rt operand.
   4: Move result from register ($t3).
   5: Load memory data ('lw' for 32bit instructions and 'ld' for 64bit ones).

   instruction must be a string literal, offset is a byte offset and mem is
   the base address.

   The asm body overwrites $t0, $t1 and $t3; with the n32/n64 register naming
   these are $12, $13 and $15, so all three appear in the clobber list. */
#define TEST4_32(instruction, offset, mem)                    \
do {                                                          \
   unsigned long out = 0;                                     \
   unsigned long res_mem = 0;                                 \
   __asm__ volatile(                                          \
      "move         $t0, %2"          "\n\t"                  \
      "move         $t1, %3"          "\n\t"                  \
      "daddu        $t0, $t0, $t1"    "\n\t"                  \
      instruction " $t3, ($t0), $t1"  "\n\t"                  \
      "move         %0,  $t3"         "\n\t"                  \
      "lw           %1,  0($t0)"      "\n\t"                  \
      : "=&r" (out), "=&r" (res_mem)                          \
      : "r" (mem), "r" (offset)                               \
      : "$12", "$13", "$15", "cc", "memory"                   \
     );                                                       \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",   \
          instruction, (unsigned int) (offset), out, res_mem); \
} while (0)

/* Doubleword variant of the load atomic add/swap test: compute base + offset
   in $t0, execute "instruction $t3, ($t0), $t1" with the offset still in $t1,
   copy $t3 to 'out' and read the memory location back with 'ld'.

   instruction must be a string literal, offset is a byte offset and mem is
   the base address. $t0, $t1 and $t3 are overwritten; with the n32/n64
   register naming they are $12, $13 and $15 in the clobber list. */
#define TEST4_64(instruction, offset, mem)                     \
do {                                                           \
   unsigned long out = 0;                                      \
   unsigned long res_mem = 0;                                  \
   __asm__ volatile(                                           \
      "move         $t0, %2"          "\n\t"                   \
      "move         $t1, %3"          "\n\t"                   \
      "daddu        $t0, $t0,   $t1"  "\n\t"                   \
      instruction " $t3, ($t0), $t1"  "\n\t"                   \
      "move         %0,  $t3"         "\n\t"                   \
      "ld           %1,  0($t0)"      "\n\t"                   \
     : "=&r" (out), "=&r" (res_mem)                            \
     : "r" (mem), "r" (offset)                                 \
     : "$12", "$13", "$15", "cc", "memory"                     \
     );                                                        \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",   \
          instruction, (unsigned int) (offset), out, res_mem); \
} while (0)

/* Instruction groups exercised by main(). The enumerators are consecutive
   values starting at 0, and main() walks them in order from BADDU (first)
   to LACD (last), so the declaration order is the execution order. */
typedef enum {
   BADDU, POP, DPOP, SAA, SAAD, LAA, LAAD, LAW, LAWD, LAI, LAID, LAD, LADD,
   LAS, LASD, LAC, LACD
} cvm_op;

/* Test driver for the Cavium OCTEON specific instructions.

   The test body is only compiled when _MIPS_ARCH_OCTEON2 is non-zero;
   otherwise main() does nothing and returns 0.

   The instruction groups of cvm_op are run in declaration order. Every
   atomic group first refreshes reg_val_double_copy[] so that it starts from
   the unmodified data. The loop counter is handed to the TEST1/TEST3/TEST4
   macros as a byte offset from the start of reg_val_double_copy[] (step 4
   for the word instructions, step 8 for the doubleword ones). For
   BADDU/POP/DPOP, and for the value operand of SAA/SAAD, the counter is
   used as an index into reg_val_double[]. */
int main(void)
{
#if (_MIPS_ARCH_OCTEON2)
   int i, j;
   cvm_op op;

   init_reg_val_double();
   for (op = BADDU; op <= LACD; op++) {
      switch (op) {
         /* Unsigned Byte Add - BADDU rd, rs, rt; Cavium OCTEON */
         case BADDU: {
            for (i = 4; i < N; i += 4) {
               for (j = 4; j < N; j += 4) {
                  TEST2("baddu $t3, $t1, $t2", reg_val_double[i],
                                               reg_val_double[j]);
               }
            }
            break;
         }
         case POP: {  /* Count Ones in a Word - POP */
            for (j = 4; j < N; j += 4) {
               TEST2("pop $t3, $t1", reg_val_double[j], 0);
            }
            break;
         }
         case DPOP: {  /* Count Ones in a Doubleword - DPOP */
            for (j = 8; j < N; j += 8) {
               TEST2("dpop $t3, $t1", reg_val_double[j], 0);
            }
            break;
         }
         case SAA: {  /* Atomic Add Word - saa rt, (base). */
            copy_reg_val_double();
            for (j = 4; j < N; j += 4) {
               TEST3("saa", j, reg_val_double_copy, reg_val_double[j]);
            }
            break;
         }
         case SAAD: {  /* Atomic Add Double - saad rt, (base). */
            copy_reg_val_double();
            for (j = 8; j < N; j += 8) {
               TEST3("saad", j, reg_val_double_copy, reg_val_double[j]);
            }
            break;
         }
         case LAA: {  /* Load Atomic Add Word - laa rd, (base), rt. */
            copy_reg_val_double();
            for (j = 4; j < N; j += 4) {
               TEST4_32("laa", j, reg_val_double_copy);
            }
            break;
         }
         case LAAD: {  /* Load Atomic Add Double - laad rd, (base), rt */
            copy_reg_val_double();
            /* The trailing blank in "laad " is printed verbatim by the
               macro, so it is kept to leave the output unchanged. */
            for (j = 8; j < N; j += 8) {
               TEST4_64("laad ", j, reg_val_double_copy);
            }
            break;
         }
         case LAW: {  /* Load Atomic Swap Word - law rd, (base), rt */
            copy_reg_val_double();
            for (j = 4; j < N; j += 4) {
               TEST4_32("law", j, reg_val_double_copy);
            }
            break;
         }
         case LAWD: {  /* Load Atomic Swap Double - lawd rd, (base), rt */
            copy_reg_val_double();
            for (j = 8; j < N; j += 8) {
               TEST4_64("lawd", j, reg_val_double_copy);
            }
            break;
         }
         case LAI: {  /* Load Atomic Increment Word - lai rd, (base) */
            copy_reg_val_double();
            for (i = 4; i < N; i += 4) {
               TEST1_32("lai", i, reg_val_double_copy);
            }
            break;
         }
         case LAID: {  /* Load Atomic Increment Double - laid rd, (base) */
            copy_reg_val_double();
            /* The trailing blank in "laid " is printed verbatim by the
               macro, so it is kept to leave the output unchanged. */
            for (i = 8; i < N; i += 8) {
               TEST1_64("laid ", i, reg_val_double_copy);
            }
            break;
         }
         case LAD: {  /* Load Atomic Decrement Word - lad rd, (base) */
            copy_reg_val_double();
            for (i = 4; i < N; i += 4) {
               TEST1_32("lad", i, reg_val_double_copy);
            }
            break;
         }
         case LADD: {  /* Load Atomic Decrement Double - ladd rd, (base) */
            copy_reg_val_double();
            for (i = 8; i < N; i += 8) {
               TEST1_64("ladd", i, reg_val_double_copy);
            }
            break;
         }
         case LAS: {  /* Load Atomic Set Word - las rd, (base) */
            copy_reg_val_double();
            for (i = 4; i < N; i += 4) {
               TEST1_32("las", i, reg_val_double_copy);
            }
            break;
         }
         case LASD: {  /* Load Atomic Set Double - lasd rd, (base) */
            copy_reg_val_double();
            for (i = 8; i < N; i += 8) {
               TEST1_64("lasd", i, reg_val_double_copy);
            }
            break;
         }
         case LAC: {  /* Load Atomic Clear Word - lac rd, (base) */
            copy_reg_val_double();
            for (i = 4; i < N; i += 4) {
               TEST1_32("lac", i, reg_val_double_copy);
            }
            break;
         }
         case LACD: {  /* Load Atomic Clear Double - lacd rd, (base) */
            copy_reg_val_double();
            for (i = 8; i < N; i += 8) {
               TEST1_64("lacd", i, reg_val_double_copy);
            }
            break;
         }
         default:
            printf("Nothing to be executed \n");
            break;
      }
   }
#endif
   return 0;
}