; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Verifies instruction selection for 128-bit integer vector arithmetic when
; MSA is enabled, for both the mips and mipsel targets (see the run lines
; above).
;
; Every function follows the same shape: load the operand vector(s) through
; the pointer arguments, apply one IR arithmetic operation (or a mul feeding
; an add/sub), and store the result through the first pointer argument.
; The FileCheck patterns expect the pointer arguments in $4, $5, $6, $7 in
; declaration order, the vector values in MSA registers ($wN), and a
; single MSA instruction for the arithmetic itself. The element-size suffix
; (.b/.h/.w/.d) tracks the vector element type (i8/i16/i32/i64).

; ---------------------------------------------------------------------------
; add <vector>, <vector>  ->  addv.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: add_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = add <16 x i8> %1, %2
  ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v16i8
}

define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: add_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = add <8 x i16> %1, %2
  ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v8i16
}

define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: add_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = add <4 x i32> %1, %2
  ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4i32
}

define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: add_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = add <2 x i64> %1, %2
  ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2i64
}

; ---------------------------------------------------------------------------
; add <vector>, <splat of 1>  ->  addvi.{b,h,w,d} with immediate 1
; ---------------------------------------------------------------------------

define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: add_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v16i8_i
}

define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: add_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
                          i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v8i16_i
}

define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: add_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4i32_i
}

define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: add_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = add <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2i64_i
}

; ---------------------------------------------------------------------------
; sub <vector>, <vector>  ->  subv.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sub_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <16 x i8> %1, %2
  ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v16i8
}

define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sub_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <8 x i16> %1, %2
  ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v8i16
}

define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sub_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <4 x i32> %1, %2
  ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4i32
}

define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sub_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <2 x i64> %1, %2
  ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2i64
}

; ---------------------------------------------------------------------------
; sub <vector>, <splat of 1>  ->  subvi.{b,h,w,d} with immediate 1
; ---------------------------------------------------------------------------

define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sub_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v16i8_i
}

define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sub_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
                          i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v8i16_i
}

define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sub_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4i32_i
}

define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sub_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2i64_i
}

; ---------------------------------------------------------------------------
; mul <vector>, <vector>  ->  mulv.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: mul_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <16 x i8> %1, %2
  ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v16i8
}

define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: mul_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <8 x i16> %1, %2
  ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v8i16
}

define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: mul_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <4 x i32> %1, %2
  ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4i32
}

define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: mul_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <2 x i64> %1, %2
  ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2i64
}

; ---------------------------------------------------------------------------
; (mul %b, %c) + %a  ->  maddv.{b,h,w,d}
; The patterns expect the register loaded from %a to be both the accumulator
; operand of maddv and the register that is stored afterwards.
; ---------------------------------------------------------------------------

define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                         <16 x i8>* %c) nounwind {
  ; CHECK: maddv_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>, <16 x i8>* %c
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <16 x i8> %2, %3
  %5 = add <16 x i8> %4, %1
  ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
  store <16 x i8> %5, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v16i8
}

define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                         <8 x i16>* %c) nounwind {
  ; CHECK: maddv_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = load <8 x i16>, <8 x i16>* %c
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <8 x i16> %2, %3
  %5 = add <8 x i16> %4, %1
  ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
  store <8 x i16> %5, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v8i16
}

define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                         <4 x i32>* %c) nounwind {
  ; CHECK: maddv_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x i32>, <4 x i32>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <4 x i32> %2, %3
  %5 = add <4 x i32> %4, %1
  ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
  store <4 x i32> %5, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v4i32
}

define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                         <2 x i64>* %c) nounwind {
  ; CHECK: maddv_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x i64>, <2 x i64>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <2 x i64> %2, %3
  %5 = add <2 x i64> %4, %1
  ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
  store <2 x i64> %5, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v2i64
}

; ---------------------------------------------------------------------------
; %a - (mul %b, %c)  ->  msubv.{b,h,w,d}
; As with maddv, the register loaded from %a is expected to be the
; accumulator operand and the register that is stored.
; ---------------------------------------------------------------------------

define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                         <16 x i8>* %c) nounwind {
  ; CHECK: msubv_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>, <16 x i8>* %c
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <16 x i8> %2, %3
  %5 = sub <16 x i8> %1, %4
  ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
  store <16 x i8> %5, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v16i8
}

define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                         <8 x i16>* %c) nounwind {
  ; CHECK: msubv_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = load <8 x i16>, <8 x i16>* %c
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <8 x i16> %2, %3
  %5 = sub <8 x i16> %1, %4
  ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
  store <8 x i16> %5, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v8i16
}

define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                         <4 x i32>* %c) nounwind {
  ; CHECK: msubv_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x i32>, <4 x i32>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <4 x i32> %2, %3
  %5 = sub <4 x i32> %1, %4
  ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
  store <4 x i32> %5, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v4i32
}

define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                         <2 x i64>* %c) nounwind {
  ; CHECK: msubv_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x i64>, <2 x i64>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <2 x i64> %2, %3
  %5 = sub <2 x i64> %1, %4
  ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
  store <2 x i64> %5, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v2i64
}

; ---------------------------------------------------------------------------
; sdiv <vector>, <vector>  ->  div_s.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: div_s_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <16 x i8> %1, %2
  ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v16i8
}

define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: div_s_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <8 x i16> %1, %2
  ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v8i16
}

define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: div_s_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <4 x i32> %1, %2
  ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v4i32
}

define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: div_s_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <2 x i64> %1, %2
  ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v2i64
}

; ---------------------------------------------------------------------------
; udiv <vector>, <vector>  ->  div_u.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: div_u_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <16 x i8> %1, %2
  ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v16i8
}

define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: div_u_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <8 x i16> %1, %2
  ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v8i16
}

define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: div_u_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <4 x i32> %1, %2
  ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v4i32
}

define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: div_u_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <2 x i64> %1, %2
  ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v2i64
}

; ---------------------------------------------------------------------------
; srem <vector>, <vector>  ->  mod_s.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: mod_s_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <16 x i8> %1, %2
  ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v16i8
}

define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: mod_s_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <8 x i16> %1, %2
  ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v8i16
}

define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: mod_s_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <4 x i32> %1, %2
  ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v4i32
}

define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: mod_s_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <2 x i64> %1, %2
  ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v2i64
}

; ---------------------------------------------------------------------------
; urem <vector>, <vector>  ->  mod_u.{b,h,w,d}
; ---------------------------------------------------------------------------

define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: mod_u_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <16 x i8> %1, %2
  ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v16i8
}

define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: mod_u_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <8 x i16> %1, %2
  ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v8i16
}

define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: mod_u_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <4 x i32> %1, %2
  ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v4i32
}

define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: mod_u_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <2 x i64> %1, %2
  ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v2i64
}