; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Code-generation test for MIPS MSA vector bitwise operations, executed for
; both the mips and mipsel targets with the msa and fp64 features enabled.
;
; Every function loads its vector operand(s) through pointer arguments,
; applies one IR operation, and stores the result through %c. The expected
; assembly addresses %c via $4, %a via $5, %b via $6 and %m via $7.
;
; Function labels are matched with label directives so that the expectations
; written for one function can only be satisfied by that function's output.

; --- and, register-register: and.v is expected for every element type. ---

define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: and_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v16i8
}

define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: and_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v8i16
}

define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: and_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v4i32
}

define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: and_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v2i64
}

; --- and with a splat constant: the byte form expects andi.b with the ---
; --- constant as an immediate; wider forms expect ldi plus and.v.      ---

define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: and_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v16i8_i
}

define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: and_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v8i16_i
}

define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: and_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v4i32_i
}

define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: and_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = and <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v2i64_i
}

; --- or, register-register: or.v is expected for every element type. ---

define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: or_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v16i8
}

define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: or_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v8i16
}

define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: or_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v4i32
}

define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: or_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v2i64
}

; --- or with a splat constant: the byte form expects ori.b; wider forms ---
; --- expect ldi plus or.v.                                              ---

define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: or_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v16i8_i
}

define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: or_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v8i16_i
}

define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: or_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v4i32_i
}

define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: or_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 3, i64 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v2i64_i
}

; --- nor, register-register: an or followed by an xor with all-ones is ---
; --- expected to become a single nor.v.                                ---

define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: nor_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v16i8
}

define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: nor_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v8i16
}

define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: nor_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v4i32
}

define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: nor_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v2i64
}

; --- nor with a splat constant: the byte form expects the immediate ---
; --- instruction; wider forms expect ldi plus nor.v.                ---

define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: nor_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; The IR computes NOT(OR), so the expectation names nori.b in full. The
  ; shorter ori.b spelling is a substring of nori.b and therefore could not
  ; tell the two instructions apart.
  ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v16i8_i
}

define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: nor_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v8i16_i
}

define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: nor_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v4i32_i
}

define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: nor_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v2i64_i
}

; --- xor, register-register: xor.v is expected for every element type. ---

define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: xor_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <16 x i8> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v16i8
}

define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: xor_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <8 x i16> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v8i16
}

define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: xor_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <4 x i32> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v4i32
}

define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: xor_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <2 x i64> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v2i64
}

; --- xor with a splat constant: the byte form expects xori.b; wider ---
; --- forms expect ldi plus xor.v.                                   ---

define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: xor_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v16i8_i
}

define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: xor_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v8i16_i
}

define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: xor_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v4i32_i
}

define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: xor_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <2 x i64> %1, <i64 3, i64 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v2i64_i
}

; --- shl by a vector amount: sll.<fmt> is expected. ---

define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: sll_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> %1, %2
  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8
}

define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: sll_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> %1, %2
  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16
}

define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: sll_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> %1, %2
  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32
}

define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: sll_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> %1, %2
  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64
}

; --- shl by a splat constant: slli.<fmt> is expected for every type. ---

define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: sll_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8_i
}

define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: sll_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16_i
}

define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: sll_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32_i
}

define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: sll_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64_i
}

; --- ashr by a vector amount: sra.<fmt> is expected. ---

define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: sra_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <16 x i8> %1, %2
  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8
}

define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: sra_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <8 x i16> %1, %2
  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16
}

define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: sra_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <4 x i32> %1, %2
  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32
}

define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: sra_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <2 x i64> %1, %2
  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64
}

; --- ashr by a splat constant: srai.<fmt> is expected for every type. ---

define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: sra_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8_i
}

define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: sra_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16_i
}

define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: sra_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32_i
}

define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: sra_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64_i
}

; --- lshr by a vector amount: srl.<fmt> is expected. ---

define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: srl_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <16 x i8> %1, %2
  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8
}

define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: srl_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <8 x i16> %1, %2
  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16
}

define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: srl_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <4 x i32> %1, %2
  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32
}

define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: srl_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <2 x i64> %1, %2
  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64
}

; --- lshr by a splat constant: srli.<fmt> is expected for every type. ---

define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: srl_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8_i
}

define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: srl_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16_i
}

define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: srl_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32_i
}

define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: srl_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64_i
}

; --- llvm.ctpop: pcnt.<fmt> is expected. ---

define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v16i8
}

define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v8i16
}

define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v4i32
}

define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v2i64
}

; --- llvm.ctlz: nlzc.<fmt> is expected. ---

define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v16i8
}

define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v8i16
}

define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v4i32
}

define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v2i64
}

; --- bit select: (a & m) | (b & ~m) is expected to become bmnz.v, with ---
; --- the result written over the register that held %b.                ---

define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
  ; CHECK-LABEL: bsel_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>, <16 x i8>* %m
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1>
  %5 = and <16 x i8> %1, %3
  %6 = and <16 x i8> %2, %4
  %7 = or <16 x i8> %5, %6
  ; bmnz is the same operation
  ; (vselect Mask, IfSet, IfClr) -> (BMNZ IfClr, IfSet, Mask)
  ; CHECK-DAG: bmnz.v [[R2]], [[R1]], [[R3]]
  store <16 x i8> %7, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)

  ret void
  ; CHECK: .size bsel_v16i8
}

define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
  ; CHECK: bsel_v16i8_i:

  %1 = load <16 x i8>,
<16 x i8>* %a 1006 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1007 %2 = load <16 x i8>, <16 x i8>* %m 1008 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6) 1009 %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, 1010 i8 -1, i8 -1, i8 -1, i8 -1, 1011 i8 -1, i8 -1, i8 -1, i8 -1, 1012 i8 -1, i8 -1, i8 -1, i8 -1> 1013 %4 = and <16 x i8> %1, %3 1014 %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6, 1015 i8 6, i8 6, i8 6, i8 6, 1016 i8 6, i8 6, i8 6, i8 6, 1017 i8 6, i8 6, i8 6, i8 6>, %2 1018 %6 = or <16 x i8> %4, %5 1019 ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6 1020 store <16 x i8> %6, <16 x i8>* %c 1021 ; CHECK-DAG: st.b [[R3]], 0($4) 1022 1023 ret void 1024 ; CHECK: .size bsel_v16i8_i 1025} 1026 1027define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1028 ; CHECK: bsel_v8i16: 1029 1030 %1 = load <8 x i16>, <8 x i16>* %a 1031 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1032 %2 = load <8 x i16>, <8 x i16>* %b 1033 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1034 %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6, 1035 i16 6, i16 6, i16 6, i16 6> 1036 %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529, 1037 i16 65529, i16 65529, i16 65529, i16 65529> 1038 %5 = or <8 x i16> %3, %4 1039 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6 1040 ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] 1041 store <8 x i16> %5, <8 x i16>* %c 1042 ; CHECK-DAG: st.h [[R3]], 0($4) 1043 1044 ret void 1045 ; CHECK: .size bsel_v8i16 1046} 1047 1048define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1049 ; CHECK: bsel_v4i32: 1050 1051 %1 = load <4 x i32>, <4 x i32>* %a 1052 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1053 %2 = load <4 x i32>, <4 x i32>* %b 1054 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1055 %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6> 1056 %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289> 1057 %5 = or <4 x i32> %3, %4 1058 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6 1059 ; CHECK-DAG: bsel.v [[R3]], 
[[R2]], [[R1]] 1060 store <4 x i32> %5, <4 x i32>* %c 1061 ; CHECK-DAG: st.w [[R3]], 0($4) 1062 1063 ret void 1064 ; CHECK: .size bsel_v4i32 1065} 1066 1067define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1068 ; CHECK: bsel_v2i64: 1069 1070 %1 = load <2 x i64>, <2 x i64>* %a 1071 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1072 %2 = load <2 x i64>, <2 x i64>* %b 1073 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1074 %3 = and <2 x i64> %1, <i64 6, i64 6> 1075 %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609> 1076 %5 = or <2 x i64> %3, %4 1077 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6 1078 ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] 1079 store <2 x i64> %5, <2 x i64>* %c 1080 ; CHECK-DAG: st.d [[R3]], 0($4) 1081 1082 ret void 1083 ; CHECK: .size bsel_v2i64 1084} 1085 1086define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1087 ; CHECK: binsl_v16i8_i: 1088 1089 %1 = load <16 x i8>, <16 x i8>* %a 1090 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1091 %2 = load <16 x i8>, <16 x i8>* %b 1092 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1093 %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192, 1094 i8 192, i8 192, i8 192, i8 192, 1095 i8 192, i8 192, i8 192, i8 192, 1096 i8 192, i8 192, i8 192, i8 192> 1097 %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63, 1098 i8 63, i8 63, i8 63, i8 63, 1099 i8 63, i8 63, i8 63, i8 63, 1100 i8 63, i8 63, i8 63, i8 63> 1101 %5 = or <16 x i8> %3, %4 1102 ; CHECK-DAG: binsli.b [[R2]], [[R1]], 1 1103 store <16 x i8> %5, <16 x i8>* %c 1104 ; CHECK-DAG: st.b [[R2]], 0($4) 1105 1106 ret void 1107 ; CHECK: .size binsl_v16i8_i 1108} 1109 1110define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1111 ; CHECK: binsl_v8i16_i: 1112 1113 %1 = load <8 x i16>, <8 x i16>* %a 1114 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1115 %2 = load <8 x i16>, <8 x i16>* %b 1116 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1117 %3 = and <8 x i16> %1, <i16 49152, 
i16 49152, i16 49152, i16 49152, 1118 i16 49152, i16 49152, i16 49152, i16 49152> 1119 %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383, 1120 i16 16383, i16 16383, i16 16383, i16 16383> 1121 %5 = or <8 x i16> %3, %4 1122 ; CHECK-DAG: binsli.h [[R2]], [[R1]], 1 1123 store <8 x i16> %5, <8 x i16>* %c 1124 ; CHECK-DAG: st.h [[R2]], 0($4) 1125 1126 ret void 1127 ; CHECK: .size binsl_v8i16_i 1128} 1129 1130define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1131 ; CHECK: binsl_v4i32_i: 1132 1133 %1 = load <4 x i32>, <4 x i32>* %a 1134 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1135 %2 = load <4 x i32>, <4 x i32>* %b 1136 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1137 %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472> 1138 %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> 1139 %5 = or <4 x i32> %3, %4 1140 ; CHECK-DAG: binsli.w [[R2]], [[R1]], 1 1141 store <4 x i32> %5, <4 x i32>* %c 1142 ; CHECK-DAG: st.w [[R2]], 0($4) 1143 1144 ret void 1145 ; CHECK: .size binsl_v4i32_i 1146} 1147 1148define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1149 ; CHECK: binsl_v2i64_i: 1150 1151 %1 = load <2 x i64>, <2 x i64>* %a 1152 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1153 %2 = load <2 x i64>, <2 x i64>* %b 1154 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1155 %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608> 1156 %4 = and <2 x i64> %2, <i64 7, i64 7> 1157 %5 = or <2 x i64> %3, %4 1158 ; TODO: We use a particularly wide mask here to work around a legalization 1159 ; issue. If the mask doesn't fit within a 10-bit immediate, it gets 1160 ; legalized into a constant pool. We should add a test to cover the 1161 ; other cases once they correctly select binsli.d. 
1162 ; CHECK-DAG: binsli.d [[R2]], [[R1]], 60 1163 store <2 x i64> %5, <2 x i64>* %c 1164 ; CHECK-DAG: st.d [[R2]], 0($4) 1165 1166 ret void 1167 ; CHECK: .size binsl_v2i64_i 1168} 1169 1170define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1171 ; CHECK: binsr_v16i8_i: 1172 1173 %1 = load <16 x i8>, <16 x i8>* %a 1174 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1175 %2 = load <16 x i8>, <16 x i8>* %b 1176 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1177 %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, 1178 i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 1179 %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252, 1180 i8 252, i8 252, i8 252, i8 252, 1181 i8 252, i8 252, i8 252, i8 252, 1182 i8 252, i8 252, i8 252, i8 252> 1183 %5 = or <16 x i8> %3, %4 1184 ; CHECK-DAG: binsri.b [[R2]], [[R1]], 1 1185 store <16 x i8> %5, <16 x i8>* %c 1186 ; CHECK-DAG: st.b [[R2]], 0($4) 1187 1188 ret void 1189 ; CHECK: .size binsr_v16i8_i 1190} 1191 1192define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1193 ; CHECK: binsr_v8i16_i: 1194 1195 %1 = load <8 x i16>, <8 x i16>* %a 1196 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1197 %2 = load <8 x i16>, <8 x i16>* %b 1198 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1199 %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, 1200 i16 3, i16 3, i16 3, i16 3> 1201 %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532, 1202 i16 65532, i16 65532, i16 65532, i16 65532> 1203 %5 = or <8 x i16> %3, %4 1204 ; CHECK-DAG: binsri.h [[R2]], [[R1]], 1 1205 store <8 x i16> %5, <8 x i16>* %c 1206 ; CHECK-DAG: st.h [[R2]], 0($4) 1207 1208 ret void 1209 ; CHECK: .size binsr_v8i16_i 1210} 1211 1212define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1213 ; CHECK: binsr_v4i32_i: 1214 1215 %1 = load <4 x i32>, <4 x i32>* %a 1216 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1217 %2 = load <4 x i32>, <4 x i32>* %b 1218 ; CHECK-DAG: ld.w 
[[R2:\$w[0-9]+]], 0($6) 1219 %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> 1220 %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292> 1221 %5 = or <4 x i32> %3, %4 1222 ; CHECK-DAG: binsri.w [[R2]], [[R1]], 1 1223 store <4 x i32> %5, <4 x i32>* %c 1224 ; CHECK-DAG: st.w [[R2]], 0($4) 1225 1226 ret void 1227 ; CHECK: .size binsr_v4i32_i 1228} 1229 1230define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1231 ; CHECK: binsr_v2i64_i: 1232 1233 %1 = load <2 x i64>, <2 x i64>* %a 1234 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1235 %2 = load <2 x i64>, <2 x i64>* %b 1236 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1237 %3 = and <2 x i64> %1, <i64 3, i64 3> 1238 %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612> 1239 %5 = or <2 x i64> %3, %4 1240 ; CHECK-DAG: binsri.d [[R2]], [[R1]], 1 1241 store <2 x i64> %5, <2 x i64>* %c 1242 ; CHECK-DAG: st.d [[R2]], 0($4) 1243 1244 ret void 1245 ; CHECK: .size binsr_v2i64_i 1246} 1247 1248define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1249 ; CHECK: bclr_v16i8: 1250 1251 %1 = load <16 x i8>, <16 x i8>* %a 1252 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1253 %2 = load <16 x i8>, <16 x i8>* %b 1254 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1255 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 1256 %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1257 %5 = and <16 x i8> %1, %4 1258 ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1259 store <16 x i8> %5, <16 x i8>* %c 1260 ; CHECK-DAG: st.b [[R3]], 0($4) 1261 1262 ret void 1263 ; CHECK: .size bclr_v16i8 1264} 1265 1266define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1267 ; CHECK: bclr_v8i16: 1268 1269 %1 = load <8 x i16>, <8 x i16>* %a 1270 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 
0($5) 1271 %2 = load <8 x i16>, <8 x i16>* %b 1272 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1273 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 1274 %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1275 %5 = and <8 x i16> %1, %4 1276 ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1277 store <8 x i16> %5, <8 x i16>* %c 1278 ; CHECK-DAG: st.h [[R3]], 0($4) 1279 1280 ret void 1281 ; CHECK: .size bclr_v8i16 1282} 1283 1284define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1285 ; CHECK: bclr_v4i32: 1286 1287 %1 = load <4 x i32>, <4 x i32>* %a 1288 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1289 %2 = load <4 x i32>, <4 x i32>* %b 1290 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1291 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 1292 %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1> 1293 %5 = and <4 x i32> %1, %4 1294 ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1295 store <4 x i32> %5, <4 x i32>* %c 1296 ; CHECK-DAG: st.w [[R3]], 0($4) 1297 1298 ret void 1299 ; CHECK: .size bclr_v4i32 1300} 1301 1302define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1303 ; CHECK: bclr_v2i64: 1304 1305 %1 = load <2 x i64>, <2 x i64>* %a 1306 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1307 %2 = load <2 x i64>, <2 x i64>* %b 1308 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1309 %3 = shl <2 x i64> <i64 1, i64 1>, %2 1310 %4 = xor <2 x i64> %3, <i64 -1, i64 -1> 1311 %5 = and <2 x i64> %1, %4 1312 ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1313 store <2 x i64> %5, <2 x i64>* %c 1314 ; CHECK-DAG: st.d [[R3]], 0($4) 1315 1316 ret void 1317 ; CHECK: .size bclr_v2i64 1318} 1319 1320define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1321 ; CHECK: bset_v16i8: 1322 1323 %1 = load <16 x i8>, <16 x i8>* %a 1324 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1325 %2 = load <16 x i8>, <16 x i8>* %b 1326 ; CHECK-DAG: ld.b 
[[R2:\$w[0-9]+]], 0($6) 1327 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 1328 %4 = or <16 x i8> %1, %3 1329 ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1330 store <16 x i8> %4, <16 x i8>* %c 1331 ; CHECK-DAG: st.b [[R3]], 0($4) 1332 1333 ret void 1334 ; CHECK: .size bset_v16i8 1335} 1336 1337define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1338 ; CHECK: bset_v8i16: 1339 1340 %1 = load <8 x i16>, <8 x i16>* %a 1341 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1342 %2 = load <8 x i16>, <8 x i16>* %b 1343 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1344 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 1345 %4 = or <8 x i16> %1, %3 1346 ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1347 store <8 x i16> %4, <8 x i16>* %c 1348 ; CHECK-DAG: st.h [[R3]], 0($4) 1349 1350 ret void 1351 ; CHECK: .size bset_v8i16 1352} 1353 1354define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1355 ; CHECK: bset_v4i32: 1356 1357 %1 = load <4 x i32>, <4 x i32>* %a 1358 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1359 %2 = load <4 x i32>, <4 x i32>* %b 1360 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1361 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 1362 %4 = or <4 x i32> %1, %3 1363 ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1364 store <4 x i32> %4, <4 x i32>* %c 1365 ; CHECK-DAG: st.w [[R3]], 0($4) 1366 1367 ret void 1368 ; CHECK: .size bset_v4i32 1369} 1370 1371define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1372 ; CHECK: bset_v2i64: 1373 1374 %1 = load <2 x i64>, <2 x i64>* %a 1375 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1376 %2 = load <2 x i64>, <2 x i64>* %b 1377 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1378 %3 = shl <2 x i64> <i64 1, i64 1>, %2 1379 %4 = or <2 x i64> %1, %3 1380 ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1381 store <2 x i64> %4, <2 x i64>* 
%c 1382 ; CHECK-DAG: st.d [[R3]], 0($4) 1383 1384 ret void 1385 ; CHECK: .size bset_v2i64 1386} 1387 1388define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1389 ; CHECK: bneg_v16i8: 1390 1391 %1 = load <16 x i8>, <16 x i8>* %a 1392 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1393 %2 = load <16 x i8>, <16 x i8>* %b 1394 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1395 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 1396 %4 = xor <16 x i8> %1, %3 1397 ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1398 store <16 x i8> %4, <16 x i8>* %c 1399 ; CHECK-DAG: st.b [[R3]], 0($4) 1400 1401 ret void 1402 ; CHECK: .size bneg_v16i8 1403} 1404 1405define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1406 ; CHECK: bneg_v8i16: 1407 1408 %1 = load <8 x i16>, <8 x i16>* %a 1409 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1410 %2 = load <8 x i16>, <8 x i16>* %b 1411 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1412 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 1413 %4 = xor <8 x i16> %1, %3 1414 ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1415 store <8 x i16> %4, <8 x i16>* %c 1416 ; CHECK-DAG: st.h [[R3]], 0($4) 1417 1418 ret void 1419 ; CHECK: .size bneg_v8i16 1420} 1421 1422define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1423 ; CHECK: bneg_v4i32: 1424 1425 %1 = load <4 x i32>, <4 x i32>* %a 1426 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1427 %2 = load <4 x i32>, <4 x i32>* %b 1428 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1429 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 1430 %4 = xor <4 x i32> %1, %3 1431 ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1432 store <4 x i32> %4, <4 x i32>* %c 1433 ; CHECK-DAG: st.w [[R3]], 0($4) 1434 1435 ret void 1436 ; CHECK: .size bneg_v4i32 1437} 1438 1439define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 
1440 ; CHECK: bneg_v2i64: 1441 1442 %1 = load <2 x i64>, <2 x i64>* %a 1443 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1444 %2 = load <2 x i64>, <2 x i64>* %b 1445 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1446 %3 = shl <2 x i64> <i64 1, i64 1>, %2 1447 %4 = xor <2 x i64> %1, %3 1448 ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1449 store <2 x i64> %4, <2 x i64>* %c 1450 ; CHECK-DAG: st.d [[R3]], 0($4) 1451 1452 ret void 1453 ; CHECK: .size bneg_v2i64 1454} 1455 1456define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { 1457 ; CHECK: bclri_v16i8: 1458 1459 %1 = load <16 x i8>, <16 x i8>* %a 1460 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1461 %2 = xor <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, 1462 <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1463 %3 = and <16 x i8> %1, %2 1464 ; bclri.b and andi.b are exactly equivalent. 1465 ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247 1466 store <16 x i8> %3, <16 x i8>* %c 1467 ; CHECK-DAG: st.b [[R3]], 0($4) 1468 1469 ret void 1470 ; CHECK: .size bclri_v16i8 1471} 1472 1473define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { 1474 ; CHECK: bclri_v8i16: 1475 1476 %1 = load <8 x i16>, <8 x i16>* %a 1477 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1478 %2 = xor <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>, 1479 <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1480 %3 = and <8 x i16> %1, %2 1481 ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3 1482 store <8 x i16> %3, <8 x i16>* %c 1483 ; CHECK-DAG: st.h [[R3]], 0($4) 1484 1485 ret void 1486 ; CHECK: .size bclri_v8i16 1487} 1488 1489define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { 1490 ; CHECK: bclri_v4i32: 1491 1492 %1 = load <4 x i32>, <4 x i32>* %a 1493 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1494 %2 = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>, 1495 <i32 -1, 
i32 -1, i32 -1, i32 -1> 1496 %3 = and <4 x i32> %1, %2 1497 ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3 1498 store <4 x i32> %3, <4 x i32>* %c 1499 ; CHECK-DAG: st.w [[R3]], 0($4) 1500 1501 ret void 1502 ; CHECK: .size bclri_v4i32 1503} 1504 1505define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { 1506 ; CHECK: bclri_v2i64: 1507 1508 %1 = load <2 x i64>, <2 x i64>* %a 1509 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1510 %2 = xor <2 x i64> <i64 8, i64 8>, 1511 <i64 -1, i64 -1> 1512 %3 = and <2 x i64> %1, %2 1513 ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3 1514 store <2 x i64> %3, <2 x i64>* %c 1515 ; CHECK-DAG: st.d [[R3]], 0($4) 1516 1517 ret void 1518 ; CHECK: .size bclri_v2i64 1519} 1520 1521define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { 1522 ; CHECK: bseti_v16i8: 1523 1524 %1 = load <16 x i8>, <16 x i8>* %a 1525 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1526 %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> 1527 ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3 1528 store <16 x i8> %2, <16 x i8>* %c 1529 ; CHECK-DAG: st.b [[R3]], 0($4) 1530 1531 ret void 1532 ; CHECK: .size bseti_v16i8 1533} 1534 1535define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { 1536 ; CHECK: bseti_v8i16: 1537 1538 %1 = load <8 x i16>, <8 x i16>* %a 1539 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1540 %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1541 ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3 1542 store <8 x i16> %2, <8 x i16>* %c 1543 ; CHECK-DAG: st.h [[R3]], 0($4) 1544 1545 ret void 1546 ; CHECK: .size bseti_v8i16 1547} 1548 1549define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { 1550 ; CHECK: bseti_v4i32: 1551 1552 %1 = load <4 x i32>, <4 x i32>* %a 1553 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1554 %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8> 1555 ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3 1556 
store <4 x i32> %2, <4 x i32>* %c 1557 ; CHECK-DAG: st.w [[R3]], 0($4) 1558 1559 ret void 1560 ; CHECK: .size bseti_v4i32 1561} 1562 1563define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { 1564 ; CHECK: bseti_v2i64: 1565 1566 %1 = load <2 x i64>, <2 x i64>* %a 1567 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1568 %2 = or <2 x i64> %1, <i64 8, i64 8> 1569 ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3 1570 store <2 x i64> %2, <2 x i64>* %c 1571 ; CHECK-DAG: st.d [[R3]], 0($4) 1572 1573 ret void 1574 ; CHECK: .size bseti_v2i64 1575} 1576 1577define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { 1578 ; CHECK: bnegi_v16i8: 1579 1580 %1 = load <16 x i8>, <16 x i8>* %a 1581 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1582 %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> 1583 ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3 1584 store <16 x i8> %2, <16 x i8>* %c 1585 ; CHECK-DAG: st.b [[R3]], 0($4) 1586 1587 ret void 1588 ; CHECK: .size bnegi_v16i8 1589} 1590 1591define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { 1592 ; CHECK: bnegi_v8i16: 1593 1594 %1 = load <8 x i16>, <8 x i16>* %a 1595 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1596 %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1597 ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3 1598 store <8 x i16> %2, <8 x i16>* %c 1599 ; CHECK-DAG: st.h [[R3]], 0($4) 1600 1601 ret void 1602 ; CHECK: .size bnegi_v8i16 1603} 1604 1605define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { 1606 ; CHECK: bnegi_v4i32: 1607 1608 %1 = load <4 x i32>, <4 x i32>* %a 1609 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1610 %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8> 1611 ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3 1612 store <4 x i32> %2, <4 x i32>* %c 1613 ; CHECK-DAG: st.w [[R3]], 0($4) 1614 1615 ret void 1616 ; CHECK: .size bnegi_v4i32 1617} 1618 1619define void @bnegi_v2i64(<2 x 
i64>* %c, <2 x i64>* %a) nounwind { 1620 ; CHECK: bnegi_v2i64: 1621 1622 %1 = load <2 x i64>, <2 x i64>* %a 1623 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1624 %2 = xor <2 x i64> %1, <i64 8, i64 8> 1625 ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3 1626 store <2 x i64> %2, <2 x i64>* %c 1627 ; CHECK-DAG: st.d [[R3]], 0($4) 1628 1629 ret void 1630 ; CHECK: .size bnegi_v2i64 1631} 1632 1633declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val) 1634declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) 1635declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) 1636declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) 1637declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val) 1638declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val) 1639declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val) 1640declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val) 1641