; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -float-abi soft -o - | FileCheck %s
; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -float-abi hard -o - | FileCheck %s -check-prefix CHECK-HARD

; Code generation test for big-endian ARM (armeb-eabi) with NEON enabled.
;
; The module is compiled twice by the two llc invocations above: once with the
; soft-float ABI (output matched against the default prefix) and once with the
; hard-float ABI (output matched against the hard-float prefix). Only the
; arg_* functions at the end of the file carry hard-float expectations.
;
; The conv_* functions come in pairs:
;   * conv_<scalar>_to_<vector> bitcasts a scalar argument to a vector, adds
;     the matching global vector to it and stores the sum through %store.
;   * conv_<vector>_to_<scalar> loads a vector through %load, adds the matching
;     global vector, bitcasts the sum to the scalar type and passes it to the
;     function above.
; The add/fadd between the bitcast and the memory access keeps the bitcast
; from being a plain load-to-store forward. Each function is expected to
; contain a vrev instruction of the stated width; presumably this is the lane
; reversal needed to reconcile the scalar and vector register layouts on a
; big-endian target (NOTE(review): confirm against the ARM backend).

; Global vector operands used as the second addend in the tests below.
; @v2i64 is declared here but is not referenced by any function in this file.
@v2i64 = global <2 x i64> zeroinitializer
@v2i32 = global <2 x i32> zeroinitializer
@v4i32 = global <4 x i32> zeroinitializer
@v4i16 = global <4 x i16> zeroinitializer
@v8i16 = global <8 x i16> zeroinitializer
@v8i8 = global <8 x i8> zeroinitializer
@v16i8 = global <16 x i8> zeroinitializer

@v2f32 = global <2 x float> zeroinitializer
@v2f64 = global <2 x double> zeroinitializer
@v4f32 = global <4 x float> zeroinitializer


; 64 bit conversions

; --- i64 <-> 64-bit vectors ---

define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
; CHECK-LABEL: conv_i64_to_v8i8:
; CHECK: vrev64.8
  %v = bitcast i64 %val to <8 x i8>
  %w = load <8 x i8>, <8 x i8>* @v8i8
  %a = add <8 x i8> %v, %w
  store <8 x i8> %a, <8 x i8>* %store
  ret void
}

define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
; CHECK-LABEL: conv_v8i8_to_i64:
; CHECK: vrev64.8
  %v = load <8 x i8>, <8 x i8>* %load
  %w = load <8 x i8>, <8 x i8>* @v8i8
  %a = add <8 x i8> %v, %w
  %f = bitcast <8 x i8> %a to i64
  call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store )
  ret void
}

define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
; CHECK-LABEL: conv_i64_to_v4i16:
; CHECK: vrev64.16
  %v = bitcast i64 %val to <4 x i16>
  %w = load <4 x i16>, <4 x i16>* @v4i16
  %a = add <4 x i16> %v, %w
  store <4 x i16> %a, <4 x i16>* %store
  ret void
}

define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
; CHECK-LABEL: conv_v4i16_to_i64:
; CHECK: vrev64.16
  %v = load <4 x i16>, <4 x i16>* %load
  %w = load <4 x i16>, <4 x i16>* @v4i16
  %a = add <4 x i16> %v, %w
  %f = bitcast <4 x i16> %a to i64
  call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store )
  ret void
}

define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
; CHECK-LABEL: conv_i64_to_v2i32:
; CHECK: vrev64.32
  %v = bitcast i64 %val to <2 x i32>
  %w = load <2 x i32>, <2 x i32>* @v2i32
  %a = add <2 x i32> %v, %w
  store <2 x i32> %a, <2 x i32>* %store
  ret void
}

define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
; CHECK-LABEL: conv_v2i32_to_i64:
; CHECK: vrev64.32
  %v = load <2 x i32>, <2 x i32>* %load
  %w = load <2 x i32>, <2 x i32>* @v2i32
  %a = add <2 x i32> %v, %w
  %f = bitcast <2 x i32> %a to i64
  call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store )
  ret void
}

define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
; CHECK-LABEL: conv_i64_to_v2f32:
; CHECK: vrev64.32
  %v = bitcast i64 %val to <2 x float>
  %w = load <2 x float>, <2 x float>* @v2f32
  %a = fadd <2 x float> %v, %w
  store <2 x float> %a, <2 x float>* %store
  ret void
}

define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
; CHECK-LABEL: conv_v2f32_to_i64:
; CHECK: vrev64.32
  %v = load <2 x float>, <2 x float>* %load
  %w = load <2 x float>, <2 x float>* @v2f32
  %a = fadd <2 x float> %v, %w
  %f = bitcast <2 x float> %a to i64
  call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store )
  ret void
}

; --- double <-> 64-bit vectors ---

define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
; CHECK-LABEL: conv_f64_to_v8i8:
; CHECK: vrev64.8
  %v = bitcast double %val to <8 x i8>
  %w = load <8 x i8>, <8 x i8>* @v8i8
  %a = add <8 x i8> %v, %w
  store <8 x i8> %a, <8 x i8>* %store
  ret void
}

define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
; CHECK-LABEL: conv_v8i8_to_f64:
; CHECK: vrev64.8
  %v = load <8 x i8>, <8 x i8>* %load
  %w = load <8 x i8>, <8 x i8>* @v8i8
  %a = add <8 x i8> %v, %w
  %f = bitcast <8 x i8> %a to double
  call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store )
  ret void
}

define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
; CHECK-LABEL: conv_f64_to_v4i16:
; CHECK: vrev64.16
  %v = bitcast double %val to <4 x i16>
  %w = load <4 x i16>, <4 x i16>* @v4i16
  %a = add <4 x i16> %v, %w
  store <4 x i16> %a, <4 x i16>* %store
  ret void
}

define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
; CHECK-LABEL: conv_v4i16_to_f64:
; CHECK: vrev64.16
  %v = load <4 x i16>, <4 x i16>* %load
  %w = load <4 x i16>, <4 x i16>* @v4i16
  %a = add <4 x i16> %v, %w
  %f = bitcast <4 x i16> %a to double
  call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store )
  ret void
}

define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
; CHECK-LABEL: conv_f64_to_v2i32:
; CHECK: vrev64.32
  %v = bitcast double %val to <2 x i32>
  %w = load <2 x i32>, <2 x i32>* @v2i32
  %a = add <2 x i32> %v, %w
  store <2 x i32> %a, <2 x i32>* %store
  ret void
}

define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
; CHECK-LABEL: conv_v2i32_to_f64:
; CHECK: vrev64.32
  %v = load <2 x i32>, <2 x i32>* %load
  %w = load <2 x i32>, <2 x i32>* @v2i32
  %a = add <2 x i32> %v, %w
  %f = bitcast <2 x i32> %a to double
  call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store )
  ret void
}

define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
; CHECK-LABEL: conv_f64_to_v2f32:
; CHECK: vrev64.32
  %v = bitcast double %val to <2 x float>
  %w = load <2 x float>, <2 x float>* @v2f32
  %a = fadd <2 x float> %v, %w
  store <2 x float> %a, <2 x float>* %store
  ret void
}

define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
; CHECK-LABEL: conv_v2f32_to_f64:
; CHECK: vrev64.32
  %v = load <2 x float>, <2 x float>* %load
  %w = load <2 x float>, <2 x float>* @v2f32
  %a = fadd <2 x float> %v, %w
  %f = bitcast <2 x float> %a to double
  call void @conv_f64_to_v2f32( double %f, <2 x float>* %store )
  ret void
}

; 128 bit conversions

; --- i128 <-> 128-bit vectors ---

define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
; CHECK-LABEL: conv_i128_to_v16i8:
; CHECK: vrev32.8
  %v = bitcast i128 %val to <16 x i8>
  %w = load <16 x i8>, <16 x i8>* @v16i8
  %a = add <16 x i8> %v, %w
  store <16 x i8> %a, <16 x i8>* %store
  ret void
}

define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
; CHECK-LABEL: conv_v16i8_to_i128:
; CHECK: vrev32.8
  %v = load <16 x i8>, <16 x i8>* %load
  %w = load <16 x i8>, <16 x i8>* @v16i8
  %a = add <16 x i8> %v, %w
  %f = bitcast <16 x i8> %a to i128
  call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store )
  ret void
}

define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
; CHECK-LABEL: conv_i128_to_v8i16:
; CHECK: vrev32.16
  %v = bitcast i128 %val to <8 x i16>
  %w = load <8 x i16>, <8 x i16>* @v8i16
  %a = add <8 x i16> %v, %w
  store <8 x i16> %a, <8 x i16>* %store
  ret void
}

define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
; CHECK-LABEL: conv_v8i16_to_i128:
; CHECK: vrev32.16
  %v = load <8 x i16>, <8 x i16>* %load
  %w = load <8 x i16>, <8 x i16>* @v8i16
  %a = add <8 x i16> %v, %w
  %f = bitcast <8 x i16> %a to i128
  call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store )
  ret void
}

define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
; CHECK-LABEL: conv_i128_to_v4i32:
; CHECK: vrev64.32
  %v = bitcast i128 %val to <4 x i32>
  %w = load <4 x i32>, <4 x i32>* @v4i32
  %a = add <4 x i32> %v, %w
  store <4 x i32> %a, <4 x i32>* %store
  ret void
}

define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
; CHECK-LABEL: conv_v4i32_to_i128:
; CHECK: vrev64.32
  %v = load <4 x i32>, <4 x i32>* %load
  %w = load <4 x i32>, <4 x i32>* @v4i32
  %a = add <4 x i32> %v, %w
  %f = bitcast <4 x i32> %a to i128
  call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store )
  ret void
}

define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
; CHECK-LABEL: conv_i128_to_v4f32:
; CHECK: vrev64.32
  %v = bitcast i128 %val to <4 x float>
  %w = load <4 x float>, <4 x float>* @v4f32
  %a = fadd <4 x float> %v, %w
  store <4 x float> %a, <4 x float>* %store
  ret void
}

define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
; CHECK-LABEL: conv_v4f32_to_i128:
; CHECK: vrev64.32
  %v = load <4 x float>, <4 x float>* %load
  %w = load <4 x float>, <4 x float>* @v4f32
  %a = fadd <4 x float> %v, %w
  %f = bitcast <4 x float> %a to i128
  call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store )
  ret void
}

; --- fp128 <-> 128-bit vectors ---

define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
; CHECK-LABEL: conv_f128_to_v2f64:
; CHECK: vrev64.32
  %v = bitcast fp128 %val to <2 x double>
  %w = load <2 x double>, <2 x double>* @v2f64
  %a = fadd <2 x double> %v, %w
  store <2 x double> %a, <2 x double>* %store
  ret void
}

define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
; CHECK-LABEL: conv_v2f64_to_f128:
; CHECK: vrev64.32
  %v = load <2 x double>, <2 x double>* %load
  %w = load <2 x double>, <2 x double>* @v2f64
  %a = fadd <2 x double> %v, %w
  %f = bitcast <2 x double> %a to fp128
  call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store )
  ret void
}

define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
; CHECK-LABEL: conv_f128_to_v16i8:
; CHECK: vrev32.8
  %v = bitcast fp128 %val to <16 x i8>
  %w = load <16 x i8>, <16 x i8>* @v16i8
  %a = add <16 x i8> %v, %w
  store <16 x i8> %a, <16 x i8>* %store
  ret void
}

define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
; CHECK-LABEL: conv_v16i8_to_f128:
; CHECK: vrev32.8
  %v = load <16 x i8>, <16 x i8>* %load
  %w = load <16 x i8>, <16 x i8>* @v16i8
  %a = add <16 x i8> %v, %w
  %f = bitcast <16 x i8> %a to fp128
  call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store )
  ret void
}

define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
; CHECK-LABEL: conv_f128_to_v8i16:
; CHECK: vrev32.16
  %v = bitcast fp128 %val to <8 x i16>
  %w = load <8 x i16>, <8 x i16>* @v8i16
  %a = add <8 x i16> %v, %w
  store <8 x i16> %a, <8 x i16>* %store
  ret void
}

define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
; CHECK-LABEL: conv_v8i16_to_f128:
; CHECK: vrev32.16
  %v = load <8 x i16>, <8 x i16>* %load
  %w = load <8 x i16>, <8 x i16>* @v8i16
  %a = add <8 x i16> %v, %w
  %f = bitcast <8 x i16> %a to fp128
  call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store )
  ret void
}

define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
; CHECK-LABEL: conv_f128_to_v4f32:
; CHECK: vrev64.32
  %v = bitcast fp128 %val to <4 x float>
  %w = load <4 x float>, <4 x float>* @v4f32
  %a = fadd <4 x float> %v, %w
  store <4 x float> %a, <4 x float>* %store
  ret void
}

define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
; CHECK-LABEL: conv_v4f32_to_f128:
; CHECK: vrev64.32
  %v = load <4 x float>, <4 x float>* %load
  %w = load <4 x float>, <4 x float>* @v4f32
  %a = fadd <4 x float> %v, %w
  %f = bitcast <4 x float> %a to fp128
  call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store )
  ret void
}

; 128 bit vector arguments
;
; Each arg_* function stores its 128-bit vector argument straight through
; %store. With the soft-float ABI the expected output moves the r3/r2 pair and
; then the r1/r0 pair into two D registers and stores those two registers with
; vst1.64. With the hard-float ABI the expected output contains no vmov before
; a vst1.64 of d0 and d1.

define void @arg_v4i32( <4 x i32> %var, <4 x i32>* %store ) {
; CHECK-LABEL: arg_v4i32:
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v4i32:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <4 x i32> %var, <4 x i32>* %store
  ret void
}

define void @arg_v8i16( <8 x i16> %var, <8 x i16>* %store ) {
; CHECK-LABEL: arg_v8i16:
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v8i16:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <8 x i16> %var, <8 x i16>* %store
  ret void
}

define void @arg_v16i8( <16 x i8> %var, <16 x i8>* %store ) {
; CHECK-LABEL: arg_v16i8:
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v16i8:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <16 x i8> %var, <16 x i8>* %store
  ret void
}
