; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI %s

; Code generation checks for stores of <3 x i64> vectors (24 bytes) on amdgcn.
; Covered below: stores through addrspace(1) and addrspace(3) pointers with
; "align 32" and "align 1", plus stores of a <3 x i64> value truncated to
; <3 x i32>, <3 x i16>, <3 x i8> and <3 x i1>.
;
; Three llc invocations are checked: no -mcpu (SI prefix), -mcpu=bonaire
; (CI prefix) and -mcpu=tonga with flat-for-global disabled (VI prefix).
; Patterns common to all three use the GCN prefix.

; Store with align 32 through an addrspace(1) pointer. Expects one dwordx4
; store at offset 0 and one dwordx2 store at offset 16, in either order.
; GCN-LABEL: {{^}}global_store_v3i64:
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
  ret void
}

; Same store with align 1. Expects 24 single-byte stores; the patterns are
; grouped four at a time purely for readability.
; GCN-LABEL: {{^}}global_store_v3i64_unaligned:
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
  ret void
}

; Store with align 32 through an addrspace(3) pointer. The default run
; expects ds_write2_b64 followed by ds_write_b64; the bonaire and tonga runs
; expect ds_write_b64 followed by ds_write_b128.
; GCN-LABEL: {{^}}local_store_v3i64:
; SI: ds_write2_b64
; SI: ds_write_b64

; CI: ds_write_b64
; CI: ds_write_b128

; VI: ds_write_b64
; VI: ds_write_b128
define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
  ret void
}

; Same addrspace(3) store with align 1. Expects 24 single-byte ds_write_b8
; instructions; the patterns are grouped four at a time for readability.
; GCN-LABEL: {{^}}local_store_v3i64_unaligned:
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
define amdgpu_kernel void @local_store_v3i64_unaligned(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 1
  ret void
}

; Truncate to <3 x i32> and store through an addrspace(1) pointer. The
; default run expects a dwordx2 store plus a dword store; the tonga run
; expects a single dwordx3 store.
; NOTE(review): the bonaire run has no prefix-specific patterns here, so only
; the label is verified for it - confirm whether that omission is intended.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i32:
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dword v
; VI-DAG: buffer_store_dwordx3
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i32>
  store <3 x i32> %trunc, <3 x i32> addrspace(1)* %out
  ret void
}

; Truncate to <3 x i16> and store. Expects one short store and one dword
; store, in either order.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i16:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_dword v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i16>
  store <3 x i16> %trunc, <3 x i16> addrspace(1)* %out
  ret void
}


; Truncate to <3 x i8> and store. Expects one short store and one byte
; store, in either order.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i8:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_byte v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i8(<3 x i8> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i8>
  store <3 x i8> %trunc, <3 x i8> addrspace(1)* %out
  ret void
}

; Truncate to <3 x i1> and store. Three byte-store patterns are listed.
; NOTE(review): the patterns are identical and the FileCheck invocations above
; allow overlapping matches within a DAG group, so all three can be satisfied
; by the same instruction; this does not by itself prove three distinct
; stores are emitted.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i1:
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i1(<3 x i1> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i1>
  store <3 x i1> %trunc, <3 x i1> addrspace(1)* %out
  ret void
}