; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10

; Codegen test for the llvm.amdgcn.udot4 intrinsic. Both kernels below must
; select v_dot4_u32_u8; they differ only in the constant passed as the final
; i1 argument, which decides whether the `clamp` modifier is expected on the
; emitted instruction.
;
; The gfx906 run expects exactly one scalar (s) source register, while the
; gfx1011/gfx1012 runs expect two.
; NOTE(review): presumably this reflects a difference in how many scalar
; operands a single VALU instruction may read on each target -- confirm.

declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)

; Last intrinsic argument is `i1 1`: the instruction line must end in " clamp".
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_clamp
; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot4_clamp(
    i32 addrspace(1)* %r,
    <4 x i8> addrspace(1)* %a,
    <4 x i8> addrspace(1)* %b,
    i32 addrspace(1)* %c) {
entry:
  %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
  %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
  ; The intrinsic takes its packed operands as plain i32 values, so the
  ; <4 x i8> vectors are bitcast before the call.
  %a.val.cast = bitcast <4 x i8> %a.val to i32
  %b.val.cast = bitcast <4 x i8> %b.val to i32
  %c.val = load i32, i32 addrspace(1)* %c
  %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
  store i32 %r.val, i32 addrspace(1)* %r
  ret void
}

; Last intrinsic argument is `i1 0`: the {{$}} anchor right after the final
; operand rejects any trailing modifier such as " clamp".
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_no_clamp
; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot4_no_clamp(
    i32 addrspace(1)* %r,
    <4 x i8> addrspace(1)* %a,
    <4 x i8> addrspace(1)* %b,
    i32 addrspace(1)* %c) {
entry:
  %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
  %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
  ; Same operand preparation as the clamp kernel: reinterpret each byte
  ; vector as a single i32.
  %a.val.cast = bitcast <4 x i8> %a.val to i32
  %b.val.cast = bitcast <4 x i8> %b.val to i32
  %c.val = load i32, i32 addrspace(1)* %c
  %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
  store i32 %r.val, i32 addrspace(1)* %r
  ret void
}