Home
last modified time | relevance | path

Searched refs:workgroup (Results 1 – 25 of 139) sorted by relevance

123456

/external/tensorflow/tensorflow/lite/delegates/gpu/gl/workgroups/
Dcalculator_from_metadata.cc46 for (const auto* workgroup : *workgroups.workgroups()) { in WorkgroupsCalculatorFromMetadata()
47 uint3 size(workgroup->size()->x(), workgroup->size()->y(), in WorkgroupsCalculatorFromMetadata()
48 workgroup->size()->z()); in WorkgroupsCalculatorFromMetadata()
51 for (auto node_id : *workgroup->node_indices()) { in WorkgroupsCalculatorFromMetadata()
71 for (auto workgroup : *workgroups.hardcoded_workgroups()) { in FindWorkgroups()
72 if (workgroup->gpu_info()->c_str() == gpu_info.opengl_info.renderer_name) { in FindWorkgroups()
73 return workgroup; in FindWorkgroups()
/external/llvm-project/llvm/test/CodeGen/AMDGPU/
Dmad24-get-global-id.ll3 ; If the workgroup id range is restricted, we should be able to use
6 declare i32 @llvm.amdgcn.workgroup.id.x() #0
18 %workgroup.size.xy = load i32, i32 addrspace(4)* %gep, align 4, !invariant.load !0
19 %workgroup.size.x = and i32 %workgroup.size.xy, 65535
22 %workgroup.id.x = call i32 @llvm.amdgcn.workgroup.id.x(), !range !2
24 %mul = mul i32 %workgroup.id.x, %workgroup.size.x
Dcallee-special-input-sgprs-fixed-abi.ll52 %val = call i32 @llvm.amdgcn.workgroup.id.x()
66 %val = call i32 @llvm.amdgcn.workgroup.id.x()
75 %val = call i32 @llvm.amdgcn.workgroup.id.y()
84 %val = call i32 @llvm.amdgcn.workgroup.id.z()
93 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
94 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
105 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
106 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
107 %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
118 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
[all …]
Dannotate-kernel-features-hsa.ll5 declare i32 @llvm.amdgcn.workgroup.id.x() #0
6 declare i32 @llvm.amdgcn.workgroup.id.y() #0
7 declare i32 @llvm.amdgcn.workgroup.id.z() #0
22 %val = call i32 @llvm.amdgcn.workgroup.id.x()
29 %val = call i32 @llvm.amdgcn.workgroup.id.y()
36 %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
38 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
45 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
46 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
54 %val = call i32 @llvm.amdgcn.workgroup.id.z()
[all …]
Dcallee-special-input-sgprs.ll108 %val = call i32 @llvm.amdgcn.workgroup.id.x()
122 %val = call i32 @llvm.amdgcn.workgroup.id.x()
131 %val = call i32 @llvm.amdgcn.workgroup.id.y()
140 %val = call i32 @llvm.amdgcn.workgroup.id.z()
149 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
150 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
161 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
162 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
163 %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
174 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
[all …]
Dsyncscopes.ll5 …r3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into…
10 i32 %workgroup,
16 store atomic i32 %workgroup, i32* %workgroup_out syncscope("workgroup") seq_cst, align 4
Dfence-barrier.ll7 declare i32 @llvm.amdgcn.workgroup.id.x()
32 fence syncscope("workgroup") release
34 fence syncscope("workgroup") acquire
39 %12 = call i32 @llvm.amdgcn.workgroup.id.x()
73 %9 = call i32 @llvm.amdgcn.workgroup.id.x()
91 %25 = call i32 @llvm.amdgcn.workgroup.id.x()
110 %44 = call i32 @llvm.amdgcn.workgroup.id.x()
125 fence syncscope("workgroup") release
127 fence syncscope("workgroup") acquire
152 %6 = call i32 @llvm.amdgcn.workgroup.id.x()
[all …]
Dattr-amdgpu-num-sgpr.ll38 ; workgroup ids: 3
65 ; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
66 ; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
67 ; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
103 ; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
105 ; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
107 ; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
121 declare i32 @llvm.amdgcn.workgroup.id.x() #1
122 declare i32 @llvm.amdgcn.workgroup.id.y() #1
123 declare i32 @llvm.amdgcn.workgroup.id.z() #1
Dvectorize-loads.ll15 …%id_x = load i16, i16 addrspace(4)* %gep_x.cast, align 4, !invariant.load !0 ; load workgroup size…
18 …%id_y = load i16, i16 addrspace(4)* %gep_y.cast, align 2, !invariant.load !0 ; load workgroup size…
38 …%id_x = load i16, i16 addrspace(4)* %gep_x.cast, align 4, !invariant.load !0 ; load workgroup size…
41 …%id_y = load i16, i16 addrspace(4)* %gep_y.cast, align 2, !invariant.load !0 ; load workgroup size…
44 …%id_z = load i16, i16 addrspace(4)* %gep_z.cast, align 4, !invariant.load !0 ; load workgroup size…
47 …%id_w = load i16, i16 addrspace(4)* %gep_w.cast, align 2, !invariant.load !0 ; load workgroup size…
Dllvm.amdgcn.workgroup.id.ll8 declare i32 @llvm.amdgcn.workgroup.id.x() #0
9 declare i32 @llvm.amdgcn.workgroup.id.y() #0
10 declare i32 @llvm.amdgcn.workgroup.id.z() #0
38 %id = call i32 @llvm.amdgcn.workgroup.id.x()
65 %id = call i32 @llvm.amdgcn.workgroup.id.y()
100 %id = call i32 @llvm.amdgcn.workgroup.id.z()
/external/llvm/test/CodeGen/AMDGPU/
Dmad24-get-global-id.ll3 ; If the workgroup id range is restricted, we should be able to use
6 declare i32 @llvm.amdgcn.workgroup.id.x() #0
18 %workgroup.size.xy = load i32, i32 addrspace(2)* %gep, align 4, !invariant.load !0
19 %workgroup.size.x = and i32 %workgroup.size.xy, 65535
22 %workgroup.id.x = call i32 @llvm.amdgcn.workgroup.id.x(), !range !2
24 %mul = mul i32 %workgroup.id.x, %workgroup.size.x
Dannotate-kernel-features-hsa.ll3 declare i32 @llvm.amdgcn.workgroup.id.x() #0
4 declare i32 @llvm.amdgcn.workgroup.id.y() #0
5 declare i32 @llvm.amdgcn.workgroup.id.z() #0
16 %val = call i32 @llvm.amdgcn.workgroup.id.x()
23 %val = call i32 @llvm.amdgcn.workgroup.id.y()
30 %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
32 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
39 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
40 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
48 %val = call i32 @llvm.amdgcn.workgroup.id.z()
[all …]
Damdgcn.work-item-intrinsics.ll19 ; The workgroup.id values are stored in sgprs offset by the number of user
33 %0 = call i32 @llvm.amdgcn.workgroup.id.x() #0
45 %0 = call i32 @llvm.amdgcn.workgroup.id.y() #0
61 %0 = call i32 @llvm.amdgcn.workgroup.id.z() #0
106 declare i32 @llvm.amdgcn.workgroup.id.x() #0
107 declare i32 @llvm.amdgcn.workgroup.id.y() #0
108 declare i32 @llvm.amdgcn.workgroup.id.z() #0
Dllvm.amdgcn.workgroup.id.ll6 declare i32 @llvm.amdgcn.workgroup.id.x() #0
7 declare i32 @llvm.amdgcn.workgroup.id.y() #0
8 declare i32 @llvm.amdgcn.workgroup.id.z() #0
37 %id = call i32 @llvm.amdgcn.workgroup.id.x()
65 %id = call i32 @llvm.amdgcn.workgroup.id.y()
101 %id = call i32 @llvm.amdgcn.workgroup.id.z()
/external/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/
Dirtranslator-amdgpu_kernel-system-sgprs.ll23 %id = call i32 @llvm.amdgcn.workgroup.id.x()
37 %id = call i32 @llvm.amdgcn.workgroup.id.y()
51 %id = call i32 @llvm.amdgcn.workgroup.id.z()
65 %id0 = call i32 @llvm.amdgcn.workgroup.id.x()
67 %id1 = call i32 @llvm.amdgcn.workgroup.id.y()
81 %id0 = call i32 @llvm.amdgcn.workgroup.id.x()
83 %id1 = call i32 @llvm.amdgcn.workgroup.id.y()
85 %id2 = call i32 @llvm.amdgcn.workgroup.id.y()
99 %id0 = call i32 @llvm.amdgcn.workgroup.id.x()
101 %id1 = call i32 @llvm.amdgcn.workgroup.id.y()
[all …]
Dllvm.amdgcn.workgroup.id.ll8 declare i32 @llvm.amdgcn.workgroup.id.x() #0
9 declare i32 @llvm.amdgcn.workgroup.id.y() #0
10 declare i32 @llvm.amdgcn.workgroup.id.z() #0
38 %id = call i32 @llvm.amdgcn.workgroup.id.x()
65 %id = call i32 @llvm.amdgcn.workgroup.id.y()
100 %id = call i32 @llvm.amdgcn.workgroup.id.z()
Dirtranslator-fence.ll117 fence syncscope("workgroup-one-as") acquire
126 fence syncscope("workgroup-one-as") release
135 fence syncscope("workgroup-one-as") acq_rel
144 fence syncscope("workgroup-one-as") seq_cst
296 fence syncscope("workgroup") acquire
305 fence syncscope("workgroup") release
314 fence syncscope("workgroup") acq_rel
323 fence syncscope("workgroup") seq_cst
/external/tensorflow/tensorflow/lite/delegates/gpu/gl/kernels/
Dconv.cc235 uint3 workgroup = uint3(16, 16, 1); in GenerateCode() local
238 workgroup = uint3(8, 8, 2); in GenerateCode()
241 workgroup = uint3(4, 8, 4); in GenerateCode()
244 workgroup = uint3(4, 4, 8); in GenerateCode()
247 workgroup = uint3(4, 4, 16); in GenerateCode()
250 workgroup = uint3(2, 8, 16); in GenerateCode()
254 workgroup = uint3(16, 8, 2); in GenerateCode()
257 workgroup = uint3(16, 4, 4); in GenerateCode()
260 workgroup = uint3(8, 4, 8); in GenerateCode()
263 workgroup = uint3(8, 4, 8); in GenerateCode()
[all …]
/external/llvm-project/clang/test/CodeGenOpenCL/
Datomic-ops.cl35 // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
50 …// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") se…
56 …tomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
59 …tomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
62 …tomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
68 …// CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") s…
71 …// CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") s…
74 …// CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") s…
77 …// CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") …
80 …// CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") …
[all …]
/external/llvm-project/mlir/include/mlir/Dialect/LLVMIR/
DROCDLOps.td66 def ROCDL_BlockIdXOp : ROCDL_SpecialRegisterOp<"workgroup.id.x">;
67 def ROCDL_BlockIdYOp : ROCDL_SpecialRegisterOp<"workgroup.id.y">;
68 def ROCDL_BlockIdZOp : ROCDL_SpecialRegisterOp<"workgroup.id.z">;
73 def ROCDL_BlockDimXOp : ROCDL_DeviceFunctionOp<"workgroup.dim.x",
76 def ROCDL_BlockDimYOp : ROCDL_DeviceFunctionOp<"workgroup.dim.y",
79 def ROCDL_BlockDimZOp : ROCDL_DeviceFunctionOp<"workgroup.dim.z",
98 llvmContext.getOrInsertSyncScopeID("workgroup"));
101 llvmContext.getOrInsertSyncScopeID("workgroup"));
/external/tensorflow/tensorflow/lite/delegates/gpu/gl/compiler/
Dfuse_auto_input.cc54 uint3 workgroup = node_code.workgroup; in ApplyToNode() local
86 if (input_producer_attr.code.workgroup != uint3()) { in ApplyToNode()
91 if (workgroup != uint3()) { in ApplyToNode()
94 workgroup = input_producer_attr.code.workgroup; in ApplyToNode()
/external/llvm-project/clang/test/SemaOpenCL/
Dbuiltins-amdgcn-error.cl133 …__builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argu…
134 …__builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argu…
139 const char ptr[] = "workgroup";
150 …val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-war…
151 …val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-war…
156 const char ptr[] = "workgroup";
164 …val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-war…
165 …val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-war…
170 const char ptr[] = "workgroup";
178 …val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-war…
[all …]
/external/tensorflow/tensorflow/lite/delegates/gpu/gl/
Dworkgroups.fbs24 // Defines the size of a workgroup.
27 // Shader has to cover exactly these nodes to have workgroup size applied.
31 // A collection of matchers to override default workgroup sizes in shaders.
/external/llvm-project/mlir/test/Dialect/LLVMIR/
Drocdl.mlir11 // CHECK: rocdl.workgroup.id.x : !llvm.i32
12 %3 = rocdl.workgroup.id.x : !llvm.i32
13 // CHECK: rocdl.workgroup.id.y : !llvm.i32
14 %4 = rocdl.workgroup.id.y : !llvm.i32
15 // CHECK: rocdl.workgroup.id.z : !llvm.i32
16 %5 = rocdl.workgroup.id.z : !llvm.i32
17 // CHECK: rocdl.workgroup.dim.x : !llvm.i32
18 %6 = rocdl.workgroup.dim.x : !llvm.i32
19 // CHECK: rocdl.workgroup.dim.y : !llvm.i32
20 %7 = rocdl.workgroup.dim.y : !llvm.i32
[all …]
/external/llvm-project/llvm/test/Linker/
Dsyncscopes.ll5 ; CHECK: fence syncscope("workgroup") seq_cst
11 ; CHECK: fence syncscope("workgroup") seq_cst

123456