//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides AMDGPU specific target streamer methods.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUTargetStreamer.h"
15 #include "SIDefines.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCELFStreamer.h"
20 #include "llvm/MC/MCObjectFileInfo.h"
21 #include "llvm/MC/MCSectionELF.h"
22 #include "llvm/Support/ELF.h"
23 #include "llvm/Support/FormattedStream.h"
24
25 using namespace llvm;
26
AMDGPUTargetStreamer(MCStreamer & S)27 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
28 : MCTargetStreamer(S) { }
29
30 //===----------------------------------------------------------------------===//
31 // AMDGPUTargetAsmStreamer
32 //===----------------------------------------------------------------------===//
33
AMDGPUTargetAsmStreamer(MCStreamer & S,formatted_raw_ostream & OS)34 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
35 formatted_raw_ostream &OS)
36 : AMDGPUTargetStreamer(S), OS(OS) { }
37
38 void
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)39 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
40 uint32_t Minor) {
41 OS << "\t.hsa_code_object_version " <<
42 Twine(Major) << "," << Twine(Minor) << '\n';
43 }
44
45 void
EmitDirectiveHSACodeObjectISA(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)46 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
47 uint32_t Minor,
48 uint32_t Stepping,
49 StringRef VendorName,
50 StringRef ArchName) {
51 OS << "\t.hsa_code_object_isa " <<
52 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
53 ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
54
55 }
56
57 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)58 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
59 uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
60 bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
61 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
62 bool EnableSGPRDispatchPtr = (Header.code_properties &
63 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
64 bool EnableSGPRQueuePtr = (Header.code_properties &
65 AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
66 bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
67 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
68 bool EnableSGPRDispatchID = (Header.code_properties &
69 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
70 bool EnableSGPRFlatScratchInit = (Header.code_properties &
71 AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
72 bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
73 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
74 bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
75 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
76 bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
77 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
78 bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
79 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
80 bool EnableOrderedAppendGDS = (Header.code_properties &
81 AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
82 uint32_t PrivateElementSize = (Header.code_properties &
83 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
84 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
85 bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
86 bool IsDynamicCallstack = (Header.code_properties &
87 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
88 bool IsDebugEnabled = (Header.code_properties &
89 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
90 bool IsXNackEnabled = (Header.code_properties &
91 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
92
93 OS << "\t.amd_kernel_code_t\n" <<
94 "\t\tkernel_code_version_major = " <<
95 Header.amd_kernel_code_version_major << '\n' <<
96 "\t\tkernel_code_version_minor = " <<
97 Header.amd_kernel_code_version_minor << '\n' <<
98 "\t\tmachine_kind = " <<
99 Header.amd_machine_kind << '\n' <<
100 "\t\tmachine_version_major = " <<
101 Header.amd_machine_version_major << '\n' <<
102 "\t\tmachine_version_minor = " <<
103 Header.amd_machine_version_minor << '\n' <<
104 "\t\tmachine_version_stepping = " <<
105 Header.amd_machine_version_stepping << '\n' <<
106 "\t\tkernel_code_entry_byte_offset = " <<
107 Header.kernel_code_entry_byte_offset << '\n' <<
108 "\t\tkernel_code_prefetch_byte_size = " <<
109 Header.kernel_code_prefetch_byte_size << '\n' <<
110 "\t\tmax_scratch_backing_memory_byte_size = " <<
111 Header.max_scratch_backing_memory_byte_size << '\n' <<
112 "\t\tcompute_pgm_rsrc1_vgprs = " <<
113 G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
114 "\t\tcompute_pgm_rsrc1_sgprs = " <<
115 G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
116 "\t\tcompute_pgm_rsrc1_priority = " <<
117 G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
118 "\t\tcompute_pgm_rsrc1_float_mode = " <<
119 G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
120 "\t\tcompute_pgm_rsrc1_priv = " <<
121 G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
122 "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
123 G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
124 "\t\tcompute_pgm_rsrc1_debug_mode = " <<
125 G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
126 "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
127 G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
128 "\t\tcompute_pgm_rsrc2_scratch_en = " <<
129 G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
130 "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
131 G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
132 "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
133 G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
134 "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
135 G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
136 "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
137 G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
138 "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
139 G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
140 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
141 G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
142 "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
143 G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
144 "\t\tcompute_pgm_rsrc2_lds_size = " <<
145 G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
146 "\t\tcompute_pgm_rsrc2_excp_en = " <<
147 G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
148
149 "\t\tenable_sgpr_private_segment_buffer = " <<
150 EnableSGPRPrivateSegmentBuffer << '\n' <<
151 "\t\tenable_sgpr_dispatch_ptr = " <<
152 EnableSGPRDispatchPtr << '\n' <<
153 "\t\tenable_sgpr_queue_ptr = " <<
154 EnableSGPRQueuePtr << '\n' <<
155 "\t\tenable_sgpr_kernarg_segment_ptr = " <<
156 EnableSGPRKernargSegmentPtr << '\n' <<
157 "\t\tenable_sgpr_dispatch_id = " <<
158 EnableSGPRDispatchID << '\n' <<
159 "\t\tenable_sgpr_flat_scratch_init = " <<
160 EnableSGPRFlatScratchInit << '\n' <<
161 "\t\tenable_sgpr_private_segment_size = " <<
162 EnableSGPRPrivateSegmentSize << '\n' <<
163 "\t\tenable_sgpr_grid_workgroup_count_x = " <<
164 EnableSGPRGridWorkgroupCountX << '\n' <<
165 "\t\tenable_sgpr_grid_workgroup_count_y = " <<
166 EnableSGPRGridWorkgroupCountY << '\n' <<
167 "\t\tenable_sgpr_grid_workgroup_count_z = " <<
168 EnableSGPRGridWorkgroupCountZ << '\n' <<
169 "\t\tenable_ordered_append_gds = " <<
170 EnableOrderedAppendGDS << '\n' <<
171 "\t\tprivate_element_size = " <<
172 PrivateElementSize << '\n' <<
173 "\t\tis_ptr64 = " <<
174 IsPtr64 << '\n' <<
175 "\t\tis_dynamic_callstack = " <<
176 IsDynamicCallstack << '\n' <<
177 "\t\tis_debug_enabled = " <<
178 IsDebugEnabled << '\n' <<
179 "\t\tis_xnack_enabled = " <<
180 IsXNackEnabled << '\n' <<
181 "\t\tworkitem_private_segment_byte_size = " <<
182 Header.workitem_private_segment_byte_size << '\n' <<
183 "\t\tworkgroup_group_segment_byte_size = " <<
184 Header.workgroup_group_segment_byte_size << '\n' <<
185 "\t\tgds_segment_byte_size = " <<
186 Header.gds_segment_byte_size << '\n' <<
187 "\t\tkernarg_segment_byte_size = " <<
188 Header.kernarg_segment_byte_size << '\n' <<
189 "\t\tworkgroup_fbarrier_count = " <<
190 Header.workgroup_fbarrier_count << '\n' <<
191 "\t\twavefront_sgpr_count = " <<
192 Header.wavefront_sgpr_count << '\n' <<
193 "\t\tworkitem_vgpr_count = " <<
194 Header.workitem_vgpr_count << '\n' <<
195 "\t\treserved_vgpr_first = " <<
196 Header.reserved_vgpr_first << '\n' <<
197 "\t\treserved_vgpr_count = " <<
198 Header.reserved_vgpr_count << '\n' <<
199 "\t\treserved_sgpr_first = " <<
200 Header.reserved_sgpr_first << '\n' <<
201 "\t\treserved_sgpr_count = " <<
202 Header.reserved_sgpr_count << '\n' <<
203 "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
204 Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
205 "\t\tdebug_private_segment_buffer_sgpr = " <<
206 Header.debug_private_segment_buffer_sgpr << '\n' <<
207 "\t\tkernarg_segment_alignment = " <<
208 (uint32_t)Header.kernarg_segment_alignment << '\n' <<
209 "\t\tgroup_segment_alignment = " <<
210 (uint32_t)Header.group_segment_alignment << '\n' <<
211 "\t\tprivate_segment_alignment = " <<
212 (uint32_t)Header.private_segment_alignment << '\n' <<
213 "\t\twavefront_size = " <<
214 (uint32_t)Header.wavefront_size << '\n' <<
215 "\t\tcall_convention = " <<
216 Header.call_convention << '\n' <<
217 "\t\truntime_loader_kernel_symbol = " <<
218 Header.runtime_loader_kernel_symbol << '\n' <<
219 // TODO: control_directives
220 "\t.end_amd_kernel_code_t\n";
221
222 }
223
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)224 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
225 unsigned Type) {
226 switch (Type) {
227 default: llvm_unreachable("Invalid AMDGPU symbol type");
228 case ELF::STT_AMDGPU_HSA_KERNEL:
229 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
230 break;
231 }
232 }
233
EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName)234 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
235 StringRef GlobalName) {
236 OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
237 }
238
EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName)239 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
240 StringRef GlobalName) {
241 OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
242 }
243
244 //===----------------------------------------------------------------------===//
245 // AMDGPUTargetELFStreamer
246 //===----------------------------------------------------------------------===//
247
AMDGPUTargetELFStreamer(MCStreamer & S)248 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
249 : AMDGPUTargetStreamer(S), Streamer(S) { }
250
getStreamer()251 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
252 return static_cast<MCELFStreamer &>(Streamer);
253 }
254
255 void
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)256 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
257 uint32_t Minor) {
258 MCStreamer &OS = getStreamer();
259 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
260
261 unsigned NameSZ = 4;
262
263 OS.PushSection();
264 OS.SwitchSection(Note);
265 OS.EmitIntValue(NameSZ, 4); // namesz
266 OS.EmitIntValue(8, 4); // descz
267 OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
268 OS.EmitBytes(StringRef("AMD", NameSZ)); // name
269 OS.EmitIntValue(Major, 4); // desc
270 OS.EmitIntValue(Minor, 4);
271 OS.EmitValueToAlignment(4);
272 OS.PopSection();
273 }
274
275 void
EmitDirectiveHSACodeObjectISA(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)276 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
277 uint32_t Minor,
278 uint32_t Stepping,
279 StringRef VendorName,
280 StringRef ArchName) {
281 MCStreamer &OS = getStreamer();
282 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
283
284 unsigned NameSZ = 4;
285 uint16_t VendorNameSize = VendorName.size() + 1;
286 uint16_t ArchNameSize = ArchName.size() + 1;
287 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
288 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
289 VendorNameSize + ArchNameSize;
290
291 OS.PushSection();
292 OS.SwitchSection(Note);
293 OS.EmitIntValue(NameSZ, 4); // namesz
294 OS.EmitIntValue(DescSZ, 4); // descsz
295 OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type
296 OS.EmitBytes(StringRef("AMD", 4)); // name
297 OS.EmitIntValue(VendorNameSize, 2); // desc
298 OS.EmitIntValue(ArchNameSize, 2);
299 OS.EmitIntValue(Major, 4);
300 OS.EmitIntValue(Minor, 4);
301 OS.EmitIntValue(Stepping, 4);
302 OS.EmitBytes(VendorName);
303 OS.EmitIntValue(0, 1); // NULL terminate VendorName
304 OS.EmitBytes(ArchName);
305 OS.EmitIntValue(0, 1); // NULL terminte ArchName
306 OS.EmitValueToAlignment(4);
307 OS.PopSection();
308 }
309
310 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)311 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
312
313 MCStreamer &OS = getStreamer();
314 OS.PushSection();
315 // The MCObjectFileInfo that is available to the assembler is a generic
316 // implementation and not AMDGPUHSATargetObjectFile, so we can't use
317 // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection.
318 OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
319 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
320 OS.PopSection();
321 }
322
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)323 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
324 unsigned Type) {
325 MCSymbolELF *Symbol = cast<MCSymbolELF>(
326 getStreamer().getContext().getOrCreateSymbol(SymbolName));
327 Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
328 }
329
EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName)330 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
331 StringRef GlobalName) {
332
333 MCSymbolELF *Symbol = cast<MCSymbolELF>(
334 getStreamer().getContext().getOrCreateSymbol(GlobalName));
335 Symbol->setType(ELF::STT_OBJECT);
336 Symbol->setBinding(ELF::STB_LOCAL);
337 }
338
EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName)339 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
340 StringRef GlobalName) {
341
342 MCSymbolELF *Symbol = cast<MCSymbolELF>(
343 getStreamer().getContext().getOrCreateSymbol(GlobalName));
344 Symbol->setType(ELF::STT_OBJECT);
345 Symbol->setBinding(ELF::STB_GLOBAL);
346 }
347