1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22 
23 #include <algorithm>
24 #include <unistd.h>
25 #include "core/device.hpp"
26 #include "core/platform.hpp"
27 #include "pipe/p_screen.h"
28 #include "pipe/p_state.h"
29 #include "util/bitscan.h"
30 #include "util/u_debug.h"
31 #include "spirv/invocation.hpp"
32 #include "nir/invocation.hpp"
33 #include <fstream>
34 
35 using namespace clover;
36 
37 namespace {
38    template<typename T>
39    std::vector<T>
get_compute_param(pipe_screen * pipe,pipe_shader_ir ir_format,pipe_compute_cap cap)40    get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
41                      pipe_compute_cap cap) {
42       int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
43       std::vector<T> v(sz / sizeof(T));
44 
45       pipe->get_compute_param(pipe, ir_format, cap, &v.front());
46       return v;
47    }
48 }
49 
device(clover::platform & platform,pipe_loader_device * ldev)50 device::device(clover::platform &platform, pipe_loader_device *ldev) :
51    platform(platform), clc_cache(NULL), ldev(ldev) {
52    pipe = pipe_loader_create_screen(ldev);
53    if (pipe && pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
54       if (supports_ir(PIPE_SHADER_IR_NATIVE))
55          return;
56 #ifdef HAVE_CLOVER_SPIRV
57       if (supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED)) {
58          nir::check_for_libclc(*this);
59          clc_cache = nir::create_clc_disk_cache();
60          clc_nir = lazy<std::shared_ptr<nir_shader>>([&] () { std::string log; return std::shared_ptr<nir_shader>(nir::load_libclc_nir(*this, log), ralloc_free); });
61          return;
62       }
63 #endif
64    }
65    if (pipe)
66       pipe->destroy(pipe);
67    throw error(CL_INVALID_DEVICE);
68 }
69 
~device()70 device::~device() {
71    if (clc_cache)
72       disk_cache_destroy(clc_cache);
73    if (pipe)
74       pipe->destroy(pipe);
75    if (ldev)
76       pipe_loader_release(&ldev, 1);
77 }
78 
79 bool
operator ==(const device & dev) const80 device::operator==(const device &dev) const {
81    return this == &dev;
82 }
83 
84 cl_device_type
type() const85 device::type() const {
86    switch (ldev->type) {
87    case PIPE_LOADER_DEVICE_SOFTWARE:
88       return CL_DEVICE_TYPE_CPU;
89    case PIPE_LOADER_DEVICE_PCI:
90    case PIPE_LOADER_DEVICE_PLATFORM:
91       return CL_DEVICE_TYPE_GPU;
92    default:
93       unreachable("Unknown device type.");
94    }
95 }
96 
97 cl_uint
vendor_id() const98 device::vendor_id() const {
99    switch (ldev->type) {
100    case PIPE_LOADER_DEVICE_SOFTWARE:
101    case PIPE_LOADER_DEVICE_PLATFORM:
102       return 0;
103    case PIPE_LOADER_DEVICE_PCI:
104       return ldev->u.pci.vendor_id;
105    default:
106       unreachable("Unknown device type.");
107    }
108 }
109 
110 size_t
max_images_read() const111 device::max_images_read() const {
112    return PIPE_MAX_SHADER_SAMPLER_VIEWS;
113 }
114 
115 size_t
max_images_write() const116 device::max_images_write() const {
117    return PIPE_MAX_SHADER_IMAGES;
118 }
119 
120 size_t
max_image_buffer_size() const121 device::max_image_buffer_size() const {
122    return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE);
123 }
124 
125 cl_uint
max_image_levels_2d() const126 device::max_image_levels_2d() const {
127    return util_last_bit(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_SIZE));
128 }
129 
130 cl_uint
max_image_levels_3d() const131 device::max_image_levels_3d() const {
132    return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS);
133 }
134 
135 size_t
max_image_array_number() const136 device::max_image_array_number() const {
137    return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS);
138 }
139 
140 cl_uint
max_samplers() const141 device::max_samplers() const {
142    return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
143                                  PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
144 }
145 
146 cl_ulong
max_mem_global() const147 device::max_mem_global() const {
148    return get_compute_param<uint64_t>(pipe, ir_format(),
149                                       PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
150 }
151 
152 cl_ulong
max_mem_local() const153 device::max_mem_local() const {
154    return get_compute_param<uint64_t>(pipe, ir_format(),
155                                       PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
156 }
157 
158 cl_ulong
max_mem_input() const159 device::max_mem_input() const {
160    return get_compute_param<uint64_t>(pipe, ir_format(),
161                                       PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
162 }
163 
164 cl_ulong
max_const_buffer_size() const165 device::max_const_buffer_size() const {
166    return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
167                                  PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE);
168 }
169 
170 cl_uint
max_const_buffers() const171 device::max_const_buffers() const {
172    return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
173                                  PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
174 }
175 
176 size_t
max_threads_per_block() const177 device::max_threads_per_block() const {
178    return get_compute_param<uint64_t>(
179       pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
180 }
181 
182 cl_ulong
max_mem_alloc_size() const183 device::max_mem_alloc_size() const {
184    return get_compute_param<uint64_t>(pipe, ir_format(),
185                                       PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
186 }
187 
188 cl_uint
max_clock_frequency() const189 device::max_clock_frequency() const {
190    return get_compute_param<uint32_t>(pipe, ir_format(),
191                                       PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
192 }
193 
194 cl_uint
max_compute_units() const195 device::max_compute_units() const {
196    return get_compute_param<uint32_t>(pipe, ir_format(),
197                                       PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
198 }
199 
200 bool
image_support() const201 device::image_support() const {
202    return get_compute_param<uint32_t>(pipe, ir_format(),
203                                       PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
204 }
205 
206 bool
has_doubles() const207 device::has_doubles() const {
208    return pipe->get_param(pipe, PIPE_CAP_DOUBLES);
209 }
210 
211 bool
has_halves() const212 device::has_halves() const {
213    return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
214                                  PIPE_SHADER_CAP_FP16);
215 }
216 
217 bool
has_int64_atomics() const218 device::has_int64_atomics() const {
219    return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
220                                  PIPE_SHADER_CAP_INT64_ATOMICS);
221 }
222 
223 bool
has_unified_memory() const224 device::has_unified_memory() const {
225    return pipe->get_param(pipe, PIPE_CAP_UMA);
226 }
227 
228 size_t
mem_base_addr_align() const229 device::mem_base_addr_align() const {
230    return std::max((size_t)sysconf(_SC_PAGESIZE), sizeof(cl_long) * 16);
231 }
232 
233 cl_device_svm_capabilities
svm_support() const234 device::svm_support() const {
235    // Without CAP_RESOURCE_FROM_USER_MEMORY SVM and CL_MEM_USE_HOST_PTR
236    // interactions won't work according to spec as clover manages a GPU side
237    // copy of the host data.
238    //
239    // The biggest problem are memory buffers created with CL_MEM_USE_HOST_PTR,
240    // but the application and/or the kernel updates the memory via SVM and not
241    // the cl_mem buffer.
242    // We can't even do proper tracking on what memory might have been accessed
243    // as the host ptr to the buffer could be within a SVM region, where through
244    // the CL API there is no reliable way of knowing if a certain cl_mem buffer
245    // was accessed by a kernel or not and the runtime can't reliably know from
246    // which side the GPU buffer content needs to be updated.
247    //
248    // Another unsolvable scenario is a cl_mem object passed by cl_mem reference
249    // and SVM pointer into the same kernel at the same time.
250    if (allows_user_pointers() && pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
251       // we can emulate all lower levels if we support fine grain system
252       return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
253              CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
254              CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
255    return 0;
256 }
257 
258 bool
allows_user_pointers() const259 device::allows_user_pointers() const {
260    return pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) ||
261           pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY);
262 }
263 
264 std::vector<size_t>
max_block_size() const265 device::max_block_size() const {
266    auto v = get_compute_param<uint64_t>(pipe, ir_format(),
267                                         PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
268    return { v.begin(), v.end() };
269 }
270 
271 cl_uint
subgroup_size() const272 device::subgroup_size() const {
273    return get_compute_param<uint32_t>(pipe, ir_format(),
274                                       PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
275 }
276 
277 cl_uint
address_bits() const278 device::address_bits() const {
279    return get_compute_param<uint32_t>(pipe, ir_format(),
280                                       PIPE_COMPUTE_CAP_ADDRESS_BITS)[0];
281 }
282 
283 std::string
device_name() const284 device::device_name() const {
285    return pipe->get_name(pipe);
286 }
287 
288 std::string
vendor_name() const289 device::vendor_name() const {
290    return pipe->get_device_vendor(pipe);
291 }
292 
293 enum pipe_shader_ir
ir_format() const294 device::ir_format() const {
295    if (supports_ir(PIPE_SHADER_IR_NATIVE))
296       return PIPE_SHADER_IR_NATIVE;
297 
298    assert(supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED));
299    return PIPE_SHADER_IR_NIR_SERIALIZED;
300 }
301 
302 std::string
ir_target() const303 device::ir_target() const {
304    std::vector<char> target = get_compute_param<char>(
305       pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
306    return { target.data() };
307 }
308 
309 enum pipe_endian
endianness() const310 device::endianness() const {
311    return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
312 }
313 
314 std::string
device_version() const315 device::device_version() const {
316    static const std::string device_version =
317          debug_get_option("CLOVER_DEVICE_VERSION_OVERRIDE", "1.1");
318    return device_version;
319 }
320 
321 std::string
device_clc_version() const322 device::device_clc_version() const {
323    static const std::string device_clc_version =
324          debug_get_option("CLOVER_DEVICE_CLC_VERSION_OVERRIDE", "1.1");
325    return device_clc_version;
326 }
327 
328 bool
supports_ir(enum pipe_shader_ir ir) const329 device::supports_ir(enum pipe_shader_ir ir) const {
330    return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
331                                  PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << ir);
332 }
333 
334 std::string
supported_extensions() const335 device::supported_extensions() const {
336    return
337       "cl_khr_byte_addressable_store"
338       " cl_khr_global_int32_base_atomics"
339       " cl_khr_global_int32_extended_atomics"
340       " cl_khr_local_int32_base_atomics"
341       " cl_khr_local_int32_extended_atomics"
342       + std::string(has_int64_atomics() ? " cl_khr_int64_base_atomics" : "")
343       + std::string(has_int64_atomics() ? " cl_khr_int64_extended_atomics" : "")
344       + std::string(has_doubles() ? " cl_khr_fp64" : "")
345       + std::string(has_halves() ? " cl_khr_fp16" : "")
346       + std::string(svm_support() ? " cl_arm_shared_virtual_memory" : "");
347 }
348 
349 const void *
get_compiler_options(enum pipe_shader_ir ir) const350 device::get_compiler_options(enum pipe_shader_ir ir) const {
351    return pipe->get_compiler_options(pipe, ir, PIPE_SHADER_COMPUTE);
352 }
353