1 //
2 // Copyright 2019 Karol Herbst
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22 
23 #include "invocation.hpp"
24 
25 #include <tuple>
26 
27 #include "core/device.hpp"
28 #include "core/error.hpp"
29 #include "core/module.hpp"
30 #include "pipe/p_state.h"
31 #include "util/algorithm.hpp"
32 #include "util/functional.hpp"
33 
34 #include <compiler/glsl_types.h>
35 #include <compiler/nir/nir_builder.h>
36 #include <compiler/nir/nir_serialize.h>
37 #include <compiler/spirv/nir_spirv.h>
38 #include <util/u_math.h>
39 
40 using namespace clover;
41 
42 #ifdef HAVE_CLOVER_SPIRV
43 
44 // Refs and unrefs the glsl_type_singleton.
45 static class glsl_type_ref {
46 public:
glsl_type_ref()47    glsl_type_ref() {
48       glsl_type_singleton_init_or_ref();
49    }
50 
~glsl_type_ref()51    ~glsl_type_ref() {
52       glsl_type_singleton_decref();
53    }
54 } glsl_type_ref;
55 
56 static const nir_shader_compiler_options *
dev_get_nir_compiler_options(const device & dev)57 dev_get_nir_compiler_options(const device &dev)
58 {
59    const void *co = dev.get_compiler_options(PIPE_SHADER_IR_NIR);
60    return static_cast<const nir_shader_compiler_options*>(co);
61 }
62 
// Debug callback installed in spirv_to_nir_options: appends \p message to
// the std::string that \p private_data points to.  \p level and
// \p spirv_offset are not used.
static void debug_function(void *private_data,
                   enum nir_spirv_debug_level level, size_t spirv_offset,
                   const char *message)
{
   assert(private_data);

   std::string &log = *static_cast<std::string *>(private_data);
   log.append(message);
}
71 
72 static void
clover_arg_size_align(const glsl_type * type,unsigned * size,unsigned * align)73 clover_arg_size_align(const glsl_type *type, unsigned *size, unsigned *align)
74 {
75    if (type == glsl_type::sampler_type) {
76       *size = 0;
77       *align = 1;
78    } else if (type->is_image()) {
79       *size = *align = sizeof(cl_mem);
80    } else {
81       *size = type->cl_size();
82       *align = type->cl_alignment();
83    }
84 }
85 
86 static bool
clover_nir_lower_images(nir_shader * shader)87 clover_nir_lower_images(nir_shader *shader)
88 {
89    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
90 
91    ASSERTED int last_loc = -1;
92    int num_rd_images = 0, num_wr_images = 0, num_samplers = 0;
93    nir_foreach_uniform_variable(var, shader) {
94       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
95          /* Assume they come in order */
96          assert(var->data.location > last_loc);
97          last_loc = var->data.location;
98       }
99 
100       /* TODO: Constant samplers */
101       if (var->type == glsl_bare_sampler_type()) {
102          var->data.driver_location = num_samplers++;
103       } else if (glsl_type_is_image(var->type)) {
104          if (var->data.access & ACCESS_NON_WRITEABLE)
105             var->data.driver_location = num_rd_images++;
106          else
107             var->data.driver_location = num_wr_images++;
108       } else {
109          /* CL shouldn't have any sampled images */
110          assert(!glsl_type_is_sampler(var->type));
111       }
112    }
113    shader->info.num_textures = num_rd_images;
114    shader->info.textures_used = (1 << num_rd_images) - 1;
115    shader->info.num_images = num_wr_images;
116 
117    nir_builder b;
118    nir_builder_init(&b, impl);
119 
120    bool progress = false;
121    nir_foreach_block_reverse(block, impl) {
122       nir_foreach_instr_reverse_safe(instr, block) {
123          switch (instr->type) {
124          case nir_instr_type_deref: {
125             nir_deref_instr *deref = nir_instr_as_deref(instr);
126             if (deref->deref_type != nir_deref_type_var)
127                break;
128 
129             if (!glsl_type_is_image(deref->type) &&
130                 !glsl_type_is_sampler(deref->type))
131                break;
132 
133             b.cursor = nir_instr_remove(&deref->instr);
134             nir_ssa_def *loc =
135                nir_imm_intN_t(&b, deref->var->data.driver_location,
136                                   deref->dest.ssa.bit_size);
137             nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(loc));
138             progress = true;
139             break;
140          }
141 
142          case nir_instr_type_tex: {
143             nir_tex_instr *tex = nir_instr_as_tex(instr);
144             unsigned count = 0;
145             for (unsigned i = 0; i < tex->num_srcs; i++) {
146                if (tex->src[i].src_type == nir_tex_src_texture_deref ||
147                    tex->src[i].src_type == nir_tex_src_sampler_deref) {
148                   nir_deref_instr *deref = nir_src_as_deref(tex->src[i].src);
149                   if (deref->deref_type == nir_deref_type_var) {
150                      /* In this case, we know the actual variable */
151                      if (tex->src[i].src_type == nir_tex_src_texture_deref)
152                         tex->texture_index = deref->var->data.driver_location;
153                      else
154                         tex->sampler_index = deref->var->data.driver_location;
155                      /* This source gets discarded */
156                      nir_instr_rewrite_src(&tex->instr, &tex->src[i].src,
157                                            NIR_SRC_INIT);
158                      continue;
159                   } else {
160                      assert(tex->src[i].src.is_ssa);
161                      b.cursor = nir_before_instr(&tex->instr);
162                      /* Back-ends expect a 32-bit thing, not 64-bit */
163                      nir_ssa_def *offset = nir_u2u32(&b, tex->src[i].src.ssa);
164                      if (tex->src[i].src_type == nir_tex_src_texture_deref)
165                         tex->src[count].src_type = nir_tex_src_texture_offset;
166                      else
167                         tex->src[count].src_type = nir_tex_src_sampler_offset;
168                      nir_instr_rewrite_src(&tex->instr, &tex->src[count].src,
169                                            nir_src_for_ssa(offset));
170                   }
171                } else {
172                   /* If we've removed a source, move this one down */
173                   if (count != i) {
174                      assert(count < i);
175                      tex->src[count].src_type = tex->src[i].src_type;
176                      nir_instr_move_src(&tex->instr, &tex->src[count].src,
177                                         &tex->src[i].src);
178                   }
179                }
180                count++;
181             }
182             tex->num_srcs = count;
183             progress = true;
184             break;
185          }
186 
187          case nir_instr_type_intrinsic: {
188             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
189             switch (intrin->intrinsic) {
190             case nir_intrinsic_image_deref_load:
191             case nir_intrinsic_image_deref_store:
192             case nir_intrinsic_image_deref_atomic_add:
193             case nir_intrinsic_image_deref_atomic_imin:
194             case nir_intrinsic_image_deref_atomic_umin:
195             case nir_intrinsic_image_deref_atomic_imax:
196             case nir_intrinsic_image_deref_atomic_umax:
197             case nir_intrinsic_image_deref_atomic_and:
198             case nir_intrinsic_image_deref_atomic_or:
199             case nir_intrinsic_image_deref_atomic_xor:
200             case nir_intrinsic_image_deref_atomic_exchange:
201             case nir_intrinsic_image_deref_atomic_comp_swap:
202             case nir_intrinsic_image_deref_atomic_fadd:
203             case nir_intrinsic_image_deref_atomic_inc_wrap:
204             case nir_intrinsic_image_deref_atomic_dec_wrap:
205             case nir_intrinsic_image_deref_size:
206             case nir_intrinsic_image_deref_samples: {
207                assert(intrin->src[0].is_ssa);
208                b.cursor = nir_before_instr(&intrin->instr);
209                /* Back-ends expect a 32-bit thing, not 64-bit */
210                nir_ssa_def *offset = nir_u2u32(&b, intrin->src[0].ssa);
211                nir_rewrite_image_intrinsic(intrin, offset, false);
212                progress = true;
213                break;
214             }
215 
216             default:
217                break;
218             }
219             break;
220          }
221 
222          default:
223             break;
224          }
225       }
226    }
227 
228    if (progress) {
229       nir_metadata_preserve(impl, nir_metadata_block_index |
230                                   nir_metadata_dominance);
231    } else {
232       nir_metadata_preserve(impl, nir_metadata_all);
233    }
234 
235    return progress;
236 }
237 
238 struct clover_lower_nir_state {
239    std::vector<module::argument> &args;
240    uint32_t global_dims;
241    nir_variable *constant_var;
242    nir_variable *offset_vars[3];
243 };
244 
245 static bool
clover_lower_nir_filter(const nir_instr * instr,const void *)246 clover_lower_nir_filter(const nir_instr *instr, const void *)
247 {
248    return instr->type == nir_instr_type_intrinsic;
249 }
250 
251 static nir_ssa_def *
clover_lower_nir_instr(nir_builder * b,nir_instr * instr,void * _state)252 clover_lower_nir_instr(nir_builder *b, nir_instr *instr, void *_state)
253 {
254    clover_lower_nir_state *state = reinterpret_cast<clover_lower_nir_state*>(_state);
255    nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
256 
257    switch (intrinsic->intrinsic) {
258    case nir_intrinsic_load_base_global_invocation_id: {
259       nir_ssa_def *loads[3];
260 
261       /* create variables if we didn't do so alrady */
262       if (!state->offset_vars[0]) {
263          /* TODO: fix for 64 bit */
264          /* Even though we only place one scalar argument, clover will bind up to
265           * three 32 bit values
266          */
267          unsigned location = state->args.size();
268          state->args.emplace_back(module::argument::scalar, 4, 4, 4,
269                                   module::argument::zero_ext,
270                                   module::argument::grid_offset);
271 
272          const glsl_type *type = glsl_uint_type();
273          for (uint32_t i = 0; i < 3; i++) {
274             state->offset_vars[i] =
275                nir_variable_create(b->shader, nir_var_uniform, type,
276                                    "global_invocation_id_offsets");
277             state->offset_vars[i]->data.location = location + i;
278          }
279       }
280 
281       for (int i = 0; i < 3; i++) {
282          nir_variable *var = state->offset_vars[i];
283          loads[i] = var ? nir_load_var(b, var) : nir_imm_int(b, 0);
284       }
285 
286       return nir_u2u(b, nir_vec(b, loads, state->global_dims),
287                      nir_dest_bit_size(intrinsic->dest));
288    }
289    case nir_intrinsic_load_constant_base_ptr: {
290       return nir_load_var(b, state->constant_var);
291    }
292 
293    default:
294       return NULL;
295    }
296 }
297 
298 static bool
clover_lower_nir(nir_shader * nir,std::vector<module::argument> & args,uint32_t dims,uint32_t pointer_bit_size)299 clover_lower_nir(nir_shader *nir, std::vector<module::argument> &args,
300                  uint32_t dims, uint32_t pointer_bit_size)
301 {
302    nir_variable *constant_var = NULL;
303    if (nir->constant_data_size) {
304       const glsl_type *type = pointer_bit_size == 64 ? glsl_uint64_t_type() : glsl_uint_type();
305 
306       constant_var = nir_variable_create(nir, nir_var_uniform, type,
307                                          "constant_buffer_addr");
308       constant_var->data.location = args.size();
309 
310       args.emplace_back(module::argument::global,
311                         pointer_bit_size / 8, pointer_bit_size / 8, pointer_bit_size / 8,
312                         module::argument::zero_ext,
313                         module::argument::constant_buffer);
314    }
315 
316    clover_lower_nir_state state = { args, dims, constant_var };
317    return nir_shader_lower_instructions(nir,
318       clover_lower_nir_filter, clover_lower_nir_instr, &state);
319 }
320 
321 static spirv_to_nir_options
create_spirv_options(const device & dev,std::string & r_log)322 create_spirv_options(const device &dev, std::string &r_log)
323 {
324    struct spirv_to_nir_options spirv_options = {};
325    spirv_options.environment = NIR_SPIRV_OPENCL;
326    if (dev.address_bits() == 32u) {
327       spirv_options.shared_addr_format = nir_address_format_32bit_offset;
328       spirv_options.global_addr_format = nir_address_format_32bit_global;
329       spirv_options.temp_addr_format = nir_address_format_32bit_offset;
330       spirv_options.constant_addr_format = nir_address_format_32bit_global;
331    } else {
332       spirv_options.shared_addr_format = nir_address_format_32bit_offset_as_64bit;
333       spirv_options.global_addr_format = nir_address_format_64bit_global;
334       spirv_options.temp_addr_format = nir_address_format_32bit_offset_as_64bit;
335       spirv_options.constant_addr_format = nir_address_format_64bit_global;
336    }
337    spirv_options.caps.address = true;
338    spirv_options.caps.float64 = true;
339    spirv_options.caps.int8 = true;
340    spirv_options.caps.int16 = true;
341    spirv_options.caps.int64 = true;
342    spirv_options.caps.kernel = true;
343    spirv_options.caps.kernel_image = dev.image_support();
344    spirv_options.caps.int64_atomics = dev.has_int64_atomics();
345    spirv_options.debug.func = &debug_function;
346    spirv_options.debug.private_data = &r_log;
347    return spirv_options;
348 }
349 
create_clc_disk_cache(void)350 struct disk_cache *clover::nir::create_clc_disk_cache(void)
351 {
352    struct mesa_sha1 ctx;
353    unsigned char sha1[20];
354    char cache_id[20 * 2 + 1];
355    _mesa_sha1_init(&ctx);
356 
357    if (!disk_cache_get_function_identifier((void *)clover::nir::create_clc_disk_cache, &ctx))
358       return NULL;
359 
360    _mesa_sha1_final(&ctx, sha1);
361 
362    disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
363    return disk_cache_create("clover-clc", cache_id, 0);
364 }
365 
check_for_libclc(const device & dev)366 void clover::nir::check_for_libclc(const device &dev)
367 {
368    if (!nir_can_find_libclc(dev.address_bits()))
369       throw error(CL_COMPILER_NOT_AVAILABLE);
370 }
371 
load_libclc_nir(const device & dev,std::string & r_log)372 nir_shader *clover::nir::load_libclc_nir(const device &dev, std::string &r_log)
373 {
374    spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);
375    auto *compiler_options = dev_get_nir_compiler_options(dev);
376 
377    return nir_load_libclc_shader(dev.address_bits(), dev.clc_cache,
378 				 &spirv_options, compiler_options);
379 }
380 
381 module clover::nir::spirv_to_nir(const module &mod, const device &dev,
382                                  std::string &r_log)
383 {
384    spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);
385    std::shared_ptr<nir_shader> nir = dev.clc_nir;
386    spirv_options.clc_shader = nir.get();
387 
388    module m;
389    // We only insert one section.
390    assert(mod.secs.size() == 1);
391    auto &section = mod.secs[0];
392 
393    module::resource_id section_id = 0;
394    for (const auto &sym : mod.syms) {
395       assert(sym.section == 0);
396 
397       const auto *binary =
398          reinterpret_cast<const pipe_binary_program_header *>(section.data.data());
399       const uint32_t *data = reinterpret_cast<const uint32_t *>(binary->blob);
400       const size_t num_words = binary->num_bytes / 4;
401       const char *name = sym.name.c_str();
402       auto *compiler_options = dev_get_nir_compiler_options(dev);
403 
404       nir_shader *nir = spirv_to_nir(data, num_words, nullptr, 0,
405                                      MESA_SHADER_KERNEL, name,
406                                      &spirv_options, compiler_options);
407       if (!nir) {
408          r_log += "Translation from SPIR-V to NIR for kernel \"" + sym.name +
409                   "\" failed.\n";
410          throw build_error();
411       }
412 
413       nir->info.cs.local_size_variable = sym.reqd_work_group_size[0] == 0;
414       nir->info.cs.local_size[0] = sym.reqd_work_group_size[0];
415       nir->info.cs.local_size[1] = sym.reqd_work_group_size[1];
416       nir->info.cs.local_size[2] = sym.reqd_work_group_size[2];
417       nir_validate_shader(nir, "clover");
418 
419       // Inline all functions first.
420       // according to the comment on nir_inline_functions
421       NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
422       NIR_PASS_V(nir, nir_lower_returns);
423       NIR_PASS_V(nir, nir_lower_libclc, spirv_options.clc_shader);
424 
425       NIR_PASS_V(nir, nir_inline_functions);
426       NIR_PASS_V(nir, nir_copy_prop);
427       NIR_PASS_V(nir, nir_opt_deref);
428 
429       // Pick off the single entrypoint that we want.
430       foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
431          if (!func->is_entrypoint)
432             exec_node_remove(&func->node);
433       }
434       assert(exec_list_length(&nir->functions) == 1);
435 
436       nir_validate_shader(nir, "clover after function inlining");
437 
438       NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
439 
440       // copy propagate to prepare for lower_explicit_io
441       NIR_PASS_V(nir, nir_split_var_copies);
442       NIR_PASS_V(nir, nir_opt_copy_prop_vars);
443       NIR_PASS_V(nir, nir_lower_var_copies);
444       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
445       NIR_PASS_V(nir, nir_opt_dce);
446 
447       NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
448 
449       NIR_PASS_V(nir, nir_lower_system_values);
450       nir_lower_compute_system_values_options sysval_options = { 0 };
451       sysval_options.has_base_global_invocation_id = true;
452       NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
453 
454       // constant fold before lowering mem constants
455       NIR_PASS_V(nir, nir_opt_constant_folding);
456 
457       NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_constant, NULL);
458       NIR_PASS_V(nir, nir_lower_mem_constant_vars,
459                  glsl_get_cl_type_size_align);
460       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
461                  spirv_options.constant_addr_format);
462 
463       auto args = sym.args;
464       NIR_PASS_V(nir, clover_lower_nir, args, dev.max_block_size().size(),
465                  dev.address_bits());
466 
467       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
468                  nir_var_uniform, clover_arg_size_align);
469       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
470                  nir_var_mem_shared | nir_var_mem_global |
471                  nir_var_function_temp,
472                  glsl_get_cl_type_size_align);
473 
474       NIR_PASS_V(nir, nir_opt_deref);
475       NIR_PASS_V(nir, nir_lower_cl_images_to_tex);
476       NIR_PASS_V(nir, clover_nir_lower_images);
477       NIR_PASS_V(nir, nir_lower_memcpy);
478 
479       /* use offsets for kernel inputs (uniform) */
480       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
481                  nir->info.cs.ptr_size == 64 ?
482                  nir_address_format_32bit_offset_as_64bit :
483                  nir_address_format_32bit_offset);
484 
485       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
486                  spirv_options.constant_addr_format);
487       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,
488                  spirv_options.shared_addr_format);
489 
490       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
491                  spirv_options.temp_addr_format);
492 
493       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
494                  spirv_options.global_addr_format);
495 
496       NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);
497 
498       if (compiler_options->lower_int64_options)
499          NIR_PASS_V(nir, nir_lower_int64);
500 
501       NIR_PASS_V(nir, nir_opt_dce);
502 
503       if (nir->constant_data_size) {
504          const char *ptr = reinterpret_cast<const char *>(nir->constant_data);
505          const module::section constants {
506             section_id,
507             module::section::data_constant,
508             nir->constant_data_size,
509             { ptr, ptr + nir->constant_data_size }
510          };
511          nir->constant_data = NULL;
512          nir->constant_data_size = 0;
513          m.secs.push_back(constants);
514       }
515 
516       struct blob blob;
517       blob_init(&blob);
518       nir_serialize(&blob, nir, false);
519 
520       ralloc_free(nir);
521 
522       const pipe_binary_program_header header { uint32_t(blob.size) };
523       module::section text { section_id, module::section::text_executable, header.num_bytes, {} };
524       text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),
525                        reinterpret_cast<const char *>(&header) + sizeof(header));
526       text.data.insert(text.data.end(), blob.data, blob.data + blob.size);
527 
528       free(blob.data);
529 
530       m.syms.emplace_back(sym.name, std::string(),
531                           sym.reqd_work_group_size, section_id, 0, args);
532       m.secs.push_back(text);
533       section_id++;
534    }
535    return m;
536 }
537 #else
538 module clover::nir::spirv_to_nir(const module &mod, const device &dev, std::string &r_log)
539 {
540    r_log += "SPIR-V support in clover is not enabled.\n";
541    throw error(CL_LINKER_NOT_AVAILABLE);
542 }
543 #endif
544