/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)
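/* Object IDs must fit in the 20-bit object_idx field of union packed_src
 * defined further below; write_add_object() asserts that next_idx stays
 * below this limit.
 */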

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
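
/* A worked example of the 3-bit bit-size round trip through the two helpers
 * above:
 *   encode_bit_size_3bits(32) = log2(32) + 1 = 6; decode_bit_size_3bits(6) = 1 << 5 = 32
 *   encode_bit_size_3bits(1)  = log2(1) + 1  = 1; decode_bit_size_3bits(1) = 1 << 0 = 1
 *   encode_bit_size_3bits(0)  = 0;                decode_bit_size_3bits(0) = 0
 */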

#define NUM_COMPONENTS_IS_SEPARATE_7   7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
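
/* The resulting 3-bit mapping: 0-4 encode as themselves, 8 -> 5, 16 -> 6,
 * and 7 (NUM_COMPONENTS_IS_SEPARATE_7) means the real count follows as a
 * separate uint32. E.g. a vec8 value is written as 5 and decoded back to 8.
 */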

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
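
/* The signed bit-field widths above bound the diff encoding: write_variable()
 * below only picks var_encode_location_diff when the location delta fits in
 * 13 signed bits (abs(delta) < 1 << 12) and the driver_location delta fits
 * in 16 signed bits (abs(delta) < 1 << 15).
 */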

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      write_lookup_object(ctx, var->pointer_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
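
/* All packed_src variants share the low 22 bits (the header: is_ssa,
 * is_indirect and object_idx); the top 10 bits form a footer that each
 * consumer interprets differently, e.g. as ALU modifiers and swizzles or
 * as a tex src_type.
 */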

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
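
/* A worked example of const_indices_9bit_all_combined: with two indices,
 * write_intrinsic() below uses bit_size = 9 / 2 = 4 bits per index, so the
 * indices {7, 3} are packed as 7 | (3 << 4) and unpacked on read with a
 * 4-bit mask.
 */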

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
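
/* A worked example: the 32-bit constant 0x3f800000 (1.0f) has its low
 * 13 bits clear, so it packs as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13, and the reader restores it by shifting
 * the 19 packed bits left by 13 again.
 */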

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:14; /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:1;  /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
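
/* Every packed_instr variant above starts with the same 4-bit instr_type
 * field, so a reader can tell the instruction type from the first dword
 * before it knows which variant applies.
 */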

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it is, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset,
                sizeof(last_header));

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            memcpy(ctx->blob->data + ctx->last_alu_header_offset,
                   &last_header, sizeof(last_header));

            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->modes < (1 << 14));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = deref->modes;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = header.deref.modes;
   } else {
      assert(deref->parent.is_ssa);
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into the 9 header bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics get num_components from the dest, or from a src
    * whose component count is 0 in the info. Find it.
    */
1225    if (nir_intrinsic_infos[op].has_dest &&
1226        nir_intrinsic_infos[op].dest_components == 0) {
1227       intrin->num_components = nir_dest_num_components(intrin->dest);
1228    } else {
1229       for (unsigned i = 0; i < num_srcs; i++) {
1230          if (nir_intrinsic_infos[op].src_components[i] == 0) {
1231             intrin->num_components = nir_src_num_components(intrin->src[i]);
1232             break;
1233          }
1234       }
1235    }
1236 
1237    if (num_indices) {
1238       switch (header.intrinsic.const_indices_encoding) {
1239       case const_indices_9bit_all_combined: {
1240          unsigned bit_size = 9 / num_indices;
1241          unsigned bit_mask = u_bit_consecutive(0, bit_size);
1242          for (unsigned i = 0; i < num_indices; i++) {
1243             intrin->const_index[i] =
1244                (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1245                bit_mask;
1246          }
1247          break;
1248       }
1249       case const_indices_8bit:
1250          for (unsigned i = 0; i < num_indices; i++)
1251             intrin->const_index[i] = blob_read_uint8(ctx->blob);
1252          break;
1253       case const_indices_16bit:
1254          for (unsigned i = 0; i < num_indices; i++)
1255             intrin->const_index[i] = blob_read_uint16(ctx->blob);
1256          break;
1257       case const_indices_32bit:
1258          for (unsigned i = 0; i < num_indices; i++)
1259             intrin->const_index[i] = blob_read_uint32(ctx->blob);
1260          break;
1261       }
1262    }
1263 
1264    return intrin;
1265 }
1266 
1267 static void
write_load_const(write_ctx * ctx,const nir_load_const_instr * lc)1268 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1269 {
1270    assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1271    union packed_instr header;
1272    header.u32 = 0;
1273 
1274    header.load_const.instr_type = lc->instr.type;
1275    header.load_const.last_component = lc->def.num_components - 1;
1276    header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1277    header.load_const.packing = load_const_full;
1278 
1279    /* Try to pack 1-component constants into the 19 free bits in the header. */
1280    if (lc->def.num_components == 1) {
1281       switch (lc->def.bit_size) {
1282       case 64:
1283          if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1284             /* packed_value contains high 19 bits, low bits are 0 */
1285             header.load_const.packing = load_const_scalar_hi_19bits;
1286             header.load_const.packed_value = lc->value[0].u64 >> 45;
1287          } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
1288             /* packed_value contains low 19 bits, high bits are sign-extended */
1289             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1290             header.load_const.packed_value = lc->value[0].u64;
1291          }
1292          break;
1293 
1294       case 32:
1295          if ((lc->value[0].u32 & 0x1fff) == 0) {
1296             header.load_const.packing = load_const_scalar_hi_19bits;
1297             header.load_const.packed_value = lc->value[0].u32 >> 13;
1298          } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
1299             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1300             header.load_const.packed_value = lc->value[0].u32;
1301          }
1302          break;
1303 
1304       case 16:
1305          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1306          header.load_const.packed_value = lc->value[0].u16;
1307          break;
1308       case 8:
1309          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1310          header.load_const.packed_value = lc->value[0].u8;
1311          break;
1312       case 1:
1313          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1314          header.load_const.packed_value = lc->value[0].b;
1315          break;
1316       default:
1317          unreachable("invalid bit_size");
1318       }
1319    }
1320 
1321    blob_write_uint32(ctx->blob, header.u32);
1322 
1323    if (header.load_const.packing == load_const_full) {
1324       switch (lc->def.bit_size) {
1325       case 64:
1326          blob_write_bytes(ctx->blob, lc->value,
1327                           sizeof(*lc->value) * lc->def.num_components);
1328          break;
1329 
1330       case 32:
1331          for (unsigned i = 0; i < lc->def.num_components; i++)
1332             blob_write_uint32(ctx->blob, lc->value[i].u32);
1333          break;
1334 
1335       case 16:
1336          for (unsigned i = 0; i < lc->def.num_components; i++)
1337             blob_write_uint16(ctx->blob, lc->value[i].u16);
1338          break;
1339 
1340       default:
1341          assert(lc->def.bit_size <= 8);
1342          for (unsigned i = 0; i < lc->def.num_components; i++)
1343             blob_write_uint8(ctx->blob, lc->value[i].u8);
1344          break;
1345       }
1346    }
1347 
1348    write_add_object(ctx, &lc->def);
1349 }
1350 
1351 static nir_load_const_instr *
read_load_const(read_ctx * ctx,union packed_instr header)1352 read_load_const(read_ctx *ctx, union packed_instr header)
1353 {
1354    nir_load_const_instr *lc =
1355       nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1356                                   decode_bit_size_3bits(header.load_const.bit_size));
1357 
1358    switch (header.load_const.packing) {
1359    case load_const_scalar_hi_19bits:
1360       switch (lc->def.bit_size) {
1361       case 64:
1362          lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1363          break;
1364       case 32:
1365          lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1366          break;
1367       default:
1368          unreachable("invalid bit_size");
1369       }
1370       break;
1371 
1372    case load_const_scalar_lo_19bits_sext:
1373       switch (lc->def.bit_size) {
1374       case 64:
1375          lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1376          break;
1377       case 32:
1378          lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1379          break;
1380       case 16:
1381          lc->value[0].u16 = header.load_const.packed_value;
1382          break;
1383       case 8:
1384          lc->value[0].u8 = header.load_const.packed_value;
1385          break;
1386       case 1:
1387          lc->value[0].b = header.load_const.packed_value;
1388          break;
1389       default:
1390          unreachable("invalid bit_size");
1391       }
1392       break;
1393 
1394    case load_const_full:
1395       switch (lc->def.bit_size) {
1396       case 64:
1397          blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1398          break;
1399 
1400       case 32:
1401          for (unsigned i = 0; i < lc->def.num_components; i++)
1402             lc->value[i].u32 = blob_read_uint32(ctx->blob);
1403          break;
1404 
1405       case 16:
1406          for (unsigned i = 0; i < lc->def.num_components; i++)
1407             lc->value[i].u16 = blob_read_uint16(ctx->blob);
1408          break;
1409 
1410       default:
1411          assert(lc->def.bit_size <= 8);
1412          for (unsigned i = 0; i < lc->def.num_components; i++)
1413             lc->value[i].u8 = blob_read_uint8(ctx->blob);
1414          break;
1415       }
1416       break;
1417    }
1418 
1419    read_add_object(ctx, &lc->def);
1420    return lc;
1421 }
1422 
1423 static void
write_ssa_undef(write_ctx * ctx,const nir_ssa_undef_instr * undef)1424 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1425 {
1426    assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1427 
1428    union packed_instr header;
1429    header.u32 = 0;
1430 
1431    header.undef.instr_type = undef->instr.type;
1432    header.undef.last_component = undef->def.num_components - 1;
1433    header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1434 
1435    blob_write_uint32(ctx->blob, header.u32);
1436    write_add_object(ctx, &undef->def);
1437 }
1438 
1439 static nir_ssa_undef_instr *
read_ssa_undef(read_ctx * ctx,union packed_instr header)1440 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1441 {
1442    nir_ssa_undef_instr *undef =
1443       nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1444                                  decode_bit_size_3bits(header.undef.bit_size));
1445 
1446    read_add_object(ctx, &undef->def);
1447    return undef;
1448 }
1449 
1450 union packed_tex_data {
1451    uint32_t u32;
1452    struct {
1453       unsigned sampler_dim:4;
1454       unsigned dest_type:8;
1455       unsigned coord_components:3;
1456       unsigned is_array:1;
1457       unsigned is_shadow:1;
1458       unsigned is_new_style_shadow:1;
1459       unsigned component:2;
1460       unsigned texture_non_uniform:1;
1461       unsigned sampler_non_uniform:1;
1462       unsigned unused:8; /* Mark unused for valgrind. */
1463    } u;
1464 };
1465 
1466 static void
write_tex(write_ctx * ctx,const nir_tex_instr * tex)1467 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1468 {
1469    assert(tex->num_srcs < 16);
1470    assert(tex->op < 16);
1471 
1472    union packed_instr header;
1473    header.u32 = 0;
1474 
1475    header.tex.instr_type = tex->instr.type;
1476    header.tex.num_srcs = tex->num_srcs;
1477    header.tex.op = tex->op;
1478 
1479    write_dest(ctx, &tex->dest, header, tex->instr.type);
1480 
1481    blob_write_uint32(ctx->blob, tex->texture_index);
1482    blob_write_uint32(ctx->blob, tex->sampler_index);
1483    if (tex->op == nir_texop_tg4)
1484       blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1485 
1486    STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1487    union packed_tex_data packed = {
1488       .u.sampler_dim = tex->sampler_dim,
1489       .u.dest_type = tex->dest_type,
1490       .u.coord_components = tex->coord_components,
1491       .u.is_array = tex->is_array,
1492       .u.is_shadow = tex->is_shadow,
1493       .u.is_new_style_shadow = tex->is_new_style_shadow,
1494       .u.component = tex->component,
1495       .u.texture_non_uniform = tex->texture_non_uniform,
1496       .u.sampler_non_uniform = tex->sampler_non_uniform,
1497    };
1498    blob_write_uint32(ctx->blob, packed.u32);
1499 
1500    for (unsigned i = 0; i < tex->num_srcs; i++) {
1501       union packed_src src;
1502       src.u32 = 0;
1503       src.tex.src_type = tex->src[i].src_type;
1504       write_src_full(ctx, &tex->src[i].src, src);
1505    }
1506 }
1507 
1508 static nir_tex_instr *
read_tex(read_ctx * ctx,union packed_instr header)1509 read_tex(read_ctx *ctx, union packed_instr header)
1510 {
1511    nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1512 
1513    read_dest(ctx, &tex->dest, &tex->instr, header);
1514 
1515    tex->op = header.tex.op;
1516    tex->texture_index = blob_read_uint32(ctx->blob);
1517    tex->sampler_index = blob_read_uint32(ctx->blob);
1518    if (tex->op == nir_texop_tg4)
1519       blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1520 
1521    union packed_tex_data packed;
1522    packed.u32 = blob_read_uint32(ctx->blob);
1523    tex->sampler_dim = packed.u.sampler_dim;
1524    tex->dest_type = packed.u.dest_type;
1525    tex->coord_components = packed.u.coord_components;
1526    tex->is_array = packed.u.is_array;
1527    tex->is_shadow = packed.u.is_shadow;
1528    tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1529    tex->component = packed.u.component;
1530    tex->texture_non_uniform = packed.u.texture_non_uniform;
1531    tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1532 
1533    for (unsigned i = 0; i < tex->num_srcs; i++) {
1534       union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1535       tex->src[i].src_type = src.tex.src_type;
1536    }
1537 
1538    return tex;
1539 }
1540 
1541 static void
1542 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1543 {
1544    union packed_instr header;
1545    header.u32 = 0;
1546 
1547    header.phi.instr_type = phi->instr.type;
1548    header.phi.num_srcs = exec_list_length(&phi->srcs);
1549 
1550    /* Phi nodes are special, since they may reference SSA definitions and
1551     * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1552     * and then store enough information so that a later fixup pass can fill
1553     * them in correctly.
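    *
    * Concretely, each phi source occupies two reserved slots:
    *
    *    uint32 #1: index of the source's SSA def     (patched later)
    *    uint32 #2: index of the predecessor block    (patched later)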
1554     */
1555    write_dest(ctx, &phi->dest, header, phi->instr.type);
1556 
1557    nir_foreach_phi_src(src, phi) {
1558       assert(src->src.is_ssa);
1559       size_t blob_offset = blob_reserve_uint32(ctx->blob);
1560       ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1561       assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1562       write_phi_fixup fixup = {
1563          .blob_offset = blob_offset,
1564          .src = src->src.ssa,
1565          .block = src->pred,
1566       };
1567       util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1568    }
1569 }
1570 
1571 static void
1572 write_fixup_phis(write_ctx *ctx)
1573 {
1574    util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1575       uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1576       blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1577       blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1578    }
1579 
1580    util_dynarray_clear(&ctx->phi_fixups);
1581 }
1582 
1583 static nir_phi_instr *
1584 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1585 {
1586    nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1587 
1588    read_dest(ctx, &phi->dest, &phi->instr, header);
1589 
1590    /* For the same reasons as on the write side, we store the index directly
1591     * in the pointer and let a later pass resolve the phi sources.
1592     *
1593     * In order to ensure that the stashed sources (which are just indices
1594     * from the blob masquerading as pointers for now) never touch any real
1595     * use-def lists, we have to add the phi instruction *before* we set up
1596     * its sources.
1597     */
1598    nir_instr_insert_after_block(blk, &phi->instr);
1599 
1600    for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1601       nir_phi_src *src = ralloc(phi, nir_phi_src);
1602 
1603       src->src.is_ssa = true;
1604       src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1605       src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1606 
1607       /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1608        * we have to set the parent_instr manually.  It doesn't really matter
1609        * when we do it, so we might as well do it here.
1610        */
1611       src->src.parent_instr = &phi->instr;
1612 
1613       /* Stash it in the list of phi sources.  We'll walk this list and fix up
1614        * sources at the very end of read_function_impl.
1615        */
1616       list_add(&src->src.use_link, &ctx->phi_srcs);
1617 
1618       exec_list_push_tail(&phi->srcs, &src->node);
1619    }
1620 
1621    return phi;
1622 }
1623 
1624 static void
1625 read_fixup_phis(read_ctx *ctx)
1626 {
1627    list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1628       src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1629       src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1630 
1631       /* Remove from this list */
1632       list_del(&src->src.use_link);
1633 
1634       list_addtail(&src->src.use_link, &src->src.ssa->uses);
1635    }
1636    assert(list_is_empty(&ctx->phi_srcs));
1637 }
1638 
1639 static void
1640 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1641 {
1642    /* These aren't handled because they require special block linking */
1643    assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1644 
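   /* Only four jump types fit in the packed header's type field. */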
1645    assert(jmp->type < 4);
1646 
1647    union packed_instr header;
1648    header.u32 = 0;
1649 
1650    header.jump.instr_type = jmp->instr.type;
1651    header.jump.type = jmp->type;
1652 
1653    blob_write_uint32(ctx->blob, header.u32);
1654 }
1655 
1656 static nir_jump_instr *
1657 read_jump(read_ctx *ctx, union packed_instr header)
1658 {
1659    /* These aren't handled because they require special block linking */
1660    assert(header.jump.type != nir_jump_goto &&
1661           header.jump.type != nir_jump_goto_if);
1662 
1663    nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1664    return jmp;
1665 }
1666 
1667 static void
1668 write_call(write_ctx *ctx, const nir_call_instr *call)
1669 {
1670    blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1671 
1672    for (unsigned i = 0; i < call->num_params; i++)
1673       write_src(ctx, &call->params[i]);
1674 }
1675 
1676 static nir_call_instr *
1677 read_call(read_ctx *ctx)
1678 {
1679    nir_function *callee = read_object(ctx);
1680    nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1681 
1682    for (unsigned i = 0; i < call->num_params; i++)
1683       read_src(ctx, &call->params[i], call);
1684 
1685    return call;
1686 }
1687 
1688 static void
1689 write_instr(write_ctx *ctx, const nir_instr *instr)
1690 {
1691    /* We have only 4 bits for the instruction type. */
1692    assert(instr->type < 16);
1693 
1694    switch (instr->type) {
1695    case nir_instr_type_alu:
1696       write_alu(ctx, nir_instr_as_alu(instr));
1697       break;
1698    case nir_instr_type_deref:
1699       write_deref(ctx, nir_instr_as_deref(instr));
1700       break;
1701    case nir_instr_type_intrinsic:
1702       write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1703       break;
1704    case nir_instr_type_load_const:
1705       write_load_const(ctx, nir_instr_as_load_const(instr));
1706       break;
1707    case nir_instr_type_ssa_undef:
1708       write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1709       break;
1710    case nir_instr_type_tex:
1711       write_tex(ctx, nir_instr_as_tex(instr));
1712       break;
1713    case nir_instr_type_phi:
1714       write_phi(ctx, nir_instr_as_phi(instr));
1715       break;
1716    case nir_instr_type_jump:
1717       write_jump(ctx, nir_instr_as_jump(instr));
1718       break;
1719    case nir_instr_type_call:
1720       blob_write_uint32(ctx->blob, instr->type);
1721       write_call(ctx, nir_instr_as_call(instr));
1722       break;
1723    case nir_instr_type_parallel_copy:
1724       unreachable("Cannot write parallel copies");
1725    default:
1726       unreachable("bad instr type");
1727    }
1728 }
1729 
1730 /* Return the number of instructions read. */
1731 static unsigned
1732 read_instr(read_ctx *ctx, nir_block *block)
1733 {
1734    STATIC_ASSERT(sizeof(union packed_instr) == 4);
1735    union packed_instr header;
1736    header.u32 = blob_read_uint32(ctx->blob);
1737    nir_instr *instr;
1738 
1739    switch (header.any.instr_type) {
1740    case nir_instr_type_alu:
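      /* Consecutive ALU instructions with identical headers (common after
       * scalarization) share a single header; it records how many follow-up
       * instructions reuse it.
       */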
1741       for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1742          nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1743       return header.alu.num_followup_alu_sharing_header + 1;
1744    case nir_instr_type_deref:
1745       instr = &read_deref(ctx, header)->instr;
1746       break;
1747    case nir_instr_type_intrinsic:
1748       instr = &read_intrinsic(ctx, header)->instr;
1749       break;
1750    case nir_instr_type_load_const:
1751       instr = &read_load_const(ctx, header)->instr;
1752       break;
1753    case nir_instr_type_ssa_undef:
1754       instr = &read_ssa_undef(ctx, header)->instr;
1755       break;
1756    case nir_instr_type_tex:
1757       instr = &read_tex(ctx, header)->instr;
1758       break;
1759    case nir_instr_type_phi:
1760       /* Phi instructions are a bit of a special case when reading because we
1761        * don't want the insertion to automatically set up their use/defs for
1762        * us.  Instead, we need to wait until all the blocks/instructions
1763        * are read so that we can set their sources up.
1764        */
1765       read_phi(ctx, block, header);
1766       return 1;
1767    case nir_instr_type_jump:
1768       instr = &read_jump(ctx, header)->instr;
1769       break;
1770    case nir_instr_type_call:
1771       instr = &read_call(ctx)->instr;
1772       break;
1773    case nir_instr_type_parallel_copy:
1774       unreachable("Cannot read parallel copies");
1775    default:
1776       unreachable("bad instr type");
1777    }
1778 
1779    nir_instr_insert_after_block(block, instr);
1780    return 1;
1781 }
1782 
1783 static void
1784 write_block(write_ctx *ctx, const nir_block *block)
1785 {
1786    write_add_object(ctx, block);
1787    blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1788 
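   /* Reset the ALU-header-sharing state; a header written in one block is
    * never shared with instructions in another.
    */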
1789    ctx->last_instr_type = ~0;
1790    ctx->last_alu_header_offset = 0;
1791 
1792    nir_foreach_instr(instr, block) {
1793       write_instr(ctx, instr);
1794       ctx->last_instr_type = instr->type;
1795    }
1796 }
1797 
1798 static void
1799 read_block(read_ctx *ctx, struct exec_list *cf_list)
1800 {
1801    /* Don't actually create a new block.  Just use the one from the tail of
1802     * the list.  NIR guarantees that the tail of the list is a block and that
1803     * no two blocks are side-by-side in the IR; the tail block should still be empty.
1804     */
1805    nir_block *block =
1806       exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1807 
1808    read_add_object(ctx, block);
1809    unsigned num_instrs = blob_read_uint32(ctx->blob);
1810    for (unsigned i = 0; i < num_instrs;) {
1811       i += read_instr(ctx, block);
1812    }
1813 }
1814 
1815 static void
1816 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1817 
1818 static void
1819 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1820 
1821 static void
1822 write_if(write_ctx *ctx, nir_if *nif)
1823 {
1824    write_src(ctx, &nif->condition);
1825 
1826    write_cf_list(ctx, &nif->then_list);
1827    write_cf_list(ctx, &nif->else_list);
1828 }
1829 
1830 static void
1831 read_if(read_ctx *ctx, struct exec_list *cf_list)
1832 {
1833    nir_if *nif = nir_if_create(ctx->nir);
1834 
1835    read_src(ctx, &nif->condition, nif);
1836 
1837    nir_cf_node_insert_end(cf_list, &nif->cf_node);
1838 
1839    read_cf_list(ctx, &nif->then_list);
1840    read_cf_list(ctx, &nif->else_list);
1841 }
1842 
1843 static void
1844 write_loop(write_ctx *ctx, nir_loop *loop)
1845 {
1846    write_cf_list(ctx, &loop->body);
1847 }
1848 
1849 static void
1850 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1851 {
1852    nir_loop *loop = nir_loop_create(ctx->nir);
1853 
1854    nir_cf_node_insert_end(cf_list, &loop->cf_node);
1855 
1856    read_cf_list(ctx, &loop->body);
1857 }
1858 
1859 static void
1860 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1861 {
1862    blob_write_uint32(ctx->blob, cf->type);
1863 
1864    switch (cf->type) {
1865    case nir_cf_node_block:
1866       write_block(ctx, nir_cf_node_as_block(cf));
1867       break;
1868    case nir_cf_node_if:
1869       write_if(ctx, nir_cf_node_as_if(cf));
1870       break;
1871    case nir_cf_node_loop:
1872       write_loop(ctx, nir_cf_node_as_loop(cf));
1873       break;
1874    default:
1875       unreachable("bad cf type");
1876    }
1877 }
1878 
1879 static void
1880 read_cf_node(read_ctx *ctx, struct exec_list *list)
1881 {
1882    nir_cf_node_type type = blob_read_uint32(ctx->blob);
1883 
1884    switch (type) {
1885    case nir_cf_node_block:
1886       read_block(ctx, list);
1887       break;
1888    case nir_cf_node_if:
1889       read_if(ctx, list);
1890       break;
1891    case nir_cf_node_loop:
1892       read_loop(ctx, list);
1893       break;
1894    default:
1895       unreachable("bad cf type");
1896    }
1897 }
1898 
1899 static void
1900 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1901 {
1902    blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1903    foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1904       write_cf_node(ctx, cf);
1905    }
1906 }
1907 
1908 static void
1909 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1910 {
1911    uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1912    for (unsigned i = 0; i < num_cf_nodes; i++)
1913       read_cf_node(ctx, cf_list);
1914 }
1915 
1916 static void
1917 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1918 {
1919    blob_write_uint8(ctx->blob, fi->structured);
1920 
1921    write_var_list(ctx, &fi->locals);
1922    write_reg_list(ctx, &fi->registers);
1923    blob_write_uint32(ctx->blob, fi->reg_alloc);
1924 
1925    write_cf_list(ctx, &fi->body);
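   /* Every SSA def and block in the body now has an index, so the reserved
    * phi-source slots can be patched in place.
    */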
1926    write_fixup_phis(ctx);
1927 }
1928 
1929 static nir_function_impl *
1930 read_function_impl(read_ctx *ctx, nir_function *fxn)
1931 {
1932    nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1933    fi->function = fxn;
1934 
1935    fi->structured = blob_read_uint8(ctx->blob);
1936 
1937    read_var_list(ctx, &fi->locals);
1938    read_reg_list(ctx, &fi->registers);
1939    fi->reg_alloc = blob_read_uint32(ctx->blob);
1940 
1941    read_cf_list(ctx, &fi->body);
1942    read_fixup_phis(ctx);
1943 
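   /* Metadata is not serialized; passes must recompute whatever they need. */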
1944    fi->valid_metadata = 0;
1945 
1946    return fi;
1947 }
1948 
1949 static void
1950 write_function(write_ctx *ctx, const nir_function *fxn)
1951 {
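   /* Pack the function's boolean state into one flags word:
    *   bit 0: is_entrypoint
    *   bit 1: has a name
    *   bit 2: has an impl (written later, in a second pass)
    */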
1952    uint32_t flags = fxn->is_entrypoint;
1953    if (fxn->name)
1954       flags |= 0x2;
1955    if (fxn->impl)
1956       flags |= 0x4;
1957    blob_write_uint32(ctx->blob, flags);
1958    if (fxn->name)
1959       blob_write_string(ctx->blob, fxn->name);
1960 
1961    write_add_object(ctx, fxn);
1962 
1963    blob_write_uint32(ctx->blob, fxn->num_params);
1964    for (unsigned i = 0; i < fxn->num_params; i++) {
1965       uint32_t val =
1966          ((uint32_t)fxn->params[i].num_components) |
1967          ((uint32_t)fxn->params[i].bit_size) << 8;
1968       blob_write_uint32(ctx->blob, val);
1969    }
1970 
1971    /* At first glance, it looks like we should write the function_impl here.
1972     * However, call instructions need to be able to reference the callee's
1973     * nir_function, so every function must be assigned an index before any
1974     * impl is written.  We stop here and write function_impls in a second pass.
1975     */
1976 }
1977 
1978 static void
1979 read_function(read_ctx *ctx)
1980 {
1981    uint32_t flags = blob_read_uint32(ctx->blob);
1982    bool has_name = flags & 0x2;
1983    char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1984 
1985    nir_function *fxn = nir_function_create(ctx->nir, name);
1986 
1987    read_add_object(ctx, fxn);
1988 
1989    fxn->num_params = blob_read_uint32(ctx->blob);
1990    fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1991    for (unsigned i = 0; i < fxn->num_params; i++) {
1992       uint32_t val = blob_read_uint32(ctx->blob);
1993       fxn->params[i].num_components = val & 0xff;
1994       fxn->params[i].bit_size = (val >> 8) & 0xff;
1995    }
1996 
1997    fxn->is_entrypoint = flags & 0x1;
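   /* Stash a sentinel for now; the real impl is read in the second pass in
    * nir_deserialize() and replaces it.
    */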
1998    if (flags & 0x4)
1999       fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
2000 }
2001 
2002 /**
2003  * Serialize NIR into a binary blob.
2004  *
2005  * \param strip  Don't serialize information only useful for debugging,
2006  *               such as variable names, making cache hits from similar
2007  *               shaders more likely.
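 *
 * A minimal usage sketch; cache_put() is a hypothetical caller-side helper:
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, shader, true);
 *    cache_put(key, blob.data, blob.size);
 *    blob_finish(&blob);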
2008  */
2009 void
2010 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2011 {
2012    write_ctx ctx = {0};
2013    ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2014    ctx.blob = blob;
2015    ctx.nir = nir;
2016    ctx.strip = strip;
2017    util_dynarray_init(&ctx.phi_fixups, NULL);
2018 
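   /* Reserve a dword for the object-index count; it is only known once the
    * whole shader has been written, so it is patched in at the end.
    */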
2019    size_t idx_size_offset = blob_reserve_uint32(blob);
2020 
2021    struct shader_info info = nir->info;
2022    uint32_t strings = 0;
2023    if (!strip && info.name)
2024       strings |= 0x1;
2025    if (!strip && info.label)
2026       strings |= 0x2;
2027    blob_write_uint32(blob, strings);
2028    if (!strip && info.name)
2029       blob_write_string(blob, info.name);
2030    if (!strip && info.label)
2031       blob_write_string(blob, info.label);
2032    info.name = info.label = NULL;
2033    blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2034 
2035    write_var_list(&ctx, &nir->variables);
2036 
2037    blob_write_uint32(blob, nir->num_inputs);
2038    blob_write_uint32(blob, nir->num_uniforms);
2039    blob_write_uint32(blob, nir->num_outputs);
2040    blob_write_uint32(blob, nir->shared_size);
2041    blob_write_uint32(blob, nir->scratch_size);
2042 
2043    blob_write_uint32(blob, exec_list_length(&nir->functions));
2044    nir_foreach_function(fxn, nir) {
2045       write_function(&ctx, fxn);
2046    }
2047 
2048    nir_foreach_function(fxn, nir) {
2049       if (fxn->impl)
2050          write_function_impl(&ctx, fxn->impl);
2051    }
2052 
2053    blob_write_uint32(blob, nir->constant_data_size);
2054    if (nir->constant_data_size > 0)
2055       blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2056 
2057    *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
2058 
2059    _mesa_hash_table_destroy(ctx.remap_table, NULL);
2060    util_dynarray_fini(&ctx.phi_fixups);
2061 }
2062 
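/**
 * Deserialize a NIR shader from a blob written by nir_serialize().
 *
 * The new shader is allocated out of mem_ctx.  The compiler options are not
 * stored in the blob and must be supplied by the caller.
 */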
2063 nir_shader *
2064 nir_deserialize(void *mem_ctx,
2065                 const struct nir_shader_compiler_options *options,
2066                 struct blob_reader *blob)
2067 {
2068    read_ctx ctx = {0};
2069    ctx.blob = blob;
2070    list_inithead(&ctx.phi_srcs);
2071    ctx.idx_table_len = blob_read_uint32(blob);
2072    ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2073 
2074    uint32_t strings = blob_read_uint32(blob);
2075    char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2076    char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2077 
2078    struct shader_info info;
2079    blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2080 
2081    ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2082 
2083    info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2084    info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2085 
2086    ctx.nir->info = info;
2087 
2088    read_var_list(&ctx, &ctx.nir->variables);
2089 
2090    ctx.nir->num_inputs = blob_read_uint32(blob);
2091    ctx.nir->num_uniforms = blob_read_uint32(blob);
2092    ctx.nir->num_outputs = blob_read_uint32(blob);
2093    ctx.nir->shared_size = blob_read_uint32(blob);
2094    ctx.nir->scratch_size = blob_read_uint32(blob);
2095 
2096    unsigned num_functions = blob_read_uint32(blob);
2097    for (unsigned i = 0; i < num_functions; i++)
2098       read_function(&ctx);
2099 
2100    nir_foreach_function(fxn, ctx.nir) {
2101       if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2102          fxn->impl = read_function_impl(&ctx, fxn);
2103    }
2104 
2105    ctx.nir->constant_data_size = blob_read_uint32(blob);
2106    if (ctx.nir->constant_data_size > 0) {
2107       ctx.nir->constant_data =
2108          ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2109       blob_copy_bytes(blob, ctx.nir->constant_data,
2110                       ctx.nir->constant_data_size);
2111    }
2112 
2113    free(ctx.idx_table);
2114 
2115    return ctx.nir;
2116 }
2117 
2118 void
2119 nir_shader_serialize_deserialize(nir_shader *shader)
2120 {
2121    const struct nir_shader_compiler_options *options = shader->options;
2122 
2123    struct blob writer;
2124    blob_init(&writer);
2125    nir_serialize(&writer, shader, false);
2126 
2127    /* Delete all of the shader's ralloc children but leave the shader itself alone. */
2128    void *dead_ctx = ralloc_context(NULL);
2129    ralloc_adopt(dead_ctx, shader);
2130    ralloc_free(dead_ctx);
2131 
2132    dead_ctx = ralloc_context(NULL);
2133 
2134    struct blob_reader reader;
2135    blob_reader_init(&reader, writer.data, writer.size);
2136    nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2137 
2138    blob_finish(&writer);
2139 
2140    nir_shader_replace(shader, copy);
2141    ralloc_free(dead_ctx);
2142 }
2143