1 /*
2  * Copyright (c) 2012-2015 Etnaviv Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef H_ETNA_INTERNAL
25 #define H_ETNA_INTERNAL
26 
27 #include <assert.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 
31 #include "hw/state.xml.h"
32 #include "hw/state_3d.xml.h"
33 
34 #include <etnaviv_drmif.h>
35 
/* Compile-time limits. ETNA_MAX_UNIFORMS and ETNA_MAX_PIXELPIPES are used
 * further down in this header to size arrays in the compiled state structs. */
#define ETNA_NUM_INPUTS      (16)
#define ETNA_NUM_VARYINGS    (8)
#define ETNA_NUM_LOD         (14)
#define ETNA_NUM_LAYERS      (6)
#define ETNA_MAX_UNIFORMS    (256)
#define ETNA_MAX_PIXELPIPES  (2)

/* Width of every RS operation must be a multiple of 16 (width % 16 == 0). */
#define ETNA_RS_WIDTH_MASK   (15)
/* Height of tiled RS operations must be a multiple of 4 (height % 4 == 0). */
#define ETNA_RS_HEIGHT_MASK  (4 - 1)
/* PE render targets have to be aligned to 64 bytes. */
#define ETNA_PE_ALIGNMENT    (64)

/* Margins (fixp16) between the computed sizes and the value that is sent to
 * the chip. The numbers are the ones used by the Vivante driver on gc2000.
 * The scissor right/bottom margins used to be -1; it is not known whether
 * older hardware relied on that or whether it was just a guess. If older
 * hardware does rely on it, these need to move into the _specs structure. */
#define ETNA_SE_SCISSOR_MARGIN_RIGHT   (0x1119)
#define ETNA_SE_SCISSOR_MARGIN_BOTTOM  (0x1111)
#define ETNA_SE_CLIP_MARGIN_RIGHT      (0xffff)
#define ETNA_SE_CLIP_MARGIN_BOTTOM     (0xffff)
60 
/* GPU chip 3D specs.
 *
 * A set of one-bit capability flags followed by counts, sizes, limits and
 * state addresses describing a particular chip. */
struct etna_specs {
   /* HALTI (gross architecture) level. -1 for pre-HALTI.
    * Explicitly signed: whether a plain "int" bit-field is signed is
    * implementation-defined, and this field has to be able to hold -1. */
   signed int halti : 8;
   /* supports SUPERTILE (64x64) tiling? */
   unsigned can_supertile : 1;
   /* needs z=(z+w)/2, for older GCxxx */
   unsigned vs_need_z_div : 1;
   /* supports trigonometric instructions */
   unsigned has_sin_cos_sqrt : 1;
   /* has SIGN/FLOOR/CEIL instructions */
   unsigned has_sign_floor_ceil : 1;
   /* can use VS_RANGE, PS_RANGE registers */
   unsigned has_shader_range_registers : 1;
   /* has the new sin/cos/log functions */
   unsigned has_new_transcendentals : 1;
   /* has the new dp2/dpX_norm instructions, among others */
   unsigned has_halti2_instructions : 1;
   /* supports single-buffer rendering with multiple pixel pipes */
   unsigned single_buffer : 1;
   /* has unified uniforms memory */
   unsigned has_unified_uniforms : 1;
   /* can load shader instructions from memory */
   unsigned has_icache : 1;
   /* ASTC texture support (and has associated states) */
   unsigned tex_astc : 1;
   /* has BLT engine instead of RS */
   unsigned use_blt : 1;
   /* can use any kind of wrapping mode on npot textures.
    * NOTE(review): unlike the flags above this is a full-width unsigned, not
    * a one-bit field; left as is to keep the layout -- confirm intent. */
   unsigned npot_tex_any_wrap;
   /* number of bits per TS tile */
   unsigned bits_per_tile;
   /* clear value for TS (dependent on bits_per_tile) */
   uint32_t ts_clear_value;
   /* base of vertex texture units */
   unsigned vertex_sampler_offset;
   /* number of fragment sampler units */
   unsigned fragment_sampler_count;
   /* number of vertex sampler units */
   unsigned vertex_sampler_count;
   /* size of vertex shader output buffer */
   unsigned vertex_output_buffer_size;
   /* maximum number of vertex element configurations */
   unsigned vertex_max_elements;
   /* size of a cached vertex (?) */
   unsigned vertex_cache_size;
   /* number of shader cores */
   unsigned shader_core_count;
   /* number of vertex streams */
   unsigned stream_count;
   /* vertex shader memory address */
   uint32_t vs_offset;
   /* pixel shader memory address */
   uint32_t ps_offset;
   /* vertex shader uniforms address */
   uint32_t vs_uniforms_offset;
   /* pixel shader uniforms address */
   uint32_t ps_uniforms_offset;
   /* vertex/fragment shader max instructions */
   uint32_t max_instructions;
   /* maximum number of varyings */
   unsigned max_varyings;
   /* maximum number of registers */
   unsigned max_registers;
   /* maximum vertex uniforms */
   unsigned max_vs_uniforms;
   /* maximum pixel uniforms */
   unsigned max_ps_uniforms;
   /* maximum texture size */
   unsigned max_texture_size;
   /* maximum render target size */
   unsigned max_rendertarget_size;
   /* available pixel pipes */
   unsigned pixel_pipes;
   /* number of constants */
   unsigned num_constants;
};
138 
139 /* Compiled Gallium state. All the different compiled state atoms are woven
140  * together and uploaded only when it is necessary to synchronize the state,
141  * for example before rendering. */
142 
/* Compiled form of pipe_blend_color. */
struct compiled_blend_color {
   /* four float color components */
   float color[4];
   /* 32-bit word named after the PE_ALPHA_BLEND_COLOR state */
   uint32_t PE_ALPHA_BLEND_COLOR;
};
148 
/* Compiled form of pipe_stencil_ref: two 32-bit words named after the
 * PE_STENCIL_CONFIG and PE_STENCIL_CONFIG_EXT states. */
struct compiled_stencil_ref {
   uint32_t PE_STENCIL_CONFIG,
            PE_STENCIL_CONFIG_EXT;
};
154 
/* Compiled form of pipe_scissor_state. Every member is a 32-bit word named
 * after the SE state it belongs to. */
struct compiled_scissor_state {
   /* scissor rectangle */
   uint32_t SE_SCISSOR_LEFT, SE_SCISSOR_TOP,
            SE_SCISSOR_RIGHT, SE_SCISSOR_BOTTOM;
   /* clip limits */
   uint32_t SE_CLIP_RIGHT, SE_CLIP_BOTTOM;
};
164 
/* Compiled form of pipe_viewport_state. Every member is a 32-bit word named
 * after the hardware state it belongs to. */
struct compiled_viewport_state {
   /* viewport transform: scale, then offset, for X/Y/Z */
   uint32_t PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Z;
   uint32_t PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Z;
   /* scissor rectangle */
   uint32_t SE_SCISSOR_LEFT, SE_SCISSOR_TOP,
            SE_SCISSOR_RIGHT, SE_SCISSOR_BOTTOM;
   /* clip limits */
   uint32_t SE_CLIP_RIGHT, SE_CLIP_BOTTOM;
   /* depth range */
   uint32_t PE_DEPTH_NEAR, PE_DEPTH_FAR;
};
182 
183 /* Compiled pipe_framebuffer_state */
184 struct compiled_framebuffer_state {
185    struct pipe_surface *cbuf, *zsbuf; /* keep reference to surfaces */
186    uint32_t GL_MULTI_SAMPLE_CONFIG;
187    uint32_t PE_COLOR_FORMAT;
188    uint32_t PE_DEPTH_CONFIG;
189    struct etna_reloc PE_DEPTH_ADDR;
190    struct etna_reloc PE_PIPE_DEPTH_ADDR[ETNA_MAX_PIXELPIPES];
191    uint32_t PE_DEPTH_STRIDE;
192    uint32_t PE_HDEPTH_CONTROL;
193    uint32_t PE_DEPTH_NORMALIZE;
194    struct etna_reloc PE_COLOR_ADDR;
195    struct etna_reloc PE_PIPE_COLOR_ADDR[ETNA_MAX_PIXELPIPES];
196    uint32_t PE_COLOR_STRIDE;
197    uint32_t SE_SCISSOR_LEFT;
198    uint32_t SE_SCISSOR_TOP;
199    uint32_t SE_SCISSOR_RIGHT;
200    uint32_t SE_SCISSOR_BOTTOM;
201    uint32_t SE_CLIP_RIGHT;
202    uint32_t SE_CLIP_BOTTOM;
203    uint32_t RA_MULTISAMPLE_UNK00E04;
204    uint32_t RA_MULTISAMPLE_UNK00E10[VIVS_RA_MULTISAMPLE_UNK00E10__LEN];
205    uint32_t RA_CENTROID_TABLE[VIVS_RA_CENTROID_TABLE__LEN];
206    uint32_t TS_MEM_CONFIG;
207    uint32_t TS_DEPTH_CLEAR_VALUE;
208    struct etna_reloc TS_DEPTH_STATUS_BASE;
209    struct etna_reloc TS_DEPTH_SURFACE_BASE;
210    uint32_t TS_COLOR_CLEAR_VALUE;
211    struct etna_reloc TS_COLOR_STATUS_BASE;
212    struct etna_reloc TS_COLOR_SURFACE_BASE;
213    uint32_t PE_LOGIC_OP;
214    bool msaa_mode; /* adds input (and possible temp) to PS */
215 };
216 
217 /* Compiled context->create_vertex_elements_state */
218 struct compiled_vertex_elements_state {
219    unsigned num_elements;
220    uint32_t FE_VERTEX_ELEMENT_CONFIG[VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN];
221    uint32_t NFE_GENERIC_ATTRIB_CONFIG0[VIVS_NFE_GENERIC_ATTRIB__LEN];
222    uint32_t NFE_GENERIC_ATTRIB_SCALE[VIVS_NFE_GENERIC_ATTRIB__LEN];
223    uint32_t NFE_GENERIC_ATTRIB_CONFIG1[VIVS_NFE_GENERIC_ATTRIB__LEN];
224 };
225 
226 /* Compiled context->set_vertex_buffer result */
227 struct compiled_set_vertex_buffer {
228    uint32_t FE_VERTEX_STREAM_CONTROL;
229    uint32_t FE_VERTEX_STREAM_UNK14680;
230    struct etna_reloc FE_VERTEX_STREAM_BASE_ADDR;
231 };
232 
233 /* Compiled linked VS+PS shader state */
234 struct compiled_shader_state {
235    uint32_t RA_CONTROL;
236    uint32_t PA_ATTRIBUTE_ELEMENT_COUNT;
237    uint32_t PA_CONFIG;
238    uint32_t PA_SHADER_ATTRIBUTES[VIVS_PA_SHADER_ATTRIBUTES__LEN];
239    uint32_t VS_END_PC;
240    uint32_t VS_OUTPUT_COUNT; /* number of outputs if point size per vertex disabled */
241    uint32_t VS_OUTPUT_COUNT_PSIZE; /* number of outputs of point size per vertex enabled */
242    uint32_t VS_INPUT_COUNT;
243    uint32_t VS_TEMP_REGISTER_CONTROL;
244    uint32_t VS_OUTPUT[4];
245    uint32_t VS_INPUT[4];
246    uint32_t VS_LOAD_BALANCING;
247    uint32_t VS_START_PC;
248    uint32_t PS_END_PC;
249    uint32_t PS_OUTPUT_REG;
250    uint32_t PS_INPUT_COUNT;
251    uint32_t PS_INPUT_COUNT_MSAA; /* Adds an input */
252    uint32_t PS_TEMP_REGISTER_CONTROL;
253    uint32_t PS_TEMP_REGISTER_CONTROL_MSAA; /* Adds a temporary if needed to make space for extra input */
254    uint32_t PS_CONTROL;
255    uint32_t PS_START_PC;
256    uint32_t GL_VARYING_TOTAL_COMPONENTS;
257    uint32_t GL_VARYING_NUM_COMPONENTS;
258    uint32_t GL_VARYING_COMPONENT_USE[2];
259    uint32_t GL_HALTI5_SH_SPECIALS;
260    unsigned vs_inst_mem_size;
261    unsigned vs_uniforms_size;
262    unsigned ps_inst_mem_size;
263    unsigned ps_uniforms_size;
264    uint32_t *VS_INST_MEM;
265    uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
266    uint32_t *PS_INST_MEM;
267    uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
268    struct etna_reloc PS_INST_ADDR;
269    struct etna_reloc VS_INST_ADDR;
270 };
271 
272 /* state of some 3d and common registers relevant to etna driver */
273 struct etna_3d_state {
274    unsigned vs_uniforms_size;
275    unsigned ps_uniforms_size;
276 
277    uint32_t /*01008*/ PS_INPUT_COUNT;
278    uint32_t /*0100C*/ PS_TEMP_REGISTER_CONTROL;
279    uint32_t /*03818*/ GL_MULTI_SAMPLE_CONFIG;
280    uint32_t /*05000*/ VS_UNIFORMS[VIVS_VS_UNIFORMS__LEN];
281    uint32_t /*07000*/ PS_UNIFORMS[VIVS_PS_UNIFORMS__LEN];
282 };
283 
/* Helpers to assist creating and setting bitarrays (eg, for varyings).
 * field_size must be a power of two, and <= 32.
 *
 * DEFINE_ETNA_BITARRAY declares `name` as an array of 32-bit words holding
 * `num` fields of `field_size` bits each. The word count is rounded up, so a
 * total bit count that is not a multiple of 32 still gets enough storage
 * instead of a truncated (possibly zero-length) array. */
#define DEFINE_ETNA_BITARRAY(name, num, field_size) \
   uint32_t name[((num) * (field_size) + 31) / 32]
288 
/* OR `value` into field number `index` of a packed bit array.
 *
 * The array is treated as consecutive fields of `field_size` bits, packed
 * into 32-bit words starting at the least significant bits of array[0].
 * The value is OR-ed in; bits already set in the field are not cleared.
 *
 *   array       words backing the bit array
 *   array_size  number of 32-bit words in `array`
 *   field_size  width of one field in bits; a power of two in 1..32
 *   index       which field to write
 *   value       value to store; must fit in `field_size` bits
 *
 * The contract is checked with assert() only, so release builds (NDEBUG)
 * perform no checking. */
static inline void
etna_bitarray_set(uint32_t *array, size_t array_size, size_t field_size,
                  size_t index, uint32_t value)
{
   size_t bit = index * field_size;

   /* field_size is used as a divisor and bounds the shift below, so check it
    * before anything else. A power-of-two width never straddles two words. */
   assert(field_size >= 1 && field_size <= 32);
   assert((field_size & (field_size - 1)) == 0);
   assert(index < array_size * 32 / field_size);
   /* Range-check in 64 bits: shifting a 32-bit 1 by 31 or 32 is undefined,
    * yet 32 is a legal field width. */
   assert((uint64_t)value < (UINT64_C(1) << field_size));

   array[bit / 32] |= value << (bit % 32);
}
301 
/* Four-argument convenience form that supplies the word count of `array`
 * through ARRAY_SIZE (expected to come from another header; it is not
 * defined here). A function-like macro is not re-expanded inside its own
 * expansion, so the call below reaches the function of the same name. */
#define etna_bitarray_set(array, field_size, index, value)    \
   etna_bitarray_set((array), ARRAY_SIZE(array), (field_size), \
                     (index), (value))
304 
305 #endif
306