1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
23 #ifndef RADEON_CODE_H
24 #define RADEON_CODE_H
25 
26 #include <stdint.h>
27 
/* Fragment program (PFS) limits, R300 class. */
#define R300_PFS_MAX_ALU_INST 64
#define R300_PFS_MAX_TEX_INST 32
#define R300_PFS_MAX_TEX_INDIRECT 4
#define R300_PFS_NUM_TEMP_REGS 32
#define R300_PFS_NUM_CONST_REGS 32

/* Fragment program (PFS) instruction limits, R400 class. */
#define R400_PFS_MAX_ALU_INST 512
#define R400_PFS_MAX_TEX_INST 512

/* Fragment program (PFS) limits, R500 class. */
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4

/*
 * On r500 this maximum depth counts every combination of loops and
 * subroutine jumps, not loops alone.
 */
#define R500_PVS_MAX_LOOP_DEPTH 8

/*
 * Expands in terms of STATE_INTERNAL_DRIVER, which this header does not
 * define; it must be visible wherever this macro is used.
 */
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
48 
/**
 * Constant kinds (RC_CONSTANT_xxx), as stored in rc_constant::Type.
 */
enum {
	/**
	 * A constant whose meaning this compiler does not know. The
	 * constants of a Mesa gl_program, for instance, become external
	 * constants.
	 */
	RC_CONSTANT_EXTERNAL = 0,

	/** Immediate constant; see the Immediate member of struct rc_constant. */
	RC_CONSTANT_IMMEDIATE = 1,

	/**
	 * A constant that refers to state this compiler itself knows about
	 * (see RC_STATE_xxx), i.e. *not* arbitrary Mesa (or other) state.
	 */
	RC_CONSTANT_STATE = 2
};
65 
/**
 * Identifiers of the compiler-known state values (RC_STATE_xxx) that an
 * RC_CONSTANT_STATE constant can refer to.
 */
enum {
	RC_STATE_SHADOW_AMBIENT = 0,

	/* R300-specific state values. */
	RC_STATE_R300_WINDOW_DIMENSION = 1,
	RC_STATE_R300_TEXRECT_FACTOR = 2,
	RC_STATE_R300_TEXSCALE_FACTOR = 3,
	RC_STATE_R300_VIEWPORT_SCALE = 4,
	RC_STATE_R300_VIEWPORT_OFFSET = 5
};
75 
/**
 * One entry of a constant list.
 *
 * NOTE(review): presumably Type selects which member of the union is
 * meaningful - confirm against the users of this struct.
 */
struct rc_constant {
	unsigned Type : 2; /**< RC_CONSTANT_xxx */
	unsigned Size : 3; /* NOTE(review): meaning not visible in this header - confirm */

	union {
		unsigned External;
		float Immediate[4];
		unsigned State[2];
	} u;
};
86 
/**
 * A list of rc_constant entries, handled through the rc_constants_*
 * functions.
 */
struct rc_constant_list {
	struct rc_constant *Constants; /* array of entries */
	unsigned Count;

	/*
	 * NOTE(review): purpose not visible in this header - confirm against
	 * the implementation. The name uses a form reserved to the C
	 * implementation (underscore + uppercase) but is kept because users
	 * of this struct may reference it.
	 */
	unsigned _Reserved;
};
93 
/*
 * Constant list API. The definitions live outside this header.
 * NOTE(review): the descriptions below are inferred from the names and
 * signatures only - confirm against the implementation.
 */

/* Lifetime: presumably set up, duplicate (src into dst) and release a list. */
void rc_constants_init(struct rc_constant_list *c);
void rc_constants_copy(struct rc_constant_list *dst, struct rc_constant_list *src);
void rc_constants_destroy(struct rc_constant_list *c);

/* Adders: each returns an unsigned value, presumably the index of the entry. */
unsigned rc_constants_add(struct rc_constant_list *c, struct rc_constant *constant);
unsigned rc_constants_add_state(struct rc_constant_list *c, unsigned state1, unsigned state2);
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list *c, const float *data);
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list *c, float data, unsigned *swizzle);

/* Debugging aid: presumably dumps the list contents. */
void rc_constants_print(struct rc_constant_list *c);
102 
/**
 * Comparison functions.
 *
 * \note The numbering is deliberate: adding GL_NEVER to an
 * RC_COMPARE_FUNC_xxx value yields the matching GL compare function.
 */
typedef enum {
	RC_COMPARE_FUNC_NEVER = 0,
	RC_COMPARE_FUNC_LESS = 1,
	RC_COMPARE_FUNC_EQUAL = 2,
	RC_COMPARE_FUNC_LEQUAL = 3,
	RC_COMPARE_FUNC_GREATER = 4,
	RC_COMPARE_FUNC_NOTEQUAL = 5,
	RC_COMPARE_FUNC_GEQUAL = 6,
	RC_COMPARE_FUNC_ALWAYS = 7
} rc_compare_func;
119 
/**
 * Texture coordinate wrapping modes.
 *
 * These do not yet correspond exactly to their GL counterparts.
 */
typedef enum {
	RC_WRAP_NONE = 0,
	RC_WRAP_REPEAT = 1,
	RC_WRAP_MIRRORED_REPEAT = 2,
	RC_WRAP_MIRRORED_CLAMP = 3
} rc_wrap_mode;
131 
/**
 * External state that affects how a fragment program gets compiled.
 *
 * Holds one record per texture unit, 16 units in total.
 */
struct r300_fragment_program_external_state {
	struct {
		/**
		 * Swizzle consumed by some lowering passes
		 * (shadow comparison, unorm->snorm conversion).
		 */
		unsigned texture_swizzle : 12;

		/**
		 * Compare function to apply when the sampler is used as a
		 * shadow sampler; \ref RC_COMPARE_FUNC_NEVER (aka 0) in all
		 * other cases.
		 * \sa rc_compare_func
		 */
		unsigned texture_compare_func : 3;

		/**
		 * Turns the sampler into a shadow sampler, whatever its
		 * type is.
		 */
		unsigned compare_mode_enabled : 1;

		/**
		 * Set when the sampler is going to receive non-normalized
		 * coords. RC_STATE_R300_TEXRECT_FACTOR provides the scaling
		 * factor.
		 */
		unsigned non_normalized_coords : 1;

		/**
		 * Wrapping mode for the sampler.
		 *
		 * With \ref RC_WRAP_NONE (aka 0) no wrapping maths is
		 * performed on the coordinates.
		 */
		unsigned wrap_mode : 3;

		/**
		 * Scale the coords once the wrap mode emulation has been
		 * applied, right before the texture fetch.
		 * RC_STATE_R300_TEXSCALE_FACTOR provides the scaling factor.
		 */
		unsigned clamp_and_scale_before_fetch : 1;

		/**
		 * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and do the
		 * UNORM -> SNORM conversion in the shader.
		 */
		unsigned convert_unorm_to_snorm : 1;
	} unit[16];
};
186 
187 
188 
/**
 * Instruction ranges of one fragment program node: a texture range and
 * an ALU range, each given as a start offset plus a relative end.
 */
struct r300_fragment_program_node {
	/* Texture instruction range. */
	int tex_offset; /**< first tex instruction */
	int tex_end;    /**< last tex instruction, relative to tex_offset */

	/* ALU instruction range. */
	int alu_offset; /**< first ALU instruction */
	int alu_end;    /**< last ALU instruction, relative to alu_offset */

	int flags;
};
196 
197 /**
198  * Stores an R300 fragment program in its compiled-to-hardware form.
199  */
200 struct r300_fragment_program_code {
201 	struct {
202 		unsigned int length; /**< total # of texture instructions used */
203 		uint32_t inst[R400_PFS_MAX_TEX_INST];
204 	} tex;
205 
206 	struct {
207 		unsigned int length; /**< total # of ALU instructions used */
208 		struct {
209 			uint32_t rgb_inst;
210 			uint32_t rgb_addr;
211 			uint32_t alpha_inst;
212 			uint32_t alpha_addr;
213 			uint32_t r400_ext_addr;
214 		} inst[R400_PFS_MAX_ALU_INST];
215 	} alu;
216 
217 	uint32_t config; /* US_CONFIG */
218 	uint32_t pixsize; /* US_PIXSIZE */
219 	uint32_t code_offset; /* US_CODE_OFFSET */
220 	uint32_t r400_code_offset_ext; /* US_CODE_EXT */
221 	uint32_t code_addr[4]; /* US_CODE_ADDR */
222 	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
223 	 * for r400 cards */
224 	unsigned int r390_mode:1;
225 };
226 
227 
228 struct r500_fragment_program_code {
229 	struct {
230 		uint32_t inst0;
231 		uint32_t inst1;
232 		uint32_t inst2;
233 		uint32_t inst3;
234 		uint32_t inst4;
235 		uint32_t inst5;
236 	} inst[R500_PFS_MAX_INST];
237 
238 	int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
239 
240 	int max_temp_idx;
241 
242 	uint32_t us_fc_ctrl;
243 
244 	uint32_t int_constants[32];
245 	uint32_t int_constant_count;
246 };
247 
248 struct rX00_fragment_program_code {
249 	union {
250 		struct r300_fragment_program_code r300;
251 		struct r500_fragment_program_code r500;
252 	} code;
253 
254 	unsigned writes_depth:1;
255 
256 	struct rc_constant_list constants;
257 	unsigned *constants_remap_table;
258 };
259 
260 
/* Vertex shader ALU limits; the DWORDS variants are four times the ALU count. */
#define R300_VS_MAX_ALU 256
#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)

#define R300_VS_MAX_TEMPS 32

/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS 16
#define R300_VS_MAX_LOOP_DEPTH 1

/* Sizes of the inputs[] / outputs[] arrays in r300_vertex_program_code. */
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
272 
273 struct r300_vertex_program_code {
274 	int length;
275 	union {
276 		uint32_t d[R500_VS_MAX_ALU_DWORDS];
277 		float f[R500_VS_MAX_ALU_DWORDS];
278 	} body;
279 
280 	int pos_end;
281 	int num_temporaries;	/* Number of temp vars used by program */
282 	int inputs[VSF_MAX_INPUTS];
283 	int outputs[VSF_MAX_OUTPUTS];
284 
285 	struct rc_constant_list constants;
286 	unsigned *constants_remap_table;
287 
288 	uint32_t InputsRead;
289 	uint32_t OutputsWritten;
290 
291 	unsigned int num_fc_ops;
292 	uint32_t fc_ops;
293 	union {
294 	        uint32_t r300[R300_VS_MAX_FC_OPS];
295 		struct {
296 			uint32_t lw;
297 			uint32_t uw;
298 		} r500[R300_VS_MAX_FC_OPS];
299 	} fc_op_addrs;
300 	int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
301 };
302 
303 #endif /* RADEON_CODE_H */
304 
305