1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include <assert.h>
29 #include <stdio.h>
30 
31 #include "pipe/p_video_codec.h"
32 
33 #include "util/u_memory.h"
34 #include "util/u_video.h"
35 
36 #include "vl/vl_mpeg12_decoder.h"
37 
38 #include "radeonsi/si_pipe.h"
39 #include "radeon_video.h"
40 #include "radeon_vcn_dec.h"
41 
/*
 * NOTE(review): FB_BUFFER_OFFSET and FB_BUFFER_SIZE are not referenced in
 * the visible part of this file; presumably they describe where the feedback
 * area sits inside a msg/fb/it buffer -- confirm against the mapping code.
 */
#define FB_BUFFER_OFFSET		0x1000
#define FB_BUFFER_SIZE			2048
/* get_h265_msg() writes 96 + 384 + 384 + 128 = 992 bytes through dec->it */
#define IT_SCALING_TABLE_SIZE		992
/* reported to the firmware as sw_ctxt_size in the decode message */
#define RDECODE_SESSION_CONTEXT_SIZE	(128 * 1024)

/* registers written by send_cmd(): command word and the two address halves */
#define RDECODE_GPCOM_VCPU_CMD		0x2070c
#define RDECODE_GPCOM_VCPU_DATA0	0x20710
#define RDECODE_GPCOM_VCPU_DATA1	0x20714
/* NOTE(review): not referenced in the visible part of this file */
#define RDECODE_ENGINE_CNTL		0x20718

/* number of entries in the msg_fb_it_buffers and bs_buffers arrays */
#define NUM_BUFFERS			4
/* width of the frame-number window that get_ref_pic_idx() clamps to */
#define NUM_MPEG2_REFS			6
/* NOTE(review): the two counts below are not referenced in the visible code */
#define NUM_H264_REFS			17
#define NUM_VC1_REFS			5
56 
/*
 * State of one VCN decoder instance.  The generic pipe_video_codec is the
 * first member.
 */
struct radeon_decoder {
	struct pipe_video_codec		base;

	/* copied into the header of every create/decode/destroy message */
	unsigned			stream_handle;
	/* RDECODE_CODEC_* value, copied into the create and decode messages */
	unsigned			stream_type;
	/* used as feedback number and as MPEG-2/MPEG-4 decoded picture index */
	unsigned			frame_number;

	struct pipe_screen		*screen;
	struct radeon_winsys		*ws;
	struct radeon_winsys_cs		*cs;

	/* CPU pointers the message builders write through:
	 * msg = message, fb = feedback header, it = scaling lists.
	 * NOTE(review): presumably these point into the mapped current
	 * msg_fb_it buffer and bs_ptr into the mapped bitstream buffer --
	 * the mapping code is outside the visible part of this file. */
	void				*msg;
	uint32_t			*fb;
	uint8_t				*it;
	void				*bs_ptr;

	struct rvid_buffer		msg_fb_it_buffers[NUM_BUFFERS];
	struct rvid_buffer		bs_buffers[NUM_BUFFERS];
	/* its size is reported as dpb_size in the decode message */
	struct rvid_buffer		dpb;
	/* created on first HEVC decode; its size is reported as hw_ctxt_size */
	struct rvid_buffer		ctx;
	struct rvid_buffer		sessionctx;

	/* unaligned bitstream size; aligned to 128 for bsd_size */
	unsigned			bs_size;
	unsigned			cur_buffer;
	/* HEVC only: slot index -> video buffer currently occupying that slot */
	void				*render_pic_list[16];
};
83 
get_h264_msg(struct radeon_decoder * dec,struct pipe_h264_picture_desc * pic)84 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
85 		struct pipe_h264_picture_desc *pic)
86 {
87 	rvcn_dec_message_avc_t result;
88 
89 	memset(&result, 0, sizeof(result));
90 	switch (pic->base.profile) {
91 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
92 		result.profile = RDECODE_H264_PROFILE_BASELINE;
93 		break;
94 
95 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
96 		result.profile = RDECODE_H264_PROFILE_MAIN;
97 		break;
98 
99 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
100 		result.profile = RDECODE_H264_PROFILE_HIGH;
101 		break;
102 
103 	default:
104 		assert(0);
105 		break;
106 	}
107 
108 	result.level = dec->base.level;
109 
110 	result.sps_info_flags = 0;
111 	result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
112 	result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
113 	result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
114 	result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
115 	result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
116 
117 	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
118 	result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
119 	result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
120 	result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
121 	result.log2_max_pic_order_cnt_lsb_minus4 =
122 		pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
123 
124 	switch (dec->base.chroma_format) {
125 	case PIPE_VIDEO_CHROMA_FORMAT_NONE:
126 		break;
127 	case PIPE_VIDEO_CHROMA_FORMAT_400:
128 		result.chroma_format = 0;
129 		break;
130 	case PIPE_VIDEO_CHROMA_FORMAT_420:
131 		result.chroma_format = 1;
132 		break;
133 	case PIPE_VIDEO_CHROMA_FORMAT_422:
134 		result.chroma_format = 2;
135 		break;
136 	case PIPE_VIDEO_CHROMA_FORMAT_444:
137 		result.chroma_format = 3;
138 		break;
139 	}
140 
141 	result.pps_info_flags = 0;
142 	result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
143 	result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
144 	result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
145 	result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
146 	result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
147 	result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
148 	result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
149 	result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
150 
151 	result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
152 	result.slice_group_map_type = pic->pps->slice_group_map_type;
153 	result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
154 	result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
155 	result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
156 	result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
157 
158 	memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
159 	memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
160 
161 	memcpy(dec->it, result.scaling_list_4x4, 6*16);
162 	memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
163 
164 	result.num_ref_frames = pic->num_ref_frames;
165 
166 	result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
167 	result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
168 
169 	result.frame_num = pic->frame_num;
170 	memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
171 	result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
172 	result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
173 	memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
174 
175 	result.decoded_pic_idx = pic->frame_num;
176 
177 	return result;
178 }
179 
/**
 * Destructor callback for the data attached to a video buffer.
 *
 * The attached value is only an integer stored in the pointer, so there is
 * nothing to release.
 */
static void radeon_dec_destroy_associated_data(void *data)
{
	(void)data;
}
184 
get_h265_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h265_picture_desc * pic)185 static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
186 					struct pipe_video_buffer *target,
187 					struct pipe_h265_picture_desc *pic)
188 {
189 	rvcn_dec_message_hevc_t result;
190 	unsigned i, j;
191 
192 	memset(&result, 0, sizeof(result));
193 	result.sps_info_flags = 0;
194 	result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
195 	result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
196 	result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
197 	result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
198 	result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
199 	result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
200 	result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
201 	result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
202 	result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
203 	if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO)
204 		result.sps_info_flags |= 1 << 9;
205 	if (pic->UseRefPicList == true)
206 		result.sps_info_flags |= 1 << 10;
207 
208 	result.chroma_format = pic->pps->sps->chroma_format_idc;
209 	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
210 	result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
211 	result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
212 	result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
213 	result.log2_min_luma_coding_block_size_minus3 =
214 		pic->pps->sps->log2_min_luma_coding_block_size_minus3;
215 	result.log2_diff_max_min_luma_coding_block_size =
216 		pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
217 	result.log2_min_transform_block_size_minus2 =
218 		pic->pps->sps->log2_min_transform_block_size_minus2;
219 	result.log2_diff_max_min_transform_block_size =
220 		pic->pps->sps->log2_diff_max_min_transform_block_size;
221 	result.max_transform_hierarchy_depth_inter =
222 		pic->pps->sps->max_transform_hierarchy_depth_inter;
223 	result.max_transform_hierarchy_depth_intra =
224 		pic->pps->sps->max_transform_hierarchy_depth_intra;
225 	result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
226 	result.pcm_sample_bit_depth_chroma_minus1 =
227 		pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
228 	result.log2_min_pcm_luma_coding_block_size_minus3 =
229 		pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
230 	result.log2_diff_max_min_pcm_luma_coding_block_size =
231 		pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
232 	result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
233 
234 	result.pps_info_flags = 0;
235 	result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
236 	result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
237 	result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
238 	result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
239 	result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
240 	result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
241 	result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
242 	result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
243 	result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
244 	result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
245 	result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
246 	result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
247 	result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
248 	result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
249 	result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
250 	result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
251 	result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
252 	result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
253 	result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
254 	result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
255 
256 	result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
257 	result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
258 	result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
259 	result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
260 	result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
261 	result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
262 	result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
263 	result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
264 	result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
265 	result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
266 	result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
267 	result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
268 	result.init_qp_minus26 = pic->pps->init_qp_minus26;
269 
270 	for (i = 0; i < 19; ++i)
271 		result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
272 
273 	for (i = 0; i < 21; ++i)
274 		result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
275 
276 	result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
277 	result.curr_poc = pic->CurrPicOrderCntVal;
278 
279 	for (i = 0 ; i < 16 ; i++) {
280 		for (j = 0; (pic->ref[j] != NULL) && (j < 16) ; j++) {
281 			if (dec->render_pic_list[i] == pic->ref[j])
282 				break;
283 			if (j == 15)
284 				dec->render_pic_list[i] = NULL;
285 			else if (pic->ref[j+1] == NULL)
286 				dec->render_pic_list[i] = NULL;
287 		}
288 	}
289 	for (i = 0 ; i < 16 ; i++) {
290 		if (dec->render_pic_list[i] == NULL) {
291 			dec->render_pic_list[i] = target;
292 			result.curr_idx = i;
293 			break;
294 		}
295 	}
296 
297 	vl_video_buffer_set_associated_data(target, &dec->base,
298 					    (void *)(uintptr_t)result.curr_idx,
299 					    &radeon_dec_destroy_associated_data);
300 
301 	for (i = 0; i < 16; ++i) {
302 		struct pipe_video_buffer *ref = pic->ref[i];
303 		uintptr_t ref_pic = 0;
304 
305 		result.poc_list[i] = pic->PicOrderCntVal[i];
306 
307 		if (ref)
308 			ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
309 		else
310 			ref_pic = 0x7F;
311 		result.ref_pic_list[i] = ref_pic;
312 	}
313 
314 	for (i = 0; i < 8; ++i) {
315 		result.ref_pic_set_st_curr_before[i] = 0xFF;
316 		result.ref_pic_set_st_curr_after[i] = 0xFF;
317 		result.ref_pic_set_lt_curr[i] = 0xFF;
318 	}
319 
320 	for (i = 0; i < pic->NumPocStCurrBefore; ++i)
321 		result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
322 
323 	for (i = 0; i < pic->NumPocStCurrAfter; ++i)
324 		result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
325 
326 	for (i = 0; i < pic->NumPocLtCurr; ++i)
327 		result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
328 
329 	for (i = 0; i < 6; ++i)
330 		result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
331 
332 	for (i = 0; i < 2; ++i)
333 		result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
334 
335 	memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
336 	memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
337 	memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
338 	memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
339 
340 	for (i = 0 ; i < 2 ; i++) {
341 		for (j = 0 ; j < 15 ; j++)
342 			result.direct_reflist[i][j] = pic->RefPicList[i][j];
343 	}
344 
345 	if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
346 		if (target->buffer_format == PIPE_FORMAT_P016) {
347 			result.p010_mode = 1;
348 			result.msb_mode = 1;
349 		} else {
350 			result.p010_mode = 0;
351 			result.luma_10to8 = 5;
352 			result.chroma_10to8 = 5;
353 			result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
354 			result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
355 		}
356 	}
357 
358 	return result;
359 }
360 
calc_ctx_size_h265_main(struct radeon_decoder * dec)361 static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
362 {
363 	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
364 	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
365 
366 	unsigned max_references = dec->base.max_references + 1;
367 
368 	if (dec->base.width * dec->base.height >= 4096*2000)
369 		max_references = MAX2(max_references, 8);
370 	else
371 		max_references = MAX2(max_references, 17);
372 
373 	width = align (width, 16);
374 	height = align (height, 16);
375 	return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
376 }
377 
calc_ctx_size_h265_main10(struct radeon_decoder * dec,struct pipe_h265_picture_desc * pic)378 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic)
379 {
380 	unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
381 	unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
382 	unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
383 
384 	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
385 	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
386 	unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 ||
387 			pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
388 
389 	unsigned max_references = dec->base.max_references + 1;
390 
391 	if (dec->base.width * dec->base.height >= 4096*2000)
392 		max_references = MAX2(max_references, 8);
393 	else
394 		max_references = MAX2(max_references, 17);
395 
396 	block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3));
397 	log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
398 
399 	width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
400 	height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
401 
402 	num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
403 	context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
404 	max_mb_address = (unsigned) ceil(height * 8 / 2048.0);
405 
406 	cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
407 	db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
408 
409 	return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
410 }
411 
get_vc1_msg(struct pipe_vc1_picture_desc * pic)412 static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic)
413 {
414 	rvcn_dec_message_vc1_t result;
415 
416 	memset(&result, 0, sizeof(result));
417 	switch(pic->base.profile) {
418 	case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
419 		result.profile = RDECODE_VC1_PROFILE_SIMPLE;
420 		result.level = 1;
421 		break;
422 
423 	case PIPE_VIDEO_PROFILE_VC1_MAIN:
424 		result.profile = RDECODE_VC1_PROFILE_MAIN;
425 		result.level = 2;
426 		break;
427 
428 	case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
429 		result.profile = RDECODE_VC1_PROFILE_ADVANCED;
430 		result.level = 4;
431 		break;
432 
433 	default:
434 		assert(0);
435 	}
436 
437 	result.sps_info_flags |= pic->postprocflag << 7;
438 	result.sps_info_flags |= pic->pulldown << 6;
439 	result.sps_info_flags |= pic->interlace << 5;
440 	result.sps_info_flags |= pic->tfcntrflag << 4;
441 	result.sps_info_flags |= pic->finterpflag << 3;
442 	result.sps_info_flags |= pic->psf << 1;
443 
444 	result.pps_info_flags |= pic->range_mapy_flag << 31;
445 	result.pps_info_flags |= pic->range_mapy << 28;
446 	result.pps_info_flags |= pic->range_mapuv_flag << 27;
447 	result.pps_info_flags |= pic->range_mapuv << 24;
448 	result.pps_info_flags |= pic->multires << 21;
449 	result.pps_info_flags |= pic->maxbframes << 16;
450 	result.pps_info_flags |= pic->overlap << 11;
451 	result.pps_info_flags |= pic->quantizer << 9;
452 	result.pps_info_flags |= pic->panscan_flag << 7;
453 	result.pps_info_flags |= pic->refdist_flag << 6;
454 	result.pps_info_flags |= pic->vstransform << 0;
455 
456 	if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
457 		result.pps_info_flags |= pic->syncmarker << 20;
458 		result.pps_info_flags |= pic->rangered << 19;
459 		result.pps_info_flags |= pic->loopfilter << 5;
460 		result.pps_info_flags |= pic->fastuvmc << 4;
461 		result.pps_info_flags |= pic->extended_mv << 3;
462 		result.pps_info_flags |= pic->extended_dmv << 8;
463 		result.pps_info_flags |= pic->dquant << 1;
464 	}
465 
466 	result.chroma_format = 1;
467 
468 	return result;
469 }
470 
get_ref_pic_idx(struct radeon_decoder * dec,struct pipe_video_buffer * ref)471 static uint32_t get_ref_pic_idx(struct radeon_decoder *dec, struct pipe_video_buffer *ref)
472 {
473 	uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
474 	uint32_t max = MAX2(dec->frame_number, 1) - 1;
475 	uintptr_t frame;
476 
477 	/* seems to be the most sane fallback */
478 	if (!ref)
479 		return max;
480 
481 	/* get the frame number from the associated data */
482 	frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
483 
484 	/* limit the frame number to a valid range */
485 	return MAX2(MIN2(frame, max), min);
486 }
487 
get_mpeg2_msg(struct radeon_decoder * dec,struct pipe_mpeg12_picture_desc * pic)488 static rvcn_dec_message_mpeg2_vld_t get_mpeg2_msg(struct radeon_decoder *dec,
489 				       struct pipe_mpeg12_picture_desc *pic)
490 {
491 	const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
492 	rvcn_dec_message_mpeg2_vld_t	result;
493 	unsigned i;
494 
495 	memset(&result, 0, sizeof(result));
496 	result.decoded_pic_idx = dec->frame_number;
497 
498 	result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
499 	result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
500 
501 	if(pic->intra_matrix) {
502 		result.load_intra_quantiser_matrix = 1;
503 		for (i = 0; i < 64; ++i) {
504 			result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
505 		}
506 	}
507 	if(pic->non_intra_matrix) {
508 		result.load_nonintra_quantiser_matrix = 1;
509 		for (i = 0; i < 64; ++i) {
510 			result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
511 		}
512 	}
513 
514 	result.profile_and_level_indication = 0;
515 	result.chroma_format = 0x1;
516 
517 	result.picture_coding_type = pic->picture_coding_type;
518 	result.f_code[0][0] = pic->f_code[0][0] + 1;
519 	result.f_code[0][1] = pic->f_code[0][1] + 1;
520 	result.f_code[1][0] = pic->f_code[1][0] + 1;
521 	result.f_code[1][1] = pic->f_code[1][1] + 1;
522 	result.intra_dc_precision = pic->intra_dc_precision;
523 	result.pic_structure = pic->picture_structure;
524 	result.top_field_first = pic->top_field_first;
525 	result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
526 	result.concealment_motion_vectors = pic->concealment_motion_vectors;
527 	result.q_scale_type = pic->q_scale_type;
528 	result.intra_vlc_format = pic->intra_vlc_format;
529 	result.alternate_scan = pic->alternate_scan;
530 
531 	return result;
532 }
533 
get_mpeg4_msg(struct radeon_decoder * dec,struct pipe_mpeg4_picture_desc * pic)534 static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec,
535 				       struct pipe_mpeg4_picture_desc *pic)
536 {
537 	rvcn_dec_message_mpeg4_asp_vld_t result;
538 	unsigned i;
539 
540 	memset(&result, 0, sizeof(result));
541 	result.decoded_pic_idx = dec->frame_number;
542 
543 	result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
544 	result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
545 
546 	result.variant_type = 0;
547 	result.profile_and_level_indication = 0xF0;
548 
549 	result.video_object_layer_verid = 0x5;
550 	result.video_object_layer_shape = 0x0;
551 
552 	result.video_object_layer_width = dec->base.width;
553 	result.video_object_layer_height = dec->base.height;
554 
555 	result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
556 
557 	result.short_video_header = pic->short_video_header;
558 	result.interlaced = pic->interlaced;
559 	result.load_intra_quant_mat = 1;
560 	result.load_nonintra_quant_mat = 1;
561 	result.quarter_sample = pic->quarter_sample;
562 	result.complexity_estimation_disable = 1;
563 	result.resync_marker_disable = pic->resync_marker_disable;
564 	result.newpred_enable = 0;
565 	result.reduced_resolution_vop_enable = 0;
566 
567 	result.quant_type = pic->quant_type;
568 
569 	for (i = 0; i < 64; ++i) {
570 		result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
571 		result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
572 	}
573 
574 	return result;
575 }
576 
rvcn_dec_message_create(struct radeon_decoder * dec)577 static void rvcn_dec_message_create(struct radeon_decoder *dec)
578 {
579 	rvcn_dec_message_header_t *header = dec->msg;
580 	rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t);
581 	unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
582 
583 	memset(dec->msg, 0, sizes);
584 	header->header_size = sizeof(rvcn_dec_message_header_t);
585 	header->total_size = sizes;
586 	header->num_buffers = 1;
587 	header->msg_type = RDECODE_MSG_CREATE;
588 	header->stream_handle = dec->stream_handle;
589 	header->status_report_feedback_number = 0;
590 
591 	header->index[0].message_id = RDECODE_MESSAGE_CREATE;
592 	header->index[0].offset = sizeof(rvcn_dec_message_header_t);
593 	header->index[0].size = sizeof(rvcn_dec_message_create_t);
594 	header->index[0].filled = 0;
595 
596 	create->stream_type = dec->stream_type;
597 	create->session_flags = 0;
598 	create->width_in_samples = dec->base.width;
599 	create->height_in_samples = dec->base.height;
600 }
601 
rvcn_dec_message_decode(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)602 static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
603 					struct pipe_video_buffer *target,
604 					struct pipe_picture_desc *picture)
605 {
606 	struct r600_texture *luma = (struct r600_texture *)
607 				((struct vl_video_buffer *)target)->resources[0];
608 	struct r600_texture *chroma = (struct r600_texture *)
609 				((struct vl_video_buffer *)target)->resources[1];
610 	rvcn_dec_message_header_t *header;
611 	rvcn_dec_message_index_t *index;
612 	rvcn_dec_message_decode_t *decode;
613 	unsigned sizes = 0, offset_decode, offset_codec;
614 	void *codec;
615 
616 	header = dec->msg;
617 	sizes += sizeof(rvcn_dec_message_header_t);
618 	index = (void*)header + sizeof(rvcn_dec_message_header_t);
619 	sizes += sizeof(rvcn_dec_message_index_t);
620 	offset_decode = sizes;
621 	decode = (void*)index + sizeof(rvcn_dec_message_index_t);
622 	sizes += sizeof(rvcn_dec_message_decode_t);
623 	offset_codec = sizes;
624 	codec = (void*)decode + sizeof(rvcn_dec_message_decode_t);
625 
626 	memset(dec->msg, 0, sizes);
627 	header->header_size = sizeof(rvcn_dec_message_header_t);
628 	header->total_size = sizes;
629 	header->num_buffers = 2;
630 	header->msg_type = RDECODE_MSG_DECODE;
631 	header->stream_handle = dec->stream_handle;
632 	header->status_report_feedback_number = dec->frame_number;
633 
634 	header->index[0].message_id = RDECODE_MESSAGE_DECODE;
635 	header->index[0].offset = offset_decode;
636 	header->index[0].size = sizeof(rvcn_dec_message_decode_t);
637 	header->index[0].filled = 0;
638 
639 	index->offset = offset_codec;
640 	index->size = sizeof(rvcn_dec_message_avc_t);
641 	index->filled = 0;
642 
643 	decode->stream_type = dec->stream_type;
644 	decode->decode_flags = 0x1;
645 	decode->width_in_samples = dec->base.width;
646 	decode->height_in_samples = dec->base.height;
647 
648 	decode->bsd_size = align(dec->bs_size, 128);
649 	decode->dpb_size = dec->dpb.res->buf->size;
650 	decode->dt_size =
651 		((struct r600_resource *)((struct vl_video_buffer *)target)->resources[0])->buf->size +
652 		((struct r600_resource *)((struct vl_video_buffer *)target)->resources[1])->buf->size;
653 
654 	decode->sct_size = 0;
655 	decode->sc_coeff_size = 0;
656 
657 	decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
658 	decode->db_pitch = align(dec->base.width, 32);
659 	decode->db_surf_tile_config = 0;
660 
661 	decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
662 	decode->dt_uv_pitch = decode->dt_pitch / 2;
663 
664 	decode->dt_tiling_mode = 0;
665 	decode->dt_swizzle_mode = RDECODE_SW_MODE_LINEAR;
666 	decode->dt_array_mode = RDECODE_ARRAY_MODE_LINEAR;
667 	decode->dt_field_mode = ((struct vl_video_buffer *)target)->base.interlaced;
668 	decode->dt_surf_tile_config = 0;
669 	decode->dt_uv_surf_tile_config = 0;
670 
671 	decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
672 	decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
673 	if (decode->dt_field_mode) {
674 		decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset +
675 				luma->surface.u.gfx9.surf_slice_size;
676 		decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset +
677 				chroma->surface.u.gfx9.surf_slice_size;
678 	} else {
679 		decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
680 		decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
681 	}
682 
683 	switch (u_reduce_video_profile(picture->profile)) {
684 	case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
685 		rvcn_dec_message_avc_t avc =
686 			get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
687 		memcpy(codec, (void*)&avc, sizeof(rvcn_dec_message_avc_t));
688 		index->message_id = RDECODE_MESSAGE_AVC;
689 		break;
690 	}
691 	case PIPE_VIDEO_FORMAT_HEVC: {
692 		rvcn_dec_message_hevc_t hevc =
693 			get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
694 
695 		memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t));
696 		index->message_id = RDECODE_MESSAGE_HEVC;
697 		if (dec->ctx.res == NULL) {
698 			unsigned ctx_size;
699 			if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
700 				ctx_size = calc_ctx_size_h265_main10(dec,
701 					(struct pipe_h265_picture_desc*)picture);
702 			else
703 				ctx_size = calc_ctx_size_h265_main(dec);
704 			if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
705 				RVID_ERR("Can't allocated context buffer.\n");
706 			si_vid_clear_buffer(dec->base.context, &dec->ctx);
707 		}
708 		break;
709 	}
710 	case PIPE_VIDEO_FORMAT_VC1: {
711 		rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
712 
713 		memcpy(codec, (void*)&vc1, sizeof(rvcn_dec_message_vc1_t));
714 		if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
715 		    (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
716 			decode->width_in_samples = align(decode->width_in_samples, 16) / 16;
717 			decode->height_in_samples = align(decode->height_in_samples, 16) / 16;
718 		}
719 		index->message_id = RDECODE_MESSAGE_VC1;
720 		break;
721 
722 	}
723 	case PIPE_VIDEO_FORMAT_MPEG12: {
724 		rvcn_dec_message_mpeg2_vld_t mpeg2 =
725 			get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
726 
727 		memcpy(codec, (void*)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t));
728 		index->message_id = RDECODE_MESSAGE_MPEG2_VLD;
729 		break;
730 	}
731 	case PIPE_VIDEO_FORMAT_MPEG4: {
732 		rvcn_dec_message_mpeg4_asp_vld_t mpeg4 =
733 			get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
734 
735 		memcpy(codec, (void*)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t));
736 		index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD;
737 		break;
738 	}
739 	default:
740 		assert(0);
741 		return NULL;
742 	}
743 
744 	if (dec->ctx.res)
745 		decode->hw_ctxt_size = dec->ctx.res->buf->size;
746 
747 	return luma->resource.buf;
748 }
749 
rvcn_dec_message_destroy(struct radeon_decoder * dec)750 static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
751 {
752 	rvcn_dec_message_header_t *header = dec->msg;
753 
754 	memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t));
755 	header->header_size = sizeof(rvcn_dec_message_header_t);
756 	header->total_size = sizeof(rvcn_dec_message_header_t) -
757 			sizeof(rvcn_dec_message_index_t);
758 	header->num_buffers = 0;
759 	header->msg_type = RDECODE_MSG_DESTROY;
760 	header->stream_handle = dec->stream_handle;
761 	header->status_report_feedback_number = 0;
762 }
763 
rvcn_dec_message_feedback(struct radeon_decoder * dec)764 static void rvcn_dec_message_feedback(struct radeon_decoder *dec)
765 {
766 	rvcn_dec_feedback_header_t *header = (void*)dec->fb;
767 
768 	header->header_size = sizeof(rvcn_dec_feedback_header_t);
769 	header->total_size = sizeof(rvcn_dec_feedback_header_t);
770 	header->num_buffers = 0;
771 }
772 
773 /* flush IB to the hardware */
flush(struct radeon_decoder * dec,unsigned flags)774 static int flush(struct radeon_decoder *dec, unsigned flags)
775 {
776 	return dec->ws->cs_flush(dec->cs, flags, NULL);
777 }
778 
779 /* add a new set register command to the IB */
set_reg(struct radeon_decoder * dec,unsigned reg,uint32_t val)780 static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val)
781 {
782 	radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0));
783 	radeon_emit(dec->cs, val);
784 }
785 
786 /* send a command to the VCPU through the GPCOM registers */
send_cmd(struct radeon_decoder * dec,unsigned cmd,struct pb_buffer * buf,uint32_t off,enum radeon_bo_usage usage,enum radeon_bo_domain domain)787 static void send_cmd(struct radeon_decoder *dec, unsigned cmd,
788 		     struct pb_buffer* buf, uint32_t off,
789 		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
790 {
791 	uint64_t addr;
792 
793 	dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
794 			   domain, RADEON_PRIO_UVD);
795 	addr = dec->ws->buffer_get_virtual_address(buf);
796 	addr = addr + off;
797 
798 	set_reg(dec, RDECODE_GPCOM_VCPU_DATA0, addr);
799 	set_reg(dec, RDECODE_GPCOM_VCPU_DATA1, addr >> 32);
800 	set_reg(dec, RDECODE_GPCOM_VCPU_CMD, cmd << 1);
801 }
802 
803 /* do the codec needs an IT buffer ?*/
have_it(struct radeon_decoder * dec)804 static bool have_it(struct radeon_decoder *dec)
805 {
806 	return dec->stream_type == RDECODE_CODEC_H264_PERF ||
807 		dec->stream_type == RDECODE_CODEC_H265;
808 }
809 
810 /* map the next available message/feedback/itscaling buffer */
map_msg_fb_it_buf(struct radeon_decoder * dec)811 static void map_msg_fb_it_buf(struct radeon_decoder *dec)
812 {
813 	struct rvid_buffer* buf;
814 	uint8_t *ptr;
815 
816 	/* grab the current message/feedback buffer */
817 	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
818 
819 	/* and map it for CPU access */
820 	ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
821 
822 	/* calc buffer offsets */
823 	dec->msg = ptr;
824 
825 	dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
826 	if (have_it(dec))
827 		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
828 }
829 
830 /* unmap and send a message command to the VCPU */
send_msg_buf(struct radeon_decoder * dec)831 static void send_msg_buf(struct radeon_decoder *dec)
832 {
833 	struct rvid_buffer* buf;
834 
835 	/* ignore the request if message/feedback buffer isn't mapped */
836 	if (!dec->msg || !dec->fb)
837 		return;
838 
839 	/* grab the current message buffer */
840 	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
841 
842 	/* unmap the buffer */
843 	dec->ws->buffer_unmap(buf->res->buf);
844 	dec->msg = NULL;
845 	dec->fb = NULL;
846 	dec->it = NULL;
847 
848 	if (dec->sessionctx.res)
849 		send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER,
850 			 dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE,
851 			 RADEON_DOMAIN_VRAM);
852 
853 	/* and send it to the hardware */
854 	send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0,
855 		 RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
856 }
857 
858 /* cycle to the next set of buffers */
next_buffer(struct radeon_decoder * dec)859 static void next_buffer(struct radeon_decoder *dec)
860 {
861 	++dec->cur_buffer;
862 	dec->cur_buffer %= NUM_BUFFERS;
863 }
864 
calc_ctx_size_h264_perf(struct radeon_decoder * dec)865 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec)
866 {
867 	unsigned width_in_mb, height_in_mb, ctx_size;
868 	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
869 	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
870 
871 	unsigned max_references = dec->base.max_references + 1;
872 
873 	// picture width & height in 16 pixel units
874 	width_in_mb = width / VL_MACROBLOCK_WIDTH;
875 	height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
876 
877 	unsigned fs_in_mb = width_in_mb * height_in_mb;
878 	unsigned num_dpb_buffer;
879 	switch(dec->base.level) {
880 	case 30:
881 		num_dpb_buffer = 8100 / fs_in_mb;
882 		break;
883 	case 31:
884 		num_dpb_buffer = 18000 / fs_in_mb;
885 		break;
886 	case 32:
887 		num_dpb_buffer = 20480 / fs_in_mb;
888 		break;
889 	case 41:
890 		num_dpb_buffer = 32768 / fs_in_mb;
891 		break;
892 	case 42:
893 		num_dpb_buffer = 34816 / fs_in_mb;
894 		break;
895 	case 50:
896 		num_dpb_buffer = 110400 / fs_in_mb;
897 		break;
898 	case 51:
899 		num_dpb_buffer = 184320 / fs_in_mb;
900 		break;
901 	default:
902 		num_dpb_buffer = 184320 / fs_in_mb;
903 		break;
904 	}
905 	num_dpb_buffer++;
906 	max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
907 	ctx_size = max_references * align(width_in_mb * height_in_mb  * 192, 256);
908 
909 	return ctx_size;
910 }
911 
912 /* calculate size of reference picture buffer */
calc_dpb_size(struct radeon_decoder * dec)913 static unsigned calc_dpb_size(struct radeon_decoder *dec)
914 {
915 	unsigned width_in_mb, height_in_mb, image_size, dpb_size;
916 
917 	// always align them to MB size for dpb calculation
918 	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
919 	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
920 
921 	// always one more for currently decoded picture
922 	unsigned max_references = dec->base.max_references + 1;
923 
924 	// aligned size of a single frame
925 	image_size = align(width, 32) * height;
926 	image_size += image_size / 2;
927 	image_size = align(image_size, 1024);
928 
929 	// picture width & height in 16 pixel units
930 	width_in_mb = width / VL_MACROBLOCK_WIDTH;
931 	height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
932 
933 	switch (u_reduce_video_profile(dec->base.profile)) {
934 	case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
935 		unsigned fs_in_mb = width_in_mb * height_in_mb;
936 		unsigned num_dpb_buffer;
937 
938 		switch(dec->base.level) {
939 		case 30:
940 			num_dpb_buffer = 8100 / fs_in_mb;
941 			break;
942 		case 31:
943 			num_dpb_buffer = 18000 / fs_in_mb;
944 			break;
945 		case 32:
946 			num_dpb_buffer = 20480 / fs_in_mb;
947 			break;
948 		case 41:
949 			num_dpb_buffer = 32768 / fs_in_mb;
950 			break;
951 		case 42:
952 			num_dpb_buffer = 34816 / fs_in_mb;
953 			break;
954 		case 50:
955 			num_dpb_buffer = 110400 / fs_in_mb;
956 			break;
957 		case 51:
958 			num_dpb_buffer = 184320 / fs_in_mb;
959 			break;
960 		default:
961 			num_dpb_buffer = 184320 / fs_in_mb;
962 			break;
963 		}
964 		num_dpb_buffer++;
965 		max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
966 		dpb_size = image_size * max_references;
967 		break;
968 	}
969 
970 	case PIPE_VIDEO_FORMAT_HEVC:
971 		if (dec->base.width * dec->base.height >= 4096*2000)
972 			max_references = MAX2(max_references, 8);
973 		else
974 			max_references = MAX2(max_references, 17);
975 
976 		width = align (width, 16);
977 		height = align (height, 16);
978 		if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
979 			dpb_size = align((align(width, 32) * height * 9) / 4, 256) * max_references;
980 		else
981 			dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references;
982 		break;
983 
984 	case PIPE_VIDEO_FORMAT_VC1:
985 		// the firmware seems to allways assume a minimum of ref frames
986 		max_references = MAX2(NUM_VC1_REFS, max_references);
987 
988 		// reference picture buffer
989 		dpb_size = image_size * max_references;
990 
991 		// CONTEXT_BUFFER
992 		dpb_size += width_in_mb * height_in_mb * 128;
993 
994 		// IT surface buffer
995 		dpb_size += width_in_mb * 64;
996 
997 		// DB surface buffer
998 		dpb_size += width_in_mb * 128;
999 
1000 		// BP
1001 		dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
1002 		break;
1003 
1004 	case PIPE_VIDEO_FORMAT_MPEG12:
1005 		// reference picture buffer, must be big enough for all frames
1006 		dpb_size = image_size * NUM_MPEG2_REFS;
1007 		break;
1008 
1009 	case PIPE_VIDEO_FORMAT_MPEG4:
1010 		// reference picture buffer
1011 		dpb_size = image_size * max_references;
1012 
1013 		// CM
1014 		dpb_size += width_in_mb * height_in_mb * 64;
1015 
1016 		// IT surface buffer
1017 		dpb_size += align(width_in_mb * height_in_mb * 32, 64);
1018 
1019 		dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
1020 		break;
1021 
1022 	default:
1023 		// something is missing here
1024 		assert(0);
1025 
1026 		// at least use a sane default value
1027 		dpb_size = 32 * 1024 * 1024;
1028 		break;
1029 	}
1030 	return dpb_size;
1031 }
1032 
1033 /**
1034  * destroy this video decoder
1035  */
radeon_dec_destroy(struct pipe_video_codec * decoder)1036 static void radeon_dec_destroy(struct pipe_video_codec *decoder)
1037 {
1038 	struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
1039 	unsigned i;
1040 
1041 	assert(decoder);
1042 
1043 	map_msg_fb_it_buf(dec);
1044 	rvcn_dec_message_destroy(dec);
1045 	send_msg_buf(dec);
1046 
1047 	flush(dec, 0);
1048 
1049 	dec->ws->cs_destroy(dec->cs);
1050 
1051 	for (i = 0; i < NUM_BUFFERS; ++i) {
1052 		si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
1053 		si_vid_destroy_buffer(&dec->bs_buffers[i]);
1054 	}
1055 
1056 	si_vid_destroy_buffer(&dec->dpb);
1057 	si_vid_destroy_buffer(&dec->ctx);
1058 	si_vid_destroy_buffer(&dec->sessionctx);
1059 
1060 	FREE(dec);
1061 }
1062 
1063 /**
1064  * start decoding of a new frame
1065  */
radeon_dec_begin_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)1066 static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
1067 			     struct pipe_video_buffer *target,
1068 			     struct pipe_picture_desc *picture)
1069 {
1070 	struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
1071 	uintptr_t frame;
1072 
1073 	assert(decoder);
1074 
1075 	frame = ++dec->frame_number;
1076 	vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
1077 					    &radeon_dec_destroy_associated_data);
1078 
1079 	dec->bs_size = 0;
1080 	dec->bs_ptr = dec->ws->buffer_map(
1081 		dec->bs_buffers[dec->cur_buffer].res->buf,
1082 		dec->cs, PIPE_TRANSFER_WRITE);
1083 }
1084 
/**
 * decode a macroblock
 *
 * Macroblock level decoding is not implemented by this decoder; reaching
 * this hook trips an assertion.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
				   struct pipe_video_buffer *target,
				   struct pipe_picture_desc *picture,
				   const struct pipe_macroblock *macroblocks,
				   unsigned num_macroblocks)
{
	(void)decoder;
	(void)target;
	(void)picture;
	(void)macroblocks;
	(void)num_macroblocks;

	/* not supported (yet) */
	assert(0);
}
1097 
1098 /**
1099  * decode a bitstream
1100  */
radeon_dec_decode_bitstream(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)1101 static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
1102 				  struct pipe_video_buffer *target,
1103 				  struct pipe_picture_desc *picture,
1104 				  unsigned num_buffers,
1105 				  const void * const *buffers,
1106 				  const unsigned *sizes)
1107 {
1108 	struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
1109 	unsigned i;
1110 
1111 	assert(decoder);
1112 
1113 	if (!dec->bs_ptr)
1114 		return;
1115 
1116 	for (i = 0; i < num_buffers; ++i) {
1117 		struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
1118 		unsigned new_size = dec->bs_size + sizes[i];
1119 
1120 		if (new_size > buf->res->buf->size) {
1121 			dec->ws->buffer_unmap(buf->res->buf);
1122 			if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
1123 				RVID_ERR("Can't resize bitstream buffer!");
1124 				return;
1125 			}
1126 
1127 			dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
1128 							  PIPE_TRANSFER_WRITE);
1129 			if (!dec->bs_ptr)
1130 				return;
1131 
1132 			dec->bs_ptr += dec->bs_size;
1133 		}
1134 
1135 		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
1136 		dec->bs_size += sizes[i];
1137 		dec->bs_ptr += sizes[i];
1138 	}
1139 }
1140 
1141 /**
1142  * end decoding of the current frame
1143  */
radeon_dec_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)1144 static void radeon_dec_end_frame(struct pipe_video_codec *decoder,
1145 			   struct pipe_video_buffer *target,
1146 			   struct pipe_picture_desc *picture)
1147 {
1148 	struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
1149 	struct pb_buffer *dt;
1150 	struct rvid_buffer *msg_fb_it_buf, *bs_buf;
1151 
1152 	assert(decoder);
1153 
1154 	if (!dec->bs_ptr)
1155 		return;
1156 
1157 	msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
1158 	bs_buf = &dec->bs_buffers[dec->cur_buffer];
1159 
1160 	memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
1161 	dec->ws->buffer_unmap(bs_buf->res->buf);
1162 
1163 	map_msg_fb_it_buf(dec);
1164 	dt = rvcn_dec_message_decode(dec, target, picture);
1165 	rvcn_dec_message_feedback(dec);
1166 	send_msg_buf(dec);
1167 
1168 	send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
1169 		 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
1170 	if (dec->ctx.res)
1171 		send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
1172 			RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
1173 	send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,
1174 		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
1175 	send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0,
1176 		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
1177 	send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
1178 		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
1179 	if (have_it(dec))
1180 		send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
1181 			 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
1182 	set_reg(dec, RDECODE_ENGINE_CNTL, 1);
1183 
1184 	flush(dec, PIPE_FLUSH_ASYNC);
1185 	next_buffer(dec);
1186 }
1187 
/**
 * flush hook of the video codec interface
 *
 * Nothing to do here: radeon_dec_end_frame() already submits the command
 * stream through flush() for every frame.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
	(void)decoder;
}
1194 
1195 /**
1196  * create and HW decoder
1197  */
radeon_create_decoder(struct pipe_context * context,const struct pipe_video_codec * templ)1198 struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
1199 					     const struct pipe_video_codec *templ)
1200 {
1201 	struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
1202 	struct r600_common_context *rctx = (struct r600_common_context*)context;
1203 	unsigned width = templ->width, height = templ->height;
1204 	unsigned dpb_size, bs_buf_size, stream_type = 0;
1205 	struct radeon_decoder *dec;
1206 	int r, i;
1207 
1208 	switch(u_reduce_video_profile(templ->profile)) {
1209 	case PIPE_VIDEO_FORMAT_MPEG12:
1210 		if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
1211 			return vl_create_mpeg12_decoder(context, templ);
1212 		stream_type = RDECODE_CODEC_MPEG2_VLD;
1213 		break;
1214 	case PIPE_VIDEO_FORMAT_MPEG4:
1215 		width = align(width, VL_MACROBLOCK_WIDTH);
1216 		height = align(height, VL_MACROBLOCK_HEIGHT);
1217 		stream_type = RDECODE_CODEC_MPEG4;
1218 		break;
1219 	case PIPE_VIDEO_FORMAT_VC1:
1220 		stream_type = RDECODE_CODEC_VC1;
1221 		break;
1222 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
1223 		width = align(width, VL_MACROBLOCK_WIDTH);
1224 		height = align(height, VL_MACROBLOCK_HEIGHT);
1225 		stream_type = RDECODE_CODEC_H264_PERF;
1226 		break;
1227 	case PIPE_VIDEO_FORMAT_HEVC:
1228 		stream_type = RDECODE_CODEC_H265;
1229 		break;
1230 	default:
1231 		assert(0);
1232 		break;
1233 	}
1234 
1235 	dec = CALLOC_STRUCT(radeon_decoder);
1236 
1237 	if (!dec)
1238 		return NULL;
1239 
1240 	dec->base = *templ;
1241 	dec->base.context = context;
1242 	dec->base.width = width;
1243 	dec->base.height = height;
1244 
1245 	dec->base.destroy = radeon_dec_destroy;
1246 	dec->base.begin_frame = radeon_dec_begin_frame;
1247 	dec->base.decode_macroblock = radeon_dec_decode_macroblock;
1248 	dec->base.decode_bitstream = radeon_dec_decode_bitstream;
1249 	dec->base.end_frame = radeon_dec_end_frame;
1250 	dec->base.flush = radeon_dec_flush;
1251 
1252 	dec->stream_type = stream_type;
1253 	dec->stream_handle = si_vid_alloc_stream_handle();
1254 	dec->screen = context->screen;
1255 	dec->ws = ws;
1256 	dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
1257 	if (!dec->cs) {
1258 		RVID_ERR("Can't get command submission context.\n");
1259 		goto error;
1260 	}
1261 
1262 	for (i = 0; i < 16; i++)
1263 		dec->render_pic_list[i] = NULL;
1264 	bs_buf_size = width * height * (512 / (16 * 16));
1265 	for (i = 0; i < NUM_BUFFERS; ++i) {
1266 		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
1267 		if (have_it(dec))
1268 			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
1269 		/* use vram to improve performance, workaround an unknown bug */
1270 		if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
1271                                           msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
1272 			RVID_ERR("Can't allocated message buffers.\n");
1273 			goto error;
1274 		}
1275 
1276 		if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
1277                                           bs_buf_size, PIPE_USAGE_STAGING)) {
1278 			RVID_ERR("Can't allocated bitstream buffers.\n");
1279 			goto error;
1280 		}
1281 
1282 		si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
1283 		si_vid_clear_buffer(context, &dec->bs_buffers[i]);
1284 	}
1285 
1286 	dpb_size = calc_dpb_size(dec);
1287 
1288 	if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
1289 		RVID_ERR("Can't allocated dpb.\n");
1290 		goto error;
1291 	}
1292 
1293 	si_vid_clear_buffer(context, &dec->dpb);
1294 
1295 	if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
1296 		unsigned ctx_size = calc_ctx_size_h264_perf(dec);
1297 		if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
1298 			RVID_ERR("Can't allocated context buffer.\n");
1299 			goto error;
1300 		}
1301 		si_vid_clear_buffer(context, &dec->ctx);
1302 	}
1303 
1304 	if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
1305                                   RDECODE_SESSION_CONTEXT_SIZE,
1306                                   PIPE_USAGE_DEFAULT)) {
1307 		RVID_ERR("Can't allocated session ctx.\n");
1308 		goto error;
1309 	}
1310 	si_vid_clear_buffer(context, &dec->sessionctx);
1311 
1312 	map_msg_fb_it_buf(dec);
1313 	rvcn_dec_message_create(dec);
1314 	send_msg_buf(dec);
1315 	r = flush(dec, 0);
1316 	if (r)
1317 		goto error;
1318 
1319 	next_buffer(dec);
1320 
1321 	return &dec->base;
1322 
1323 error:
1324 	if (dec->cs) dec->ws->cs_destroy(dec->cs);
1325 
1326 	for (i = 0; i < NUM_BUFFERS; ++i) {
1327 		si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
1328 		si_vid_destroy_buffer(&dec->bs_buffers[i]);
1329 	}
1330 
1331 	si_vid_destroy_buffer(&dec->dpb);
1332 	si_vid_destroy_buffer(&dec->ctx);
1333 	si_vid_destroy_buffer(&dec->sessionctx);
1334 
1335 	FREE(dec);
1336 
1337 	return NULL;
1338 }
1339