1 /*
2  * This copyright notice applies to this header file only:
3  *
4  * Copyright (c) 2010-2017 NVIDIA Corporation
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the software, and to permit persons to whom the
12  * software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  */
27 
28 /*****************************************************************************************************/
29 //! \file cuviddec.h
30 //! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
31 //! \date 2015-2017
32 //! This file contains constants, structure definitions and function prototypes used for decoding.
33 /*****************************************************************************************************/
34 
35 #if !defined(__CUDA_VIDEO_H__)
36 #define __CUDA_VIDEO_H__
37 
38 #ifndef __dynlink_cuda_h__
39 #include "host-common/dynlink_cuda.h"
40 #endif // __dynlink_cuda_h__
41 
42 #if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
43 #if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
44 #define __CUVID_DEVPTR64
45 #endif
46 #endif
47 
48 #if defined(__cplusplus)
49 extern "C" {
50 #endif /* __cplusplus */
51 
52 typedef void *CUvideodecoder;
53 typedef struct _CUcontextlock_st *CUvideoctxlock;
54 
55 /*********************************************************************************/
56 //! \enum cudaVideoCodec
57 //! Video codec enums
58 //! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures
59 /*********************************************************************************/
60 typedef enum cudaVideoCodec_enum {
61     cudaVideoCodec_MPEG1=0,                                         /**<  MPEG1             */
62     cudaVideoCodec_MPEG2,                                           /**<  MPEG2             */
63     cudaVideoCodec_MPEG4,                                           /**<  MPEG4             */
64     cudaVideoCodec_VC1,                                             /**<  VC1               */
65     cudaVideoCodec_H264,                                            /**<  H264              */
66     cudaVideoCodec_JPEG,                                            /**<  JPEG              */
67     cudaVideoCodec_H264_SVC,                                        /**<  H264-SVC          */
68     cudaVideoCodec_H264_MVC,                                        /**<  H264-MVC          */
69     cudaVideoCodec_HEVC,                                            /**<  HEVC              */
70     cudaVideoCodec_VP8,                                             /**<  VP8               */
71     cudaVideoCodec_VP9,                                             /**<  VP9               */
72     cudaVideoCodec_NumCodecs,                                       /**<  Max codecs        */
73     // Uncompressed YUV
74     cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   /**< Y,U,V (4:2:0)      */
75     cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,V,U (4:2:0)      */
76     cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,UV  (4:2:0)      */
77     cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   /**< YUYV/YUY2 (4:2:2)  */
78     cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    /**< UYVY (4:2:2)       */
79 } cudaVideoCodec;
80 
81 /*********************************************************************************/
82 //! \enum cudaVideoSurfaceFormat
83 //! Video surface format enums used for output format of decoded output
84 //! These enums are used in CUVIDDECODECREATEINFO structure
85 /*********************************************************************************/
86 typedef enum cudaVideoSurfaceFormat_enum {
87     cudaVideoSurfaceFormat_NV12=0,       /**< NV12 format          */
88     cudaVideoSurfaceFormat_P016=1        /**< 16 bit semiplaner format. Can be used for 10 bit(6LSB bits 0),
89                                               12 bit (4LSB bits 0) */
90 } cudaVideoSurfaceFormat;
91 
92 /******************************************************************************************************************/
93 //! \enum cudaVideoDeinterlaceMode
94 //! Deinterlacing mode enums
95 //! These enums are used in CUVIDDECODECREATEINFO structure
96 //! Use cudaVideoDeinterlaceMode_Weave for progressive content and for content that doesn't need deinterlacing
97 //! cudaVideoDeinterlaceMode_Adaptive needs more video memory than other DImodes
98 /******************************************************************************************************************/
99 typedef enum cudaVideoDeinterlaceMode_enum {
100     cudaVideoDeinterlaceMode_Weave=0,   /**< Weave both fields (no deinterlacing) */
101     cudaVideoDeinterlaceMode_Bob,       /**< Drop one field                       */
102     cudaVideoDeinterlaceMode_Adaptive   /**< Adaptive deinterlacing               */
103 } cudaVideoDeinterlaceMode;
104 
105 /**************************************************************************************************************/
106 //! \enum cudaVideoChromaFormat
107 //! Chroma format enums
108 //! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures
109 //! JPEG supports Monochrome, YUV 4:2:0, YUV 4:2:2 and YUV 4:4:4 chroma formats.
110 //! H264, HEVC, VP9, VP8, VC1, MPEG1, MPEG2 and MPEG4 support YUV 4:2:0 chroma format only.
111 /**************************************************************************************************************/
112 typedef enum cudaVideoChromaFormat_enum {
113     cudaVideoChromaFormat_Monochrome=0,  /**< MonoChrome */
114     cudaVideoChromaFormat_420,           /**< YUV 4:2:0  */
115     cudaVideoChromaFormat_422,           /**< YUV 4:2:2  */
116     cudaVideoChromaFormat_444            /**< YUV 4:4:4  */
117 } cudaVideoChromaFormat;
118 
119 /*************************************************************************************************************/
120 //! \enum cudaVideoCreateFlags
121 //! Decoder flag enums to select preferred decode path
122 //! cudaVideoCreate_Default and cudaVideoCreate_PreferCUVID are most optimized, use these whenever possible
123 /*************************************************************************************************************/
124 typedef enum cudaVideoCreateFlags_enum {
125     cudaVideoCreate_Default     = 0x00,     /**< Default operation mode: use dedicated video engines                        */
126     cudaVideoCreate_PreferCUDA  = 0x01,     /**< Use CUDA-based decoder (requires valid vidLock object for multi-threading) */
127     cudaVideoCreate_PreferDXVA  = 0x02,     /**< Go through DXVA internally if possible (requires D3D9 interop)             */
128     cudaVideoCreate_PreferCUVID = 0x04      /**< Use dedicated video engines directly                                       */
129 } cudaVideoCreateFlags;
130 
131 
132 /**************************************************************************************************************/
133 //! \struct CUVIDDECODECAPS;
134 //! This structure is used in cuvidGetDecoderCaps API
135 /**************************************************************************************************************/
136 typedef struct _CUVIDDECODECAPS
137 {
138     cudaVideoCodec          eCodecType;                 /**< IN: cudaVideoCodec_XXX                                 */
139     cudaVideoChromaFormat   eChromaFormat;              /**< IN: cudaVideoChromaFormat_XXX                          */
140     unsigned int            nBitDepthMinus8;            /**< IN: The Value "BitDepth minus 8"                       */
141     unsigned int            reserved1[3];               /**< Reserved for future use - set to zero                  */
142 
143     unsigned char           bIsSupported;               /**< OUT: 1 if codec supported, 0 if not supported          */
144     unsigned char           reserved2[3];               /**< Reserved for future use - set to zero                  */
145     unsigned int            nMaxWidth;                  /**< OUT: Max supported coded width in pixels               */
146     unsigned int            nMaxHeight;                 /**< OUT: Max supported coded height in pixels              */
147     unsigned int            nMaxMBCount;                /**< OUT: Max supported macroblock count
148                                                                   CodedWidth*CodedHeight/256 must be <= nMaxMBCount */
149     unsigned short          nMinWidth;                  /**< OUT: Min supported coded width in pixels               */
150     unsigned short          nMinHeight;                 /**< OUT: Min supported coded height in pixels              */
151     unsigned int            reserved3[11];              /**< Reserved for future use - set to zero                  */
152 } CUVIDDECODECAPS;
153 
154 /**************************************************************************************************************/
155 //! \struct CUVIDDECODECREATEINFO
156 //! This structure is used in cuvidCreateDecoder API
157 /**************************************************************************************************************/
158 typedef struct _CUVIDDECODECREATEINFO
159 {
160     unsigned long ulWidth;              /**< IN: Coded sequence width in pixels                                             */
161     unsigned long ulHeight;             /**< IN: Coded sequence height in pixels                                            */
162     unsigned long ulNumDecodeSurfaces;  /**< IN: Maximum number of internal decode surfaces                                 */
163     cudaVideoCodec CodecType;           /**< IN: cudaVideoCodec_XXX                                                         */
164     cudaVideoChromaFormat ChromaFormat; /**< IN: cudaVideoChromaFormat_XXX                                                  */
165     unsigned long ulCreationFlags;      /**< IN: Decoder creation flags (cudaVideoCreateFlags_XXX)                          */
166     unsigned long bitDepthMinus8;       /**< IN: The value "BitDepth minus 8"                                               */
167     unsigned long ulIntraDecodeOnly;    /**< IN: Set 1 only if video has all intra frames (default value is 0). This will
168                                              optimize video memory for Intra frames only decoding. The support is limited
169                                              to specific codecs(H264 rightnow), the flag will be ignored for codecs which
170                                              are not supported. However decoding might fail if the flag is enabled in case
171                                              of supported codecs for regular bit streams having P and/or B frames.          */
172     unsigned long Reserved1[3];         /**< Reserved for future use - set to zero                                          */
173     /**
174     * IN: area of the frame that should be displayed
175     */
176     struct {
177         short left;
178         short top;
179         short right;
180         short bottom;
181     } display_area;
182 
183     cudaVideoSurfaceFormat OutputFormat;       /**< IN: cudaVideoSurfaceFormat_XXX                                     */
184     cudaVideoDeinterlaceMode DeinterlaceMode;  /**< IN: cudaVideoDeinterlaceMode_XXX                                   */
185     unsigned long ulTargetWidth;               /**< IN: Post-processed output width (Should be aligned to 2)           */
186     unsigned long ulTargetHeight;              /**< IN: Post-processed output height (Should be aligbed to 2)          */
187     unsigned long ulNumOutputSurfaces;         /**< IN: Maximum number of output surfaces simultaneously mapped        */
188     CUvideoctxlock vidLock;                    /**< IN: If non-NULL, context lock used for synchronizing ownership of
189                                                     the cuda context. Needed for cudaVideoCreate_PreferCUDA decode     */
190     /**
191     * IN: target rectangle in the output frame (for aspect ratio conversion)
192     * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
193     */
194     struct {
195         short left;
196         short top;
197         short right;
198         short bottom;
199     } target_rect;
200     unsigned long Reserved2[5];                /**< Reserved for future use - set to zero */
201 } CUVIDDECODECREATEINFO;
202 
203 /*********************************************************/
204 //! \struct CUVIDH264DPBENTRY
205 //! H.264 DPB entry
206 //! This structure is used in CUVIDH264PICPARAMS structure
207 /*********************************************************/
208 typedef struct _CUVIDH264DPBENTRY
209 {
210     int PicIdx;                 /**< picture index of reference frame                                        */
211     int FrameIdx;               /**< frame_num(short-term) or LongTermFrameIdx(long-term)                    */
212     int is_long_term;           /**< 0=short term reference, 1=long term reference                           */
213     int not_existing;           /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
214     int used_for_reference;     /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields                    */
215     int FieldOrderCnt[2];       /**< field order count of top and bottom fields                              */
216 } CUVIDH264DPBENTRY;
217 
218 /************************************************************/
219 //! \struct CUVIDH264MVCEXT
220 //! H.264 MVC picture parameters ext
221 //! This structure is used in CUVIDH264PICPARAMS structure
222 /************************************************************/
223 typedef struct _CUVIDH264MVCEXT
224 {
225     int num_views_minus1;                  /**< Max number of coded views minus 1 in video : Range - 0 to 1023              */
226     int view_id;                           /**< view identifier                                                             */
227     unsigned char inter_view_flag;         /**< 1 if used for inter-view prediction, 0 if not                               */
228     unsigned char num_inter_view_refs_l0;  /**< number of inter-view ref pics in RefPicList0                                */
229     unsigned char num_inter_view_refs_l1;  /**< number of inter-view ref pics in RefPicList1                                */
230     unsigned char MVCReserved8Bits;        /**< Reserved bits                                                               */
231     int InterViewRefsL0[16];               /**< view id of the i-th view component for inter-view prediction in RefPicList0 */
232     int InterViewRefsL1[16];               /**< view id of the i-th view component for inter-view prediction in RefPicList1 */
233 } CUVIDH264MVCEXT;
234 
235 /*********************************************************/
236 //! \struct CUVIDH264SVCEXT
237 //! H.264 SVC picture parameters ext
238 //! This structure is used in CUVIDH264PICPARAMS structure
239 /*********************************************************/
240 typedef struct _CUVIDH264SVCEXT
241 {
242     unsigned char profile_idc;
243     unsigned char level_idc;
244     unsigned char DQId;
245     unsigned char DQIdMax;
246     unsigned char disable_inter_layer_deblocking_filter_idc;
247     unsigned char ref_layer_chroma_phase_y_plus1;
248     signed char   inter_layer_slice_alpha_c0_offset_div2;
249     signed char   inter_layer_slice_beta_offset_div2;
250 
251     unsigned short DPBEntryValidFlag;
252     unsigned char inter_layer_deblocking_filter_control_present_flag;
253     unsigned char extended_spatial_scalability_idc;
254     unsigned char adaptive_tcoeff_level_prediction_flag;
255     unsigned char slice_header_restriction_flag;
256     unsigned char chroma_phase_x_plus1_flag;
257     unsigned char chroma_phase_y_plus1;
258 
259     unsigned char tcoeff_level_prediction_flag;
260     unsigned char constrained_intra_resampling_flag;
261     unsigned char ref_layer_chroma_phase_x_plus1_flag;
262     unsigned char store_ref_base_pic_flag;
263     unsigned char Reserved8BitsA;
264     unsigned char Reserved8BitsB;
265 
266     short scaled_ref_layer_left_offset;
267     short scaled_ref_layer_top_offset;
268     short scaled_ref_layer_right_offset;
269     short scaled_ref_layer_bottom_offset;
270     unsigned short Reserved16Bits;
271     struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded.
272                                              Linked list ends at the target layer. */
273     int bRefBaseLayer;                  /**< whether to store ref base pic */
274 } CUVIDH264SVCEXT;
275 
276 /******************************************************/
277 //! \struct CUVIDH264PICPARAMS
278 //! H.264 picture parameters
279 //! This structure is used in CUVIDPICPARAMS structure
280 /******************************************************/
281 typedef struct _CUVIDH264PICPARAMS
282 {
283     // SPS
284     int log2_max_frame_num_minus4;
285     int pic_order_cnt_type;
286     int log2_max_pic_order_cnt_lsb_minus4;
287     int delta_pic_order_always_zero_flag;
288     int frame_mbs_only_flag;
289     int direct_8x8_inference_flag;
290     int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
291     unsigned char residual_colour_transform_flag;
292     unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
293     unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
294     unsigned char qpprime_y_zero_transform_bypass_flag;
295     // PPS
296     int entropy_coding_mode_flag;
297     int pic_order_present_flag;
298     int num_ref_idx_l0_active_minus1;
299     int num_ref_idx_l1_active_minus1;
300     int weighted_pred_flag;
301     int weighted_bipred_idc;
302     int pic_init_qp_minus26;
303     int deblocking_filter_control_present_flag;
304     int redundant_pic_cnt_present_flag;
305     int transform_8x8_mode_flag;
306     int MbaffFrameFlag;
307     int constrained_intra_pred_flag;
308     int chroma_qp_index_offset;
309     int second_chroma_qp_index_offset;
310     int ref_pic_flag;
311     int frame_num;
312     int CurrFieldOrderCnt[2];
313     // DPB
314     CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
315     // Quantization Matrices (raster-order)
316     unsigned char WeightScale4x4[6][16];
317     unsigned char WeightScale8x8[2][64];
318     // FMO/ASO
319     unsigned char fmo_aso_enable;
320     unsigned char num_slice_groups_minus1;
321     unsigned char slice_group_map_type;
322     signed char pic_init_qs_minus26;
323     unsigned int slice_group_change_rate_minus1;
324     union
325     {
326         unsigned long long slice_group_map_addr;
327         const unsigned char *pMb2SliceGroupMap;
328     } fmo;
329     unsigned int  Reserved[12];
330     // SVC/MVC
331     union
332     {
333         CUVIDH264MVCEXT mvcext;
334         CUVIDH264SVCEXT svcext;
335     };
336 } CUVIDH264PICPARAMS;
337 
338 
339 /********************************************************/
340 //! \struct CUVIDMPEG2PICPARAMS
341 //! MPEG-2 picture parameters
342 //! This structure is used in CUVIDPICPARAMS structure
343 /********************************************************/
344 typedef struct _CUVIDMPEG2PICPARAMS
345 {
346     int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
347     int BackwardRefIdx;         // Picture index of backward reference (B-frames)
348     int picture_coding_type;
349     int full_pel_forward_vector;
350     int full_pel_backward_vector;
351     int f_code[2][2];
352     int intra_dc_precision;
353     int frame_pred_frame_dct;
354     int concealment_motion_vectors;
355     int q_scale_type;
356     int intra_vlc_format;
357     int alternate_scan;
358     int top_field_first;
359     // Quantization matrices (raster order)
360     unsigned char QuantMatrixIntra[64];
361     unsigned char QuantMatrixInter[64];
362 } CUVIDMPEG2PICPARAMS;
363 
364 // MPEG-4 has VOP types instead of Picture types
365 #define I_VOP 0
366 #define P_VOP 1
367 #define B_VOP 2
368 #define S_VOP 3
369 
370 /*******************************************************/
371 //! \struct CUVIDMPEG4PICPARAMS
372 //! MPEG-4 picture parameters
373 //! This structure is used in CUVIDPICPARAMS structure
374 /*******************************************************/
375 typedef struct _CUVIDMPEG4PICPARAMS
376 {
377     int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
378     int BackwardRefIdx;         // Picture index of backward reference (B-frames)
379     // VOL
380     int video_object_layer_width;
381     int video_object_layer_height;
382     int vop_time_increment_bitcount;
383     int top_field_first;
384     int resync_marker_disable;
385     int quant_type;
386     int quarter_sample;
387     int short_video_header;
388     int divx_flags;
389     // VOP
390     int vop_coding_type;
391     int vop_coded;
392     int vop_rounding_type;
393     int alternate_vertical_scan_flag;
394     int interlaced;
395     int vop_fcode_forward;
396     int vop_fcode_backward;
397     int trd[2];
398     int trb[2];
399     // Quantization matrices (raster order)
400     unsigned char QuantMatrixIntra[64];
401     unsigned char QuantMatrixInter[64];
402     int gmc_enabled;
403 } CUVIDMPEG4PICPARAMS;
404 
405 /********************************************************/
406 //! \struct CUVIDVC1PICPARAMS
407 //! VC1 picture parameters
408 //! This structure is used in CUVIDPICPARAMS structure
409 /********************************************************/
410 typedef struct _CUVIDVC1PICPARAMS
411 {
412     int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
413     int BackwardRefIdx;     /**< Picture index of backward reference (B-frames)  */
414     int FrameWidth;         /**< Actual frame width                              */
415     int FrameHeight;        /**< Actual frame height                             */
416     // PICTURE
417     int intra_pic_flag;     /**< Set to 1 for I,BI frames */
418     int ref_pic_flag;       /**< Set to 1 for I,P frames  */
419     int progressive_fcm;    /**< Progressive frame        */
420     // SEQUENCE
421     int profile;
422     int postprocflag;
423     int pulldown;
424     int interlace;
425     int tfcntrflag;
426     int finterpflag;
427     int psf;
428     int multires;
429     int syncmarker;
430     int rangered;
431     int maxbframes;
432     // ENTRYPOINT
433     int panscan_flag;
434     int refdist_flag;
435     int extended_mv;
436     int dquant;
437     int vstransform;
438     int loopfilter;
439     int fastuvmc;
440     int overlap;
441     int quantizer;
442     int extended_dmv;
443     int range_mapy_flag;
444     int range_mapy;
445     int range_mapuv_flag;
446     int range_mapuv;
447     int rangeredfrm;    // range reduction state
448 } CUVIDVC1PICPARAMS;
449 
450 /***********************************************************/
451 //! \struct CUVIDJPEGPICPARAMS
452 //! JPEG picture parameters
453 //! This structure is used in CUVIDPICPARAMS structure
454 /***********************************************************/
455 typedef struct _CUVIDJPEGPICPARAMS
456 {
457     int Reserved;
458 } CUVIDJPEGPICPARAMS;
459 
460 
461 /*******************************************************/
462 //! \struct CUVIDHEVCPICPARAMS
463 //! HEVC picture parameters
464 //! This structure is used in CUVIDPICPARAMS structure
465 /*******************************************************/
466 typedef struct _CUVIDHEVCPICPARAMS
467 {
468     // sps
469     int pic_width_in_luma_samples;
470     int pic_height_in_luma_samples;
471     unsigned char log2_min_luma_coding_block_size_minus3;
472     unsigned char log2_diff_max_min_luma_coding_block_size;
473     unsigned char log2_min_transform_block_size_minus2;
474     unsigned char log2_diff_max_min_transform_block_size;
475     unsigned char pcm_enabled_flag;
476     unsigned char log2_min_pcm_luma_coding_block_size_minus3;
477     unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
478     unsigned char pcm_sample_bit_depth_luma_minus1;
479 
480     unsigned char pcm_sample_bit_depth_chroma_minus1;
481     unsigned char pcm_loop_filter_disabled_flag;
482     unsigned char strong_intra_smoothing_enabled_flag;
483     unsigned char max_transform_hierarchy_depth_intra;
484     unsigned char max_transform_hierarchy_depth_inter;
485     unsigned char amp_enabled_flag;
486     unsigned char separate_colour_plane_flag;
487     unsigned char log2_max_pic_order_cnt_lsb_minus4;
488 
489     unsigned char num_short_term_ref_pic_sets;
490     unsigned char long_term_ref_pics_present_flag;
491     unsigned char num_long_term_ref_pics_sps;
492     unsigned char sps_temporal_mvp_enabled_flag;
493     unsigned char sample_adaptive_offset_enabled_flag;
494     unsigned char scaling_list_enable_flag;
495     unsigned char IrapPicFlag;
496     unsigned char IdrPicFlag;
497 
498     unsigned char bit_depth_luma_minus8;
499     unsigned char bit_depth_chroma_minus8;
500     unsigned char reserved1[14];
501 
502     // pps
503     unsigned char dependent_slice_segments_enabled_flag;
504     unsigned char slice_segment_header_extension_present_flag;
505     unsigned char sign_data_hiding_enabled_flag;
506     unsigned char cu_qp_delta_enabled_flag;
507     unsigned char diff_cu_qp_delta_depth;
508     signed char init_qp_minus26;
509     signed char pps_cb_qp_offset;
510     signed char pps_cr_qp_offset;
511 
512     unsigned char constrained_intra_pred_flag;
513     unsigned char weighted_pred_flag;
514     unsigned char weighted_bipred_flag;
515     unsigned char transform_skip_enabled_flag;
516     unsigned char transquant_bypass_enabled_flag;
517     unsigned char entropy_coding_sync_enabled_flag;
518     unsigned char log2_parallel_merge_level_minus2;
519     unsigned char num_extra_slice_header_bits;
520 
521     unsigned char loop_filter_across_tiles_enabled_flag;
522     unsigned char loop_filter_across_slices_enabled_flag;
523     unsigned char output_flag_present_flag;
524     unsigned char num_ref_idx_l0_default_active_minus1;
525     unsigned char num_ref_idx_l1_default_active_minus1;
526     unsigned char lists_modification_present_flag;
527     unsigned char cabac_init_present_flag;
528     unsigned char pps_slice_chroma_qp_offsets_present_flag;
529 
530     unsigned char deblocking_filter_override_enabled_flag;
531     unsigned char pps_deblocking_filter_disabled_flag;
532     signed char   pps_beta_offset_div2;
533     signed char   pps_tc_offset_div2;
534     unsigned char tiles_enabled_flag;
535     unsigned char uniform_spacing_flag;
536     unsigned char num_tile_columns_minus1;
537     unsigned char num_tile_rows_minus1;
538 
539     unsigned short column_width_minus1[21];
540     unsigned short row_height_minus1[21];
541     unsigned int   reserved3[15];
542 
543     // RefPicSets
544     int NumBitsForShortTermRPSInSlice;
545     int NumDeltaPocsOfRefRpsIdx;
546     int NumPocTotalCurr;
547     int NumPocStCurrBefore;
548     int NumPocStCurrAfter;
549     int NumPocLtCurr;
550     int CurrPicOrderCntVal;
551     int RefPicIdx[16];                      // [refpic] Indices of valid reference pictures (-1 if unused for reference)
552     int PicOrderCntVal[16];                 // [refpic]
553     unsigned char IsLongTerm[16];           // [refpic] 0=not a long-term reference, 1=long-term reference
554     unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
555     unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
556     unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
557     unsigned char RefPicSetInterLayer0[8];
558     unsigned char RefPicSetInterLayer1[8];
559     unsigned int  reserved4[12];
560 
561     // scaling lists (diag order)
562     unsigned char ScalingList4x4[6][16];       // [matrixId][i]
563     unsigned char ScalingList8x8[6][64];       // [matrixId][i]
564     unsigned char ScalingList16x16[6][64];     // [matrixId][i]
565     unsigned char ScalingList32x32[2][64];     // [matrixId][i]
566     unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
567     unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
568 } CUVIDHEVCPICPARAMS;
569 
570 
571 /***********************************************************/
572 //! \struct CUVIDVP8PICPARAMS
573 //! VP8 picture parameters
574 //! This structure is used in CUVIDPICPARAMS structure
575 /***********************************************************/
576 typedef struct _CUVIDVP8PICPARAMS
577 {
578     int width;
579     int height;
580     unsigned int first_partition_size;
581     //Frame Indexes
582     unsigned char LastRefIdx;
583     unsigned char GoldenRefIdx;
584     unsigned char AltRefIdx;
585     union {
586         struct {
587             unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
588             unsigned char version : 3;
589             unsigned char show_frame : 1;
590             unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
591             unsigned char Reserved2Bits : 2;
592         };
593         unsigned char wFrameTagFlags;
594     };
595     unsigned char Reserved1[4];
596     unsigned int  Reserved2[3];
597 } CUVIDVP8PICPARAMS;
598 
599 /***********************************************************/
600 //! \struct CUVIDVP9PICPARAMS
601 //! VP9 picture parameters
602 //! This structure is used in CUVIDPICPARAMS structure
603 /***********************************************************/
604 typedef struct _CUVIDVP9PICPARAMS
605 {
606     unsigned int width;
607     unsigned int height;
608 
609     //Frame Indices
610     unsigned char LastRefIdx;
611     unsigned char GoldenRefIdx;
612     unsigned char AltRefIdx;
613     unsigned char colorSpace;
614 
615     unsigned short profile : 3;
616     unsigned short frameContextIdx : 2;
617     unsigned short frameType : 1;
618     unsigned short showFrame : 1;
619     unsigned short errorResilient : 1;
620     unsigned short frameParallelDecoding : 1;
621     unsigned short subSamplingX : 1;
622     unsigned short subSamplingY : 1;
623     unsigned short intraOnly : 1;
624     unsigned short allow_high_precision_mv : 1;
625     unsigned short refreshEntropyProbs : 1;
626     unsigned short reserved2Bits : 2;
627 
628     unsigned short reserved16Bits;
629 
630     unsigned char  refFrameSignBias[4];
631 
632     unsigned char bitDepthMinus8Luma;
633     unsigned char bitDepthMinus8Chroma;
634     unsigned char loopFilterLevel;
635     unsigned char loopFilterSharpness;
636 
637     unsigned char modeRefLfEnabled;
638     unsigned char log2_tile_columns;
639     unsigned char log2_tile_rows;
640 
641     unsigned char segmentEnabled : 1;
642     unsigned char segmentMapUpdate : 1;
643     unsigned char segmentMapTemporalUpdate : 1;
644     unsigned char segmentFeatureMode : 1;
645     unsigned char reserved4Bits : 4;
646 
647 
648     unsigned char segmentFeatureEnable[8][4];
649     short         segmentFeatureData[8][4];
650     unsigned char mb_segment_tree_probs[7];
651     unsigned char segment_pred_probs[3];
652     unsigned char reservedSegment16Bits[2];
653 
654     int qpYAc;
655     int qpYDc;
656     int qpChDc;
657     int qpChAc;
658 
659     unsigned int activeRefIdx[3];
660     unsigned int resetFrameContext;
661     unsigned int mcomp_filter_type;
662     unsigned int mbRefLfDelta[4];
663     unsigned int mbModeLfDelta[2];
664     unsigned int frameTagSize;
665     unsigned int offsetToDctParts;
666     unsigned int reserved128Bits[4];
667 
668 } CUVIDVP9PICPARAMS;
669 
670 
671 /******************************************************************************************/
672 //! \struct CUVIDPICPARAMS
673 //! Picture parameters for decoding
674 //! This structure is used in cuvidDecodePicture API
675 //! IN  for cuvidDecodePicture
676 /******************************************************************************************/
677 typedef struct _CUVIDPICPARAMS
678 {
679     int PicWidthInMbs;                     /**< IN: Coded frame size in macroblocks                           */
680     int FrameHeightInMbs;                  /**< IN: Coded frame height in macroblocks                         */
681     int CurrPicIdx;                        /**< IN: Output index of the current picture                       */
682     int field_pic_flag;                    /**< IN: 0=frame picture, 1=field picture                          */
683     int bottom_field_flag;                 /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
684     int second_field;                      /**< IN: Second field of a complementary field pair                */
685     // Bitstream data
686     unsigned int nBitstreamDataLen;        /**< IN: Number of bytes in bitstream data buffer                  */
687     const unsigned char *pBitstreamData;   /**< IN: Ptr to bitstream data for this picture (slice-layer)      */
688     unsigned int nNumSlices;               /**< IN: Number of slices in this picture                          */
689     const unsigned int *pSliceDataOffsets; /**< IN: nNumSlices entries, contains offset of each slice within
690                                                         the bitstream data buffer                             */
691     int ref_pic_flag;                      /**< IN: This picture is a reference picture                       */
692     int intra_pic_flag;                    /**< IN: This picture is entirely intra coded                      */
693     unsigned int Reserved[30];             /**< Reserved for future use                                       */
694     // IN: Codec-specific data
695     union {
696         CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
697         CUVIDH264PICPARAMS  h264;
698         CUVIDVC1PICPARAMS   vc1;
699         CUVIDMPEG4PICPARAMS mpeg4;
700         CUVIDJPEGPICPARAMS  jpeg;
701         CUVIDHEVCPICPARAMS  hevc;
702         CUVIDVP8PICPARAMS   vp8;
703         CUVIDVP9PICPARAMS   vp9;
704         unsigned int CodecReserved[1024];
705     } CodecSpecific;
706 } CUVIDPICPARAMS;
707 
708 
709 /******************************************************/
710 //! \struct CUVIDPROCPARAMS
711 //! Picture parameters for postprocessing
712 //! This structure is used in cuvidMapVideoFrame API
713 /******************************************************/
714 typedef struct _CUVIDPROCPARAMS
715 {
716     int progressive_frame;              /**< IN: Input is progressive (deinterlace_mode will be ignored)                */
717     int second_field;                   /**< IN: Output the second field (ignored if deinterlace mode is Weave)         */
718     int top_field_first;                /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */
719     int unpaired_field;                 /**< IN: Input only contains one field (2nd field is invalid)                   */
720     // The fields below are used for raw YUV input
721     unsigned int reserved_flags;        /**< Reserved for future use (set to zero)                                      */
722     unsigned int reserved_zero;         /**< Reserved (set to zero)                                                     */
723     unsigned long long raw_input_dptr;  /**< IN: Input CUdeviceptr for raw YUV extensions                               */
724     unsigned int raw_input_pitch;       /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately)      */
725     unsigned int raw_input_format;      /**< IN: Input YUV format (cudaVideoCodec_enum)                                 */
726     unsigned long long raw_output_dptr; /**< IN: Output CUdeviceptr for raw YUV extensions                              */
727     unsigned int raw_output_pitch;      /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately)     */
728     unsigned int Reserved1;             /**< Reserved for future use (set to zero)                                      */
729     CUstream output_stream;             /**< IN: stream object used by cuvidMapVideoFrame                               */
730     unsigned int Reserved[46];          /**< Reserved for future use (set to zero)                                      */
731     void *Reserved2[2];                 /**< Reserved for future use (set to zero)                                      */
732 } CUVIDPROCPARAMS;
733 
734 
735 /***********************************************************************************************************/
736 //! VIDEO_DECODER
737 //!
738 //! In order to minimize decode latencies, there should be always at least 2 pictures in the decode
739 //! queue at any time, in order to make sure that all decode engines are always busy.
740 //!
741 //! Overall data flow:
742 //!  - cuvidGetDecoderCaps(...)
743 //!  - cuvidCreateDecoder(...)
744 //!  - For each picture:
745 //!    + cuvidDecodePicture(N)
746 //!    + cuvidMapVideoFrame(N-4)
747 //!    + do some processing in cuda
748 //!    + cuvidUnmapVideoFrame(N-4)
749 //!    + cuvidDecodePicture(N+1)
750 //!    + cuvidMapVideoFrame(N-3)
751 //!    + ...
752 //!  - cuvidDestroyDecoder(...)
753 //!
754 //! NOTE:
755 //! - When the cuda context is created from a D3D device, the D3D device must also be created
756 //!   with the D3DCREATE_MULTITHREADED flag.
757 //! - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
758 //! - cuvidDecodePicture may block the calling thread if there are too many pictures pending
759 //!   in the decode queue
760 /***********************************************************************************************************/
761 
762 
763 /**********************************************************************************************************************/
764 //! \fn CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc)
765 //! Queries decode capabilities of NVDEC-HW based on CodecType, ChromaFormat and BitDepthMinus8 parameters.
766 //! 1. Application fills IN parameters CodecType, ChromaFormat and BitDepthMinus8 of CUVIDDECODECAPS structure
767 //! 2. On calling cuvidGetDecoderCaps, driver fills OUT parameters if the IN parameters are supported
768 //!    If IN parameters passed to the driver are not supported by NVDEC-HW, then all OUT params are set to 0.
769 //! E.g. on Geforce GTX 960:
770 //!   App fills - eCodecType = cudaVideoCodec_H264; eChromaFormat = cudaVideoChromaFormat_420; nBitDepthMinus8 = 0;
771 //!   Given IN parameters are supported, hence driver fills: bIsSupported = 1; nMinWidth   = 48; nMinHeight  = 16;
772 //!   nMaxWidth = 4096; nMaxHeight = 4096; nMaxMBCount = 65536;
773 //! CodedWidth*CodedHeight/256 must be less than or equal to nMaxMBCount
774 /**********************************************************************************************************************/
775 typedef CUresult CUDAAPI tcuvidGetDecoderCaps(CUVIDDECODECAPS *pdc);
776 
777 /********************************************************************************************************************/
778 //! \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
779 //! Create the decoder object based on pdci. A handle to the created decoder is returned
780 /********************************************************************************************************************/
781 typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
782 /********************************************************************************************************************/
783 //! \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
784 //! Destroy the decoder object.
785 /********************************************************************************************************************/
786 typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder);
787 
788 /********************************************************************************************************************/
789 //! \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
790 //! Decode a single picture (field or frame)
791 //! Kicks off HW decoding
792 /********************************************************************************************************************/
793 typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
794 
795 
796 #if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
797 /************************************************************************************************************************/
798 //! \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr,
799 //!                                         unsigned int  *pPitch, CUVIDPROCPARAMS *pVPP);
800 //! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated
801 //! pitch of the video frame
802 /************************************************************************************************************************/
803 typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
804                                            unsigned int *pDevPtr, unsigned int *pPitch,
805                                            CUVIDPROCPARAMS *pVPP);
806 
807 /********************************************************************************************************************/
808 //! \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
809 //! Unmap a previously mapped video frame
810 /********************************************************************************************************************/
811 typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
812 #endif
813 
814 #if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
815 /************************************************************************************************************************/
816 //! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
817 //!                                           unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
818 //! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated
819 //! pitch of the video frame
820 /************************************************************************************************************************/
821 typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
822                                              unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
823 
824 /********************************************************************************************************************/
825 //! \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
826 //! Unmap a previously mapped video frame
827 /********************************************************************************************************************/
828 typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
829 
830 #if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
831 #define tcuvidMapVideoFrame      tcuvidMapVideoFrame64
832 #define tcuvidUnmapVideoFrame    tcuvidUnmapVideoFrame64
833 #endif
834 #endif
835 
836 
837 
838 /********************************************************************************************************************/
839 //!
840 //! Context-locking: to facilitate multi-threaded implementations, the following 4 functions
841 //! provide a simple mutex-style host synchronization. If a non-NULL context is specified
842 //! in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
843 //! context before making any cuda calls.
844 //! A multi-threaded application could create a lock associated with a context handle so that
845 //! multiple threads can safely share the same cuda context:
846 //!  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
847 //!    that can be passed to cuvidCtxLockCreate.
848 //!  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
849 //!
850 //! NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
851 //! decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
852 /********************************************************************************************************************/
853 
854 /********************************************************************************************************************/
855 //! \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
856 //! This API is used to create CtxLock object
857 /********************************************************************************************************************/
858 typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
859 
860 /********************************************************************************************************************/
861 //! \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
862 //! This API is used to free CtxLock object
863 /********************************************************************************************************************/
864 typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck);
865 
866 /********************************************************************************************************************/
867 //! \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
868 //! This API is used to acquire ctxlock
869 /********************************************************************************************************************/
870 typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
871 
872 /********************************************************************************************************************/
873 //! \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
874 //! This API is used to release ctxlock
875 /********************************************************************************************************************/
876 typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
877 
878 /**********************************************************************************************/
879 
880 extern tcuvidGetDecoderCaps       *cuvidGetDecoderCaps;
881 extern tcuvidCreateDecoder        *cuvidCreateDecoder;
882 extern tcuvidDestroyDecoder       *cuvidDestroyDecoder;
883 extern tcuvidDecodePicture        *cuvidDecodePicture;
884 extern tcuvidMapVideoFrame        *cuvidMapVideoFrame;
885 extern tcuvidUnmapVideoFrame      *cuvidUnmapVideoFrame;
886 
887 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
888 extern tcuvidMapVideoFrame64      *cuvidMapVideoFrame64;
889 extern tcuvidUnmapVideoFrame64    *cuvidUnmapVideoFrame64;
890 #endif
891 
892 // extern tcuvidGetVideoFrameSurface *cuvidGetVideoFrameSurface;
893 
894 extern tcuvidCtxLockCreate        *cuvidCtxLockCreate;
895 extern tcuvidCtxLockDestroy       *cuvidCtxLockDestroy;
896 extern tcuvidCtxLock              *cuvidCtxLock;
897 extern tcuvidCtxUnlock            *cuvidCtxUnlock;
898 
899 #if defined(__cplusplus)
900 }
901 // Auto-lock helper for C++ applications
902 class CCtxAutoLock
903 {
904 private:
905     CUvideoctxlock m_ctx;
906 public:
907     CCtxAutoLock(CUvideoctxlock ctx);
908     ~CCtxAutoLock();
909 };
910 #endif /* __cplusplus */
911 
912 #endif // __CUDA_VIDEO_H__
913 
914