1 /*
2  *  Copyright (C) 2012 Intel Corporation.  All Rights Reserved.
3  *
4  *  This is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This software is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this software; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
17  *  USA.
18  */
19 
20 #ifdef LIBVNCSERVER_CONFIG_LIBVA
21 
22 #include <X11/Xlib.h>
23 #include <va/va_x11.h>
24 
/*
 * Slice type codes. h264_decode_frame() compares the slice_type it is
 * given (taken from the H.264 rectangle header) against these values.
 */
enum _slice_types {
	SLICE_TYPE_P = 0,	/* predicted */
	SLICE_TYPE_B = 1,	/* bi-predicted */
	SLICE_TYPE_I = 2	/* intra coded */
};
30 
31 #define SURFACE_NUM     7
32 
33 VADisplay       va_dpy = NULL;
34 VAConfigID      va_config_id;
35 VASurfaceID     va_surface_id[SURFACE_NUM];
36 VAContextID     va_context_id = 0;
37 
38 VABufferID      va_pic_param_buf_id[SURFACE_NUM];
39 VABufferID      va_mat_param_buf_id[SURFACE_NUM];
40 VABufferID      va_sp_param_buf_id[SURFACE_NUM];
41 VABufferID      va_d_param_buf_id[SURFACE_NUM];
42 
43 static int cur_height = 0;
44 static int cur_width = 0;
45 static unsigned int num_frames = 0;
46 static int sid = 0;
47 static unsigned int frame_id = 0;
48 static int field_order_count = 0;
49 static VASurfaceID curr_surface = VA_INVALID_ID;
50 
51 VAStatus gva_status;
52 VASurfaceStatus gsurface_status;
53 #define CHECK_SURF(X) \
54     gva_status = vaQuerySurfaceStatus(va_dpy, X, &gsurface_status); \
55     if (gsurface_status != 4) printf("ss: %d\n", gsurface_status);
56 
/*
 * DebugLog((fmt, ...)) takes a parenthesised argument list. In _DEBUG builds
 * that list is handed to rfbClientLog; otherwise the macro expands to
 * nothing and its arguments are never evaluated.
 */
#ifndef _DEBUG
#define DebugLog(A)
#else
#define DebugLog(A) rfbClientLog A
#endif
62 
/*
 * Terminate the process with an error message when a libva call failed;
 * log success in debug builds. `func` is the name of the libva call, used
 * only for the message.
 *
 * The body is wrapped in do/while(0) so that `CHECK_VASTATUS(...);` is a
 * single statement and can be used safely inside unbraced if/else.
 */
#define CHECK_VASTATUS(va_status, func)                                     \
    do {                                                                    \
        if ((va_status) != VA_STATUS_SUCCESS) {                             \
            rfbClientErr("%s:%s:%d failed (0x%x),exit\n", __func__, (func), __LINE__, (va_status)); \
            exit(1);                                                        \
        } else {                                                            \
            DebugLog(("%s:%s:%d success\n", __func__, (func), __LINE__));   \
        }                                                                   \
    } while (0)
72 
73 /*
74  * Forward declarations
75  */
76 static void h264_decode_frame(int f_width, int f_height, char *framedata, int framesize, int slice_type);
77 static void SetVAPictureParameterBufferH264(VAPictureParameterBufferH264 *p, int width, int height);
78 static void SetVASliceParameterBufferH264(VASliceParameterBufferH264 *p);
79 static void SetVASliceParameterBufferH264_Intra(VASliceParameterBufferH264 *p, int first);
80 
81 static void put_updated_rectangle(rfbClient *client, int x, int y, int width, int height, int f_width, int f_height, int first_for_frame);
82 static void nv12_to_rgba(const VAImage vaImage, rfbClient *client, int ch_x, int ch_y, int ch_w, int ch_h);
83 
84 
85 /* FIXME: get this value from the server instead of hardcoding 32bit pixels */
86 #define BPP (4 * 8)
87 
/*
 * Render a FOURCC code as a NUL-terminated four character string, least
 * significant byte first.
 *
 * The result lives in one of two static buffers that are used alternately,
 * so the strings from two consecutive calls can be used at the same time
 * (e.g. in a single log statement). A third call overwrites the first
 * result.
 */
static const char *string_of_FOURCC(uint32_t fourcc)
{
    static char ring[2][5];
    static int slot;
    char *out;
    int n;

    slot ^= 1;
    out = ring[slot];

    for (n = 0; n < 4; ++n) {
        out[n] = fourcc >> (8 * n);
    }
    out[4] = '\0';

    return out;
}
101 
/* Return the FOURCC of the given image format as a printable string
 * (see string_of_FOURCC() for the lifetime of the returned buffer). */
static inline const char *string_of_VAImageFormat(VAImageFormat *imgfmt)
{
    const uint32_t code = imgfmt->fourcc;

    return string_of_FOURCC(code);
}
106 
107 
108 static rfbBool
HandleH264(rfbClient * client,int rx,int ry,int rw,int rh)109 HandleH264 (rfbClient* client, int rx, int ry, int rw, int rh)
110 {
111     rfbH264Header hdr;
112     char *framedata;
113 
114     DebugLog(("Framebuffer update with H264 (x: %d, y: %d, w: %d, h: %d)\n", rx, ry, rw, rh));
115 
116     /* First, read the frame size and allocate buffer to store the data */
117     if (!ReadFromRFBServer(client, (char *)&hdr, sz_rfbH264Header))
118         return FALSE;
119 
120     hdr.slice_type = rfbClientSwap32IfLE(hdr.slice_type);
121     hdr.nBytes = rfbClientSwap32IfLE(hdr.nBytes);
122     hdr.width = rfbClientSwap32IfLE(hdr.width);
123     hdr.height = rfbClientSwap32IfLE(hdr.height);
124 
125     framedata = (char*) malloc(hdr.nBytes);
126 
127     /* Obtain frame data from the server */
128     DebugLog(("Reading %d bytes of frame data (type: %d)\n", hdr.nBytes, hdr.slice_type));
129     if (!ReadFromRFBServer(client, framedata, hdr.nBytes))
130         return FALSE;
131 
132     /* First make sure we have a large enough raw buffer to hold the
133      * decompressed data.  In practice, with a fixed BPP, fixed frame
134      * buffer size and the first update containing the entire frame
135      * buffer, this buffer allocation should only happen once, on the
136      * first update.
137      */
138     if ( client->raw_buffer_size < (( rw * rh ) * ( BPP / 8 ))) {
139         if ( client->raw_buffer != NULL ) {
140             free( client->raw_buffer );
141         }
142 
143         client->raw_buffer_size = (( rw * rh ) * ( BPP / 8 ));
144         client->raw_buffer = (char*) malloc( client->raw_buffer_size );
145         rfbClientLog("Allocated raw buffer of %d bytes (%dx%dx%d BPP)\n", client->raw_buffer_size, rw, rh, BPP);
146     }
147 
148     /* Decode frame if frame data was sent. Server only sends frame data for the first
149      * framebuffer update message for a particular frame buffer contents.
150      * If more than 1 rectangle is updated, the messages after the first one (with
151      * the H.264 frame) have nBytes == 0.
152      */
153     if (hdr.nBytes > 0) {
154         DebugLog(("  decoding %d bytes of H.264 data\n", hdr.nBytes));
155         h264_decode_frame(hdr.width, hdr.height, framedata, hdr.nBytes, hdr.slice_type);
156     }
157 
158     DebugLog(("  updating rectangle (%d, %d)-(%d, %d)\n", rx, ry, rw, rh));
159     put_updated_rectangle(client, rx, ry, rw, rh, hdr.width, hdr.height, hdr.nBytes != 0);
160 
161     free(framedata);
162 
163     return TRUE;
164 }
165 
h264_cleanup_decoder()166 static void h264_cleanup_decoder()
167 {
168     VAStatus va_status;
169 
170     rfbClientLog("%s()\n", __FUNCTION__);
171 
172     if (va_surface_id[0] != VA_INVALID_ID) {
173         va_status = vaDestroySurfaces(va_dpy, &va_surface_id[0], SURFACE_NUM);
174         CHECK_VASTATUS(va_status, "vaDestroySurfaces");
175     }
176 
177     if (va_context_id) {
178         va_status = vaDestroyContext(va_dpy, va_context_id);
179         CHECK_VASTATUS(va_status, "vaDestroyContext");
180         va_context_id = 0;
181     }
182 
183     num_frames = 0;
184     sid = 0;
185     frame_id = 0;
186     field_order_count = 0;
187 }
188 
h264_init_decoder(int width,int height)189 static void h264_init_decoder(int width, int height)
190 {
191     VAStatus va_status;
192 
193     if (va_context_id) {
194         rfbClientLog("%s: va_dpy already initialized\n", __FUNCTION__);
195     }
196 
197     if (va_dpy != NULL) {
198         rfbClientLog("%s: Re-initializing H.264 decoder\n", __FUNCTION__);
199     }
200     else {
201         rfbClientLog("%s: initializing H.264 decoder\n", __FUNCTION__);
202 
203         /* Attach VA display to local X display */
204         Display *win_display = (Display *)XOpenDisplay(":0.0");
205         if (win_display == NULL) {
206             rfbClientErr("Can't connect to local display\n");
207             exit(-1);
208         }
209 
210         int major_ver, minor_ver;
211         va_dpy = vaGetDisplay(win_display);
212         va_status = vaInitialize(va_dpy, &major_ver, &minor_ver);
213         CHECK_VASTATUS(va_status, "vaInitialize");
214         rfbClientLog("%s: libva version %d.%d found\n", __FUNCTION__, major_ver, minor_ver);
215     }
216 
217     /* Check for VLD entrypoint */
218     int num_entrypoints;
219     VAEntrypoint    entrypoints[5];
220     int vld_entrypoint_found = 0;
221 
222     /* Change VAProfileH264High if needed */
223     VAProfile profile = VAProfileH264High;
224     va_status = vaQueryConfigEntrypoints(va_dpy, profile, entrypoints, &num_entrypoints);
225     CHECK_VASTATUS(va_status, "vaQueryConfigEntrypoints");
226     int i;
227     for (i = 0; i < num_entrypoints; ++i) {
228         if (entrypoints[i] == VAEntrypointVLD) {
229             vld_entrypoint_found = 1;
230             break;
231         }
232     }
233 
234     if (vld_entrypoint_found == 0) {
235         rfbClientErr("VLD entrypoint not found\n");
236         exit(1);
237     }
238 
239     /* Create configuration for the decode pipeline */
240     VAConfigAttrib attrib;
241     attrib.type = VAConfigAttribRTFormat;
242     va_status = vaCreateConfig(va_dpy, profile, VAEntrypointVLD, &attrib, 1, &va_config_id);
243     CHECK_VASTATUS(va_status, "vaCreateConfig");
244 
245     /* Create VA surfaces */
246     for (i = 0; i < SURFACE_NUM; ++i) {
247         va_surface_id[i]       = VA_INVALID_ID;
248         va_pic_param_buf_id[i] = VA_INVALID_ID;
249         va_mat_param_buf_id[i] = VA_INVALID_ID;
250         va_sp_param_buf_id[i]  = VA_INVALID_ID;
251         va_d_param_buf_id[i]   = VA_INVALID_ID;
252     }
253     va_status = vaCreateSurfaces(va_dpy, width, height, VA_RT_FORMAT_YUV420, SURFACE_NUM, &va_surface_id[0]);
254     CHECK_VASTATUS(va_status, "vaCreateSurfaces");
255     for (i = 0; i < SURFACE_NUM; ++i) {
256         DebugLog(("%s: va_surface_id[%d] = %p\n", __FUNCTION__, i, va_surface_id[i]));
257     }
258 
259     /* Create VA context */
260     va_status = vaCreateContext(va_dpy, va_config_id, width, height, 0/*VA_PROGRESSIVE*/,  &va_surface_id[0], SURFACE_NUM, &va_context_id);
261     CHECK_VASTATUS(va_status, "vaCreateContext");
262     DebugLog(("%s: VA context created (id: %d)\n", __FUNCTION__, va_context_id));
263 
264 
265     /* Instantiate decode pipeline */
266     va_status = vaBeginPicture(va_dpy, va_context_id, va_surface_id[0]);
267     CHECK_VASTATUS(va_status, "vaBeginPicture");
268 
269     rfbClientLog("%s: H.264 decoder initialized\n", __FUNCTION__);
270 }
271 
h264_decode_frame(int f_width,int f_height,char * framedata,int framesize,int slice_type)272 static void h264_decode_frame(int f_width, int f_height, char *framedata, int framesize, int slice_type)
273 {
274     VAStatus va_status;
275 
276     DebugLog(("%s: called for frame of %d bytes (%dx%d) slice_type=%d\n", __FUNCTION__, framesize, width, height, slice_type));
277 
278     /* Initialize decode pipeline if necessary */
279     if ( (f_width > cur_width) || (f_height > cur_height) ) {
280         if (va_dpy != NULL)
281             h264_cleanup_decoder();
282         cur_width = f_width;
283         cur_height = f_height;
284 
285         h264_init_decoder(f_width, f_height);
286         rfbClientLog("%s: decoder initialized\n", __FUNCTION__);
287     }
288 
289     /* Decode frame */
290     static VAPictureH264 va_picture_h264, va_old_picture_h264;
291 
292     /* The server should always send an I-frame when a new client connects
293      * or when the resolution of the framebuffer changes, but we check
294      * just in case.
295      */
296     if ( (slice_type != SLICE_TYPE_I) && (num_frames == 0) ) {
297         rfbClientLog("First frame is not an I frame !!! Skipping!!!\n");
298         return;
299     }
300 
301     DebugLog(("%s: frame_id=%d va_surface_id[%d]=0x%x field_order_count=%d\n", __FUNCTION__, frame_id, sid, va_surface_id[sid], field_order_count));
302 
303     va_picture_h264.picture_id = va_surface_id[sid];
304     va_picture_h264.frame_idx  = frame_id;
305     va_picture_h264.flags = 0;
306     va_picture_h264.BottomFieldOrderCnt = field_order_count;
307     va_picture_h264.TopFieldOrderCnt = field_order_count;
308 
309     /* Set up picture parameter buffer */
310     if (va_pic_param_buf_id[sid] == VA_INVALID_ID) {
311         va_status = vaCreateBuffer(va_dpy, va_context_id, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferH264), 1, NULL, &va_pic_param_buf_id[sid]);
312         CHECK_VASTATUS(va_status, "vaCreateBuffer(PicParam)");
313     }
314     CHECK_SURF(va_surface_id[sid]);
315 
316     VAPictureParameterBufferH264 *pic_param_buf = NULL;
317     va_status = vaMapBuffer(va_dpy, va_pic_param_buf_id[sid], (void **)&pic_param_buf);
318     CHECK_VASTATUS(va_status, "vaMapBuffer(PicParam)");
319 
320     SetVAPictureParameterBufferH264(pic_param_buf, f_width, f_height);
321     memcpy(&pic_param_buf->CurrPic, &va_picture_h264, sizeof(VAPictureH264));
322 
323     if (slice_type == SLICE_TYPE_P) {
324         memcpy(&pic_param_buf->ReferenceFrames[0], &va_old_picture_h264, sizeof(VAPictureH264));
325         pic_param_buf->ReferenceFrames[0].flags = 0;
326     }
327     else if (slice_type != SLICE_TYPE_I) {
328         rfbClientLog("Frame type %d not supported!!!\n");
329         return;
330     }
331     pic_param_buf->frame_num = frame_id;
332 
333     va_status = vaUnmapBuffer(va_dpy, va_pic_param_buf_id[sid]);
334     CHECK_VASTATUS(va_status, "vaUnmapBuffer(PicParam)");
335 
336     /* Set up IQ matrix buffer */
337     if (va_mat_param_buf_id[sid] == VA_INVALID_ID) {
338         va_status = vaCreateBuffer(va_dpy, va_context_id, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferH264), 1, NULL, &va_mat_param_buf_id[sid]);
339         CHECK_VASTATUS(va_status, "vaCreateBuffer(IQMatrix)");
340     }
341     CHECK_SURF(va_surface_id[sid]);
342 
343     VAIQMatrixBufferH264 *iq_matrix_buf = NULL;
344     va_status = vaMapBuffer(va_dpy, va_mat_param_buf_id[sid], (void **)&iq_matrix_buf);
345     CHECK_VASTATUS(va_status, "vaMapBuffer(IQMatrix)");
346 
347     static const unsigned char m_MatrixBufferH264[]= {
348         /* ScalingList4x4[6][16] */
349         0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
350         0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
351         0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
352         0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
353         0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
354         0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
355         /* ScalingList8x8[2][64] */
356         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371         0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
372     };
373 
374     memcpy(iq_matrix_buf, m_MatrixBufferH264, 224);
375     va_status = vaUnmapBuffer(va_dpy, va_mat_param_buf_id[sid]);
376     CHECK_VASTATUS(va_status, "vaUnmapBuffer(IQMatrix)");
377 
378     VABufferID buffer_ids[2];
379     buffer_ids[0] = va_pic_param_buf_id[sid];
380     buffer_ids[1] = va_mat_param_buf_id[sid];
381 
382     CHECK_SURF(va_surface_id[sid]);
383     va_status = vaRenderPicture(va_dpy, va_context_id, buffer_ids, 2);
384     CHECK_VASTATUS(va_status, "vaRenderPicture");
385 
386     /* Set up slice parameter buffer */
387     if (va_sp_param_buf_id[sid] == VA_INVALID_ID) {
388         va_status = vaCreateBuffer(va_dpy, va_context_id, VASliceParameterBufferType, sizeof(VASliceParameterBufferH264), 1, NULL, &va_sp_param_buf_id[sid]);
389         CHECK_VASTATUS(va_status, "vaCreateBuffer(SliceParam)");
390     }
391     CHECK_SURF(va_surface_id[sid]);
392 
393     VASliceParameterBufferH264 *slice_param_buf = NULL;
394     va_status = vaMapBuffer(va_dpy, va_sp_param_buf_id[sid], (void **)&slice_param_buf);
395     CHECK_VASTATUS(va_status, "vaMapBuffer(SliceParam)");
396 
397     static int t2_first = 1;
398     if (slice_type == SLICE_TYPE_I) {
399         SetVASliceParameterBufferH264_Intra(slice_param_buf, t2_first);
400         t2_first = 0;
401     } else {
402         SetVASliceParameterBufferH264(slice_param_buf);
403         memcpy(&slice_param_buf->RefPicList0[0], &va_old_picture_h264, sizeof(VAPictureH264));
404         slice_param_buf->RefPicList0[0].flags = 0;
405     }
406     slice_param_buf->slice_data_bit_offset = 0;
407     slice_param_buf->slice_data_size = framesize;
408 
409     va_status = vaUnmapBuffer(va_dpy, va_sp_param_buf_id[sid]);
410     CHECK_VASTATUS(va_status, "vaUnmapBuffer(SliceParam)");
411     CHECK_SURF(va_surface_id[sid]);
412 
413     /* Set up slice data buffer and copy H.264 encoded data */
414     if (va_d_param_buf_id[sid] == VA_INVALID_ID) {
415         /* TODO use estimation matching framebuffer dimensions instead of this large value */
416         va_status = vaCreateBuffer(va_dpy, va_context_id, VASliceDataBufferType, 4177920, 1, NULL, &va_d_param_buf_id[sid]); /* 1080p size */
417         CHECK_VASTATUS(va_status, "vaCreateBuffer(SliceData)");
418     }
419 
420     char *slice_data_buf;
421     va_status = vaMapBuffer(va_dpy, va_d_param_buf_id[sid], (void **)&slice_data_buf);
422     CHECK_VASTATUS(va_status, "vaMapBuffer(SliceData)");
423     memcpy(slice_data_buf, framedata, framesize);
424 
425     CHECK_SURF(va_surface_id[sid]);
426     va_status = vaUnmapBuffer(va_dpy, va_d_param_buf_id[sid]);
427     CHECK_VASTATUS(va_status, "vaUnmapBuffer(SliceData)");
428 
429     buffer_ids[0] = va_sp_param_buf_id[sid];
430     buffer_ids[1] = va_d_param_buf_id[sid];
431 
432     CHECK_SURF(va_surface_id[sid]);
433     va_status = vaRenderPicture(va_dpy, va_context_id, buffer_ids, 2);
434     CHECK_VASTATUS(va_status, "vaRenderPicture");
435 
436     va_status = vaEndPicture(va_dpy, va_context_id);
437     CHECK_VASTATUS(va_status, "vaEndPicture");
438 
439     /* Prepare next one... */
440     int sid_new = (sid + 1) % SURFACE_NUM;
441     DebugLog(("%s: new Surface ID = %d\n", __FUNCTION__, sid_new));
442     va_status = vaBeginPicture(va_dpy, va_context_id, va_surface_id[sid_new]);
443     CHECK_VASTATUS(va_status, "vaBeginPicture");
444 
445     /* Get decoded data */
446     va_status = vaSyncSurface(va_dpy, va_surface_id[sid]);
447     CHECK_VASTATUS(va_status, "vaSyncSurface");
448     CHECK_SURF(va_surface_id[sid]);
449 
450     curr_surface = va_surface_id[sid];
451 
452     sid = sid_new;
453 
454     field_order_count += 2;
455     ++frame_id;
456     if (frame_id > 15) {
457         frame_id = 0;
458     }
459 
460     ++num_frames;
461 
462     memcpy(&va_old_picture_h264, &va_picture_h264, sizeof(VAPictureH264));
463 }
464 
/*
 * Bring the most recently decoded frame (curr_surface) to the client.
 *
 *   x, y, width, height  updated rectangle in framebuffer coordinates
 *   f_width, f_height    dimensions of the decoded frame
 *   first_for_frame      non-zero for the first rectangle of a frame
 *
 * When the client has an output window the whole frame is put into it with
 * vaPutSurface(), once per frame. Otherwise, when the client has a
 * framebuffer, the updated rectangle is converted from the decoded image
 * into that framebuffer. Nothing is copied when no frame has been decoded
 * yet, when the derived image is invalid or when it is not in NV12 format.
 */
static void put_updated_rectangle(rfbClient *client, int x, int y, int width, int height, int f_width, int f_height, int first_for_frame)
{
    if (curr_surface == VA_INVALID_ID) {
        rfbClientErr("%s: called, but current surface is invalid\n", __FUNCTION__);
        return;
    }

    VAStatus va_status;

    if (client->outputWindow) {
        /* use efficient vaPutSurface() method of putting the framebuffer on the screen */
        if (first_for_frame) {
            /* vaPutSurface() clears window contents outside the given destination rectangle => always update full screen. */
            va_status = vaPutSurface(va_dpy, curr_surface, client->outputWindow, 0, 0, f_width, f_height, 0, 0, f_width, f_height, NULL, 0, VA_FRAME_PICTURE);
            CHECK_VASTATUS(va_status, "vaPutSurface");
        }
    }
    else if (client->frameBuffer) {
        /* ... or copy the changed framebuffer region manually as a fallback */
        VAImage decoded_image;
        decoded_image.image_id = VA_INVALID_ID;
        decoded_image.buf      = VA_INVALID_ID;
        va_status = vaDeriveImage(va_dpy, curr_surface, &decoded_image);
        CHECK_VASTATUS(va_status, "vaDeriveImage");

        if ((decoded_image.image_id == VA_INVALID_ID) || (decoded_image.buf == VA_INVALID_ID)) {
            rfbClientErr("%s: vaDeriveImage() returned success but VA image is invalid (id: %d, buf: %d)\n", __FUNCTION__, decoded_image.image_id, decoded_image.buf);
            /* do not map an invalid buffer; release the image if there is one */
            if (decoded_image.image_id != VA_INVALID_ID) {
                va_status = vaDestroyImage(va_dpy, decoded_image.image_id);
                CHECK_VASTATUS(va_status, "vaDestroyImage");
            }
            return;
        }

        /* nv12_to_rgba() interprets the image planes as NV12 */
        if (decoded_image.format.fourcc == VA_FOURCC_NV12) {
            nv12_to_rgba(decoded_image, client, x, y, width, height);
        } else {
            rfbClientErr("%s: unsupported image format %s (expected NV12)\n", __FUNCTION__, string_of_VAImageFormat(&decoded_image.format));
        }

        va_status = vaDestroyImage(va_dpy, decoded_image.image_id);
        CHECK_VASTATUS(va_status, "vaDestroyImage");
    }
}
500 
/*
 * Fill *p with the picture parameters used for every frame: the whole
 * structure is zeroed, the picture size is set in macroblocks (16 pixel
 * units, rounded up), one reference frame is announced and all 16
 * reference frame entries are marked invalid. The caller fills in CurrPic,
 * frame_num and, where needed, ReferenceFrames[0] afterwards.
 */
static void SetVAPictureParameterBufferH264(VAPictureParameterBufferH264 *p, int width, int height)
{
    const unsigned int mb_cols = (width + 15) / 16;
    const unsigned int mb_rows = (height + 15) / 16;
    int idx;

    memset(p, 0, sizeof(*p));

    p->picture_width_in_mbs_minus1  = mb_cols - 1;
    p->picture_height_in_mbs_minus1 = mb_rows - 1;
    p->num_ref_frames = 1;

    /* packed bit-field values, set as a whole */
    p->seq_fields.value = 145;
    p->pic_fields.value = 0x501;

    for (idx = 0; idx < 16; ++idx) {
        VAPictureH264 *ref = &p->ReferenceFrames[idx];

        ref->picture_id = 0xffffffff;
        ref->flags = VA_PICTURE_H264_INVALID;
    }
}
518 
/*
 * Fill *p with the slice parameters used for non-intra slices: the
 * structure is zeroed (which leaves slice_type at 0), the deblocking
 * offsets and prediction weight fields are set and every entry of both
 * reference picture lists is flagged invalid. The caller sets the slice
 * data size/offset and RefPicList0[0] afterwards.
 */
static void SetVASliceParameterBufferH264(VASliceParameterBufferH264 *p)
{
    int n;

    memset(p, 0, sizeof(*p));

    p->slice_data_size = 0;
    p->slice_data_bit_offset = 64;

    p->slice_alpha_c0_offset_div2 = 2;
    p->slice_beta_offset_div2 = 2;

    /* prediction weight flags */
    p->chroma_weight_l0_flag = 1;
    p->luma_weight_l1_flag = 1;
    p->chroma_weight_l1_flag = 1;

    /* list 0, entry 0: unit weights, zero offsets */
    p->luma_weight_l0[0] = 0x01;
    p->chroma_weight_l0[0][0] = 1;
    p->chroma_weight_l0[0][1] = 1;
    p->chroma_offset_l0[0][0] = 0;
    p->chroma_offset_l0[0][1] = 0;

    for (n = 0; n < 32; ++n) {
        p->RefPicList0[n].flags = VA_PICTURE_H264_INVALID;
        p->RefPicList1[n].flags = VA_PICTURE_H264_INVALID;
    }
    p->RefPicList1[0].picture_id = 0xffffffff;
}
541 
/*
 * Fill *p with the slice parameters used for intra slices (slice_type 2,
 * the value of SLICE_TYPE_I). The structure is zeroed first and every entry
 * of both reference picture lists is flagged invalid.
 *
 * `first` selects between two variants of the prediction weight fields:
 * when non-zero only the four weight flags are set, otherwise
 * luma_weight_l0_flag stays 0 and unit weights are stored for entry 0 of
 * list 0.
 */
static void SetVASliceParameterBufferH264_Intra(VASliceParameterBufferH264 *p, int first)
{
    int n;

    memset(p, 0, sizeof(*p));

    p->slice_type = 2;
    p->slice_data_size = 0;
    p->slice_data_bit_offset = 64;
    p->slice_alpha_c0_offset_div2 = 2;
    p->slice_beta_offset_div2 = 2;

    /* flags common to both variants */
    p->chroma_weight_l0_flag = 1;
    p->luma_weight_l1_flag = 1;
    p->chroma_weight_l1_flag = 1;

    if (!first) {
        p->luma_weight_l0[0] = 0x01;
        p->chroma_weight_l0[0][0] = 1;
        p->chroma_weight_l0[0][1] = 1;
        p->chroma_offset_l0[0][0] = 0;
        p->chroma_offset_l0[0][1] = 0;
    } else {
        p->luma_weight_l0_flag = 1;
    }

    for (n = 0; n < 32; ++n) {
        p->RefPicList0[n].flags = VA_PICTURE_H264_INVALID;
        p->RefPicList1[n].flags = VA_PICTURE_H264_INVALID;
    }
    p->RefPicList0[0].picture_id = 0xffffffff;
    p->RefPicList1[0].picture_id = 0xffffffff;
}
573 
/*
 * Convert a rectangle of a decoded image to 32 bit RGB pixels in the
 * client framebuffer.
 *
 *   vaImage                   image whose planes are read as NV12: plane 0
 *                             holds Y, plane 1 interleaved U/V at half
 *                             resolution
 *   client                    destination; frameBuffer is written as
 *                             32 bit pixels using client->format shifts
 *   ch_x, ch_y, ch_w, ch_h    changed rectangle
 *
 * The rectangle is first widened to even coordinates (pixels are processed
 * in 2x2 blocks sharing one U/V pair) and then clipped to the area covered
 * by both the image and the framebuffer, so that neither buffer is accessed
 * outside its bounds. Nothing is done when the clipped rectangle is empty.
 */
static void nv12_to_rgba(const VAImage vaImage, rfbClient *client, int ch_x, int ch_y, int ch_w, int ch_h)
{
    DebugLog(("%s: converting region (%d, %d)-(%d, %d) from NV12->RGBA\n", __FUNCTION__, ch_x, ch_y, ch_w, ch_h));

    /* area that exists in both the source image and the framebuffer */
    int max_w = (int)vaImage.width;
    int max_h = (int)vaImage.height;
    if (client->width < max_w)  max_w = client->width;
    if (client->height < max_h) max_h = client->height;

    /* clip against the top/left edge */
    if (ch_x < 0) { ch_w += ch_x; ch_x = 0; }
    if (ch_y < 0) { ch_h += ch_y; ch_y = 0; }

    /* adjust x, y, width, height of the affected area so
     * x, y, width and height are always even.
     */
    if (ch_x % 2) { --ch_x; ++ch_w; }
    if (ch_y % 2) { --ch_y; ++ch_h; }
    if ((ch_x + ch_w) % 2) { ++ch_w; }
    if ((ch_y + ch_h) % 2) { ++ch_h; }

    /* clip against the bottom/right edge; this may leave an odd width or
     * height, which the per-pixel check in the loop below takes care of */
    if ((ch_x >= max_w) || (ch_y >= max_h))
        return;
    if (ch_w > max_w - ch_x) ch_w = max_w - ch_x;
    if (ch_h > max_h - ch_y) ch_h = max_h - ch_y;
    if ((ch_w <= 0) || (ch_h <= 0))
        return;

    VAStatus va_status;
    uint8_t *nv12_buf;
    va_status = vaMapBuffer(va_dpy, vaImage.buf, (void **)&nv12_buf);
    CHECK_VASTATUS(va_status, "vaMapBuffer(DecodedData)");

    /* point nv12_buf and dst to upper left corner of changed area */
    uint8_t *nv12_y  = &nv12_buf[vaImage.offsets[0] + vaImage.pitches[0] * ch_y + ch_x];
    uint8_t *nv12_uv = &nv12_buf[vaImage.offsets[1] + vaImage.pitches[1] * (ch_y / 2) + ch_x];
    uint32_t *dst    = &((uint32_t*)client->frameBuffer)[client->width * ch_y + ch_x];

    /* TODO: optimize R, G, B calculation. Possible ways to do this:
     *       - use lookup tables
     *       - convert from floating point to integer arithmetic
     *       - use MMX/SSE to vectorize calculations
     *       - use GPU (VA VPP, shader...)
     */
    int src_x, src_y;
    for (src_y = 0; src_y < ch_h; src_y += 2) {
        for (src_x = 0; src_x < ch_w; src_x += 2) {
            /* one U/V pair is shared by the 2x2 block of pixels */
            uint8_t nv_u = nv12_uv[src_x];
            uint8_t nv_v = nv12_uv[src_x + 1];

            int i;
            for (i = 0; i < 4; ++i) {
                const int dx = i % 2;
                const int dy = i / 2;

                /* skip block pixels that fall outside the clipped area */
                if ((src_x + dx >= ch_w) || (src_y + dy >= ch_h))
                    continue;

                const uint8_t luma = nv12_y[dy * vaImage.pitches[0] + src_x + dx];

                double R = 1.164 * (luma - 16)                        + 1.596 * (nv_v - 128);
                double G = 1.164 * (luma - 16) - 0.391 * (nv_u - 128) - 0.813 * (nv_v - 128);
                double B = 1.164 * (luma - 16) + 2.018 * (nv_u - 128);

                /* clamp R, G, B values. For some Y, U, V combinations,
                 * the results of the above calculations fall outside of
                 * the range 0-255.
                 */
                if (R < 0.0) R = 0.0;
                if (G < 0.0) G = 0.0;
                if (B < 0.0) B = 0.0;
                if (R > 255.0) R = 255.0;
                if (G > 255.0) G = 255.0;
                if (B > 255.0) B = 255.0;

                dst[client->width * dy + src_x + dx] = 0
                               | ((unsigned int)(R + 0.5) << client->format.redShift)
                               | ((unsigned int)(G + 0.5) << client->format.greenShift)
                               | ((unsigned int)(B + 0.5) << client->format.blueShift);
            }
        }

        nv12_y  += 2 * vaImage.pitches[0];
        nv12_uv += vaImage.pitches[1];
        dst     += 2 * client->width;
    }

    /* check the surface the image was derived from; by now `sid` already
     * refers to the slot of the next frame */
    CHECK_SURF(curr_surface);
    va_status = vaUnmapBuffer(va_dpy, vaImage.buf);
    CHECK_VASTATUS(va_status, "vaUnmapBuffer(DecodedData)");
}
643 
644 #endif /* LIBVNCSERVER_CONFIG_LIBVA */
645