1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "host-common/MediaH264DecoderCuvid.h"
16 #include "host-common/H264NaluParser.h"
17 #include "host-common/YuvConverter.h"
18 #include "android/main-emugl.h"
19 // MediaH264DecoderCuvid.h
20 #include <cstdint>
21 #include <string>
22 #include <vector>
23 
24 #ifdef _WIN32
25 #define WIN32_LEAN_AND_MEAN 1
26 #include <windows.h>
27 #include <winioctl.h>
28 #endif
29 
30 #include <stdio.h>
31 #include <string.h>
32 
33 extern "C" {
34 #define INIT_CUDA_GL 1
35 #include "host-common/dynlink_cuda.h"
36 #include "host-common/dynlink_cudaGL.h"
37 #include "host-common/dynlink_nvcuvid.h"
38 }
39 #define MEDIA_H264_DEBUG 0
40 
41 #if MEDIA_H264_DEBUG
42 #define H264_DPRINT(fmt, ...)                                              \
43     fprintf(stderr, "h264-cuvid-dec: %s:%d " fmt "\n", __func__, __LINE__, \
44             ##__VA_ARGS__);
45 #else
46 #define H264_DPRINT(fmt, ...)
47 #endif
48 
49 #define NVDEC_API_CALL(cuvidAPI)                                     \
50     do {                                                             \
51         CUresult errorCode = cuvidAPI;                               \
52         if (errorCode != CUDA_SUCCESS) {                             \
53             H264_DPRINT("%s failed with error code %d\n", #cuvidAPI, \
54                         (int)errorCode);                             \
55         }                                                            \
56     } while (0)
57 
58 namespace android {
59 namespace emulation {
60 
61 using InitContextParam = H264PingInfoParser::InitContextParam;
62 using DecodeFrameParam = H264PingInfoParser::DecodeFrameParam;
63 using ResetParam = H264PingInfoParser::ResetParam;
64 using GetImageParam = H264PingInfoParser::GetImageParam;
65 using TextureFrame = MediaHostRenderer::TextureFrame;
66 
MediaH264DecoderCuvid(uint64_t id,H264PingInfoParser parser)67 MediaH264DecoderCuvid::MediaH264DecoderCuvid(uint64_t id,
68                                              H264PingInfoParser parser)
69     : mId(id), mParser(parser) {
70     auto useGpuTextureEnv = android::base::System::getEnvironmentVariable(
71             "ANDROID_EMU_CODEC_USE_GPU_TEXTURE");
72     if (useGpuTextureEnv != "") {
73         if (mParser.version() == 200) {
74             if (emuglConfig_get_current_renderer() == SELECTED_RENDERER_HOST) {
75                 mUseGpuTexture = true;
76             } else {
77                 H264_DPRINT(
78                         "cannot use gpu texture to save decoded frame in "
79                         "non-host gpu mode");
80                 if (emuglConfig_get_current_renderer() ==
81                     SELECTED_RENDERER_SWIFTSHADER_INDIRECT) {
82                     H264_DPRINT("your gpu mode is: swiftshader_indirect");
83                 }
84             }
85         }
86     }
87 };
88 
// Creates a fresh decoder instance sharing this one's stream id and protocol
// parser; used when the guest re-opens a codec with the same configuration.
MediaH264DecoderPlugin* MediaH264DecoderCuvid::clone() {
    return new MediaH264DecoderCuvid(mId, mParser);
};
92 
// Releases all CUDA/cuvid resources and recycles pending texture frames.
MediaH264DecoderCuvid::~MediaH264DecoderCuvid() {
    destroyH264Context();
}
96 
reset(void * ptr)97 void MediaH264DecoderCuvid::reset(void* ptr) {
98     destroyH264Context();
99     ResetParam param{};
100     mParser.parseResetParams(ptr, param);
101     initH264ContextInternal(param.width, param.height, param.outputWidth,
102                             param.outputHeight, param.outputPixelFormat);
103 }
104 
initH264Context(void * ptr)105 void MediaH264DecoderCuvid::initH264Context(void* ptr) {
106     InitContextParam param{};
107     mParser.parseInitContextParams(ptr, param);
108     initH264ContextInternal(param.width, param.height, param.outputWidth,
109                             param.outputHeight, param.outputPixelFormat);
110 }
111 
initH264ContextInternal(unsigned int width,unsigned int height,unsigned int outWidth,unsigned int outHeight,PixelFormat outPixFmt)112 void MediaH264DecoderCuvid::initH264ContextInternal(unsigned int width,
113                                                     unsigned int height,
114                                                     unsigned int outWidth,
115                                                     unsigned int outHeight,
116                                                     PixelFormat outPixFmt) {
117     if (!initCudaDrivers()) {
118         H264_DPRINT("Failed to initH264Context because driver is not working");
119         return;
120     }
121 
122     if (mCudaContext != nullptr) {
123         destroyH264Context();
124     }
125     H264_DPRINT("%s(w=%u h=%u out_w=%u out_h=%u pixfmt=%u)", __func__, width,
126                 height, outWidth, outHeight, (uint8_t)outPixFmt);
127     mWidth = width;
128     mHeight = height;
129 
130     mOutputWidth = outWidth;
131     mOutputHeight = outHeight;
132     mOutPixFmt = outPixFmt;
133     mOutBufferSize = outWidth * outHeight * 3 / 2;
134 
135     // cudat stuff
136     const int gpuIndex = 0;
137     const int cudaFlags = 0;
138     CUdevice cudaDevice = 0;
139     CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
140     if (myres != CUDA_SUCCESS) {
141         H264_DPRINT("Failed to get cuda device, error code %d", (int)myres);
142         return;
143     }
144 
145     char buf[1024];
146     myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
147     if (myres != CUDA_SUCCESS) {
148         H264_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
149         return;
150     }
151 
152     H264_DPRINT("using gpu device %s", buf);
153 
154     myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
155     if (myres != CUDA_SUCCESS) {
156         H264_DPRINT("Failed to create cuda context, error code %d", (int)myres);
157     }
158 
159     NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));
160 
161     CUVIDPARSERPARAMS videoParserParameters = {};
162     videoParserParameters.CodecType = cudaVideoCodec_H264;
163     videoParserParameters.ulMaxNumDecodeSurfaces = 1;
164     videoParserParameters.ulMaxDisplayDelay = 1;
165     videoParserParameters.pUserData = this;
166     videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
167     videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
168     videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
169     NVDEC_API_CALL(
170             cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));
171 
172     H264_DPRINT("Successfully created cuda context %p", mCudaContext);
173 }
174 
destroyH264Context()175 void MediaH264DecoderCuvid::destroyH264Context() {
176     H264_DPRINT("destroyH264Context calling");
177 
178     for (auto texFrame : mSavedTexFrames) {
179             mRenderer.putTextureFrame(texFrame);
180     }
181     mRenderer.cleanUpTextures();
182     mSavedTexFrames.clear();
183     if (mCudaContext != nullptr) {
184         NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
185         if (mCudaParser != nullptr) {
186             NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
187             mCudaParser = nullptr;
188         }
189 
190         if (mCudaDecoder != nullptr) {
191             NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
192             mCudaDecoder = nullptr;
193         }
194         NVDEC_API_CALL(cuCtxPopCurrent(NULL));
195         NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
196     }
197 
198     if (mCudaContext != nullptr) {
199         CUresult myres = cuCtxDestroy(mCudaContext);
200         if (myres != CUDA_SUCCESS) {
201             H264_DPRINT("Failed to destroy cuda context; error code %d",
202                         (int)myres);
203         }
204         mCudaContext = nullptr;
205     }
206 }
207 
decodeFrame(void * ptr)208 void MediaH264DecoderCuvid::decodeFrame(void* ptr) {
209     DecodeFrameParam param{};
210     mParser.parseDecodeFrameParams(ptr, param);
211 
212     const uint8_t* frame = param.pData;
213     size_t szBytes = param.size;
214     uint64_t inputPts = param.pts;
215 
216     const bool enableSnapshot = true;
217     if (enableSnapshot) {
218         std::vector<uint8_t> v;
219         v.assign(frame, frame + szBytes);
220         bool hasSps = H264NaluParser::checkSpsFrame(frame, szBytes);
221         if (hasSps) {
222             mSnapshotState = SnapshotState{};
223             mSnapshotState.saveSps(v);
224         } else {
225             bool hasPps = H264NaluParser::checkPpsFrame(frame, szBytes);
226             if (hasPps) {
227                 mSnapshotState.savePps(v);
228                 mSnapshotState.savedPackets.clear();
229                 mSnapshotState.savedDecodedFrame.data.clear();
230             } else {
231                 bool isIFrame = H264NaluParser::checkIFrame(frame, szBytes);
232                 if (isIFrame) {
233                     mSnapshotState.savedPackets.clear();
234                 }
235                 mSnapshotState.savePacket(std::move(v), inputPts);
236                 H264_DPRINT("saving packet; total is %d",
237                             (int)(mSnapshotState.savedPackets.size()));
238             }
239         }
240     }
241 
242     decodeFrameInternal(param.pConsumedBytes, param.pDecoderErrorCode, frame,
243                         szBytes, inputPts);
244 }
245 
decodeFrameInternal(uint64_t * pRetSzBytes,int32_t * pRetErr,const uint8_t * frame,size_t szBytes,uint64_t inputPts)246 void MediaH264DecoderCuvid::decodeFrameInternal(uint64_t* pRetSzBytes,
247                                                 int32_t* pRetErr,
248                                                 const uint8_t* frame,
249                                                 size_t szBytes,
250                                                 uint64_t inputPts) {
251     mIsInFlush = false;
252     H264_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);
253     Err h264Err = Err::NoErr;
254 
255     CUVIDSOURCEDATAPACKET packet = {0};
256     packet.payload = frame;
257     packet.payload_size = szBytes;
258     packet.flags = CUVID_PKT_TIMESTAMP;
259     packet.timestamp = inputPts;
260     if (!frame || szBytes == 0) {
261         packet.flags |= CUVID_PKT_ENDOFSTREAM;
262     }
263     NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
264     if (pRetSzBytes) {
265         *pRetSzBytes = szBytes;
266     }
267     if (pRetErr) {
268         *pRetErr = (int32_t)h264Err;
269     }
270 }
271 
doFlush()272 void MediaH264DecoderCuvid::doFlush() {
273     if (!mIsInFlush) {
274         return;
275     }
276     H264_DPRINT("started flushing");
277     CUVIDSOURCEDATAPACKET packet = {0};
278     packet.payload = NULL;
279     packet.payload_size = 0;
280     packet.flags |= CUVID_PKT_ENDOFSTREAM;
281     NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
282     H264_DPRINT("done one flushing");
283 }
284 
// Guest entry point: marks the decoder as flushing and drains the parser so
// any frames still buffered inside the decoder get delivered.
void MediaH264DecoderCuvid::flush(void* ptr) {
    mIsInFlush = true;
    doFlush();
}
289 
// Delivers the oldest decoded frame to the guest.
//
// Depending on protocol version and configuration, the frame is written into
// guest memory (param.pDecodedFrame), rendered into a host color buffer, or
// handed over as GPU textures. *retErr receives the frame's byte count on
// success, or Err::NoDecodedFrame when no frame is ready yet; the other
// out-pointers receive dimensions, pts and color-description values.
void MediaH264DecoderCuvid::getImage(void* ptr) {
    H264_DPRINT("getImage %p", ptr);
    GetImageParam param{};
    mParser.parseGetImageParams(ptr, param);

    int* retErr = param.pDecoderErrorCode;
    uint32_t* retWidth = param.pRetWidth;
    uint32_t* retHeight = param.pRetHeight;
    uint64_t* retPts = param.pRetPts;
    uint32_t* retColorPrimaries = param.pRetColorPrimaries;
    uint32_t* retColorRange = param.pRetColorRange;
    uint32_t* retColorTransfer = param.pRetColorTransfer;
    uint32_t* retColorSpace = param.pRetColorSpace;

    static int numbers = 0;
    H264_DPRINT("calling getImage %d colorbuffer %d", numbers++,
                (int)param.hostColorBufferId);
    doFlush();
    uint8_t* dst = param.pDecodedFrame;
    int myOutputWidth = mOutputWidth;
    int myOutputHeight = mOutputHeight;
    std::vector<uint8_t> decodedFrame;
    TextureFrame decodedTexFrame;
    {
        // Pop the oldest frame and its metadata under the frame lock;
        // HandlePictureDisplay pushes to these queues under the same lock.
        std::lock_guard<std::mutex> g(mFrameLock);
        mImageReady = !mSavedFrames.empty();
        if (!mImageReady) {
            H264_DPRINT("%s: no new frame yet", __func__);
            *retErr = static_cast<int>(Err::NoDecodedFrame);
            return;
        }

        std::vector<uint8_t>& myFrame = mSavedFrames.front();
        // Steal the frame bytes instead of copying them.
        std::swap(decodedFrame, myFrame);
        decodedTexFrame = mSavedTexFrames.front();
        mOutputPts = mSavedPts.front();

        myOutputWidth = mSavedW.front();
        myOutputHeight = mSavedH.front();
        *retWidth = myOutputWidth;
        *retHeight = myOutputHeight;

        mSavedFrames.pop_front();
        mSavedTexFrames.pop_front();
        mSavedPts.pop_front();
        mSavedW.pop_front();
        mSavedH.pop_front();
    }

    bool needToCopyToGuest = true;

    if (mUseGpuTexture) {
        needToCopyToGuest = false;
    } else {
        // The decoder outputs interleaved UV (NV12); convert in place to the
        // planar layout the guest expects.
        YuvConverter<uint8_t> convert8(myOutputWidth, myOutputHeight);
        convert8.UVInterleavedToPlanar(decodedFrame.data());
    }

    if (mParser.version() == 200) {
        if (param.hostColorBufferId >= 0) {
            // Render directly into the host color buffer; no guest copy.
            needToCopyToGuest = false;
            if (mUseGpuTexture) {
                mRenderer.renderToHostColorBufferWithTextures(
                        param.hostColorBufferId, myOutputWidth, myOutputHeight,
                        decodedTexFrame);
            } else {
                mRenderer.renderToHostColorBuffer(param.hostColorBufferId,
                                                  myOutputWidth, myOutputHeight,
                                                  decodedFrame.data());
            }
        } else {
            if (mUseGpuTexture) {
                // no colorbuffer to send the textures to, just recycle
                // them back to Renderer
                mRenderer.putTextureFrame(decodedTexFrame);
            }
        }
    }

    if (needToCopyToGuest) {
        memcpy(dst, decodedFrame.data(),
               myOutputHeight * myOutputWidth * 3 / 2);
    }

    mImageReady = false;
    // Success is signaled by returning the frame size in bytes (NV12: 1.5
    // bytes per pixel).
    *retErr = myOutputHeight * myOutputWidth * 3 / 2;
    *retPts = mOutputPts;
    *retColorPrimaries = mColorPrimaries;
    *retColorRange = mColorRange;
    *retColorTransfer = mColorTransfer;
    *retColorSpace = mColorSpace;
    H264_DPRINT("Frame primary %d range %d transfer %d space %d",
                (int)mColorPrimaries, (int)mColorRange, (int)mColorTransfer,
                (int)mColorSpace);
    H264_DPRINT("Copying completed pts %lld", (long long)mOutputPts);
}
386 
initCudaDrivers()387 bool MediaH264DecoderCuvid::initCudaDrivers() {
388     if (s_isCudaInitialized) {
389         return true;
390     }
391 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
392     typedef HMODULE CUDADRIVER;
393 #else
394     typedef void* CUDADRIVER;
395 #endif
396     CUDADRIVER hHandleDriver = 0;
397     if (CUDA_SUCCESS != cuInit(0, __CUDA_API_VERSION, hHandleDriver)) {
398         fprintf(stderr,
399                 "Failed to call cuInit, cannot use nvidia cuvid decoder for "
400                 "h264 stream\n");
401         return false;
402     }
403     if (CUDA_SUCCESS != cuvidInit(0)) {
404         fprintf(stderr,
405                 "Failed to call cuvidInit, cannot use nvidia cuvid decoder for "
406                 "h264 stream\n");
407         return false;
408     }
409 
410     int numGpuCards = 0;
411     CUresult myres = cuDeviceGetCount(&numGpuCards);
412     if (myres != CUDA_SUCCESS) {
413         H264_DPRINT(
414                 "Failed to get number of GPU cards installed on host; error "
415                 "code %d",
416                 (int)myres);
417         return false;
418     }
419 
420     if (numGpuCards <= 0) {
421         H264_DPRINT("There are no nvidia GPU cards on this host.");
422         return false;
423     }
424 
425     // lukily, we get cuda initialized.
426     s_isCudaInitialized = true;
427 
428     return true;
429 }
430 
// Parser callback: invoked when a new sequence (SPS) is parsed.
//
// Validates the stream against the GPU's decode capabilities, records
// geometry and color-description info from the sequence header, then
// (re)creates the cuvid decoder sized for the coded dimensions. The return
// value is the number of decode surfaces the parser should use.
int MediaH264DecoderCuvid::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
    int nDecodeSurface = 8;  // need 8 for 4K video

    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = pVideoFormat->codec;
    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    // Capability checks: codec support, max resolution, max macroblocks.
    if (!decodecaps.bIsSupported) {
        H264_DPRINT("Codec not supported on this GPU.");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
        (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
        H264_DPRINT("Resolution not supported on this GPU");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
        decodecaps.nMaxMBCount) {
        H264_DPRINT("MBCount not supported on this GPU");
        return nDecodeSurface;
    }

    // Visible (display) dimensions, which can be smaller than the coded
    // dimensions due to macroblock alignment.
    mLumaWidth =
            pVideoFormat->display_area.right - pVideoFormat->display_area.left;
    mLumaHeight =
            pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
    mChromaHeight = mLumaHeight * 0.5;  // NV12
    mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;

    if (pVideoFormat->video_signal_description.video_full_range_flag)
        mColorRange = 2;
    else
        mColorRange = 0;

    mColorPrimaries = pVideoFormat->video_signal_description.color_primaries;
    mColorTransfer =
            pVideoFormat->video_signal_description.transfer_characteristics;
    mColorSpace = pVideoFormat->video_signal_description.matrix_coefficients;

    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
    H264_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode =
                cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by
    // NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = mCtxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    // If the stream's display size differs from what the guest asked for,
    // follow the stream and grow the output buffer if needed.
    if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
        H264_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
        mOutputWidth = mLumaWidth;
        mOutputHeight = mLumaHeight;
        H264_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
        unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
        if (mOutBufferSize < newOutBufferSize) {
            mOutBufferSize = newOutBufferSize;
        }
    }

    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;

    mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;

    // Drop any previous decoder before creating one for the new sequence.
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    if (mCudaDecoder != nullptr) {
        NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
        mCudaDecoder = nullptr;
    }
    {
        size_t free, total;
        cuMemGetInfo(&free, &total);
        H264_DPRINT("free memory %g M, total %g M", free / 1048576.0,
                    total / 1048576.0);
    }
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    H264_DPRINT("successfully called. decoder %p", mCudaDecoder);
    return nDecodeSurface;
}
533 
// Parser callback: a picture's bitstream data is ready, so kick off the
// hardware decode. Returning 1 tells the parser to keep going.
int MediaH264DecoderCuvid::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
    NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
    H264_DPRINT("successfully called.");
    return 1;
}
539 
extern "C" {

// Selects which plane of the NV12 frame a copy operation targets.
#define MEDIA_H264_COPY_Y_TEXTURE 1
#define MEDIA_H264_COPY_UV_TEXTURE 2

// Plain-C description of a pending device-to-texture copy, passed from
// HandlePictureDisplay through the renderer to cuda_nv12_updater.
struct h264_cuvid_copy_context {
    CUdeviceptr src_frame;   // mapped cuvid output frame (device memory)
    unsigned int src_pitch;  // row pitch of src_frame, in bytes

    // this usually >= dest_height due to padding, e.g.
    // src_surface_height: 1088, dest_height: 1080
    // so, when copying UV data, the src has to start at
    // offset = src_pitch * src_surface_height
    unsigned int src_surface_height;

    unsigned int dest_width;
    unsigned int dest_height;
};
558 
cuda_copy_decoded_frame(void * privData,int mode,uint32_t dest_texture_handle)559 void cuda_copy_decoded_frame(void* privData,
560                              int mode,
561                              uint32_t dest_texture_handle) {
562     h264_cuvid_copy_context* copy_context =
563             static_cast<h264_cuvid_copy_context*>(privData);
564 
565     const unsigned int GL_TEXTURE_2D = 0x0DE1;
566     const unsigned int cudaGraphicsMapFlagsNone = 0x0;
567     CUgraphicsResource CudaRes{0};
568     H264_DPRINT("cuda copy decoded frame testure %d", (int)dest_texture_handle);
569     NVDEC_API_CALL(cuGraphicsGLRegisterImage(&CudaRes, dest_texture_handle,
570                                              GL_TEXTURE_2D, 0x0));
571     CUarray texture_ptr;
572     NVDEC_API_CALL(cuGraphicsMapResources(1, &CudaRes, 0));
573     NVDEC_API_CALL(
574             cuGraphicsSubResourceGetMappedArray(&texture_ptr, CudaRes, 0, 0));
575     CUdeviceptr dpSrcFrame = copy_context->src_frame;
576     CUDA_MEMCPY2D m = {0};
577     m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
578     m.srcDevice = dpSrcFrame;
579     m.srcPitch = copy_context->src_pitch;
580     m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
581     m.dstArray = texture_ptr;
582     m.dstPitch = copy_context->dest_width * 1;
583     m.WidthInBytes = copy_context->dest_width * 1;
584     m.Height = copy_context->dest_height;
585     H264_DPRINT("dstPitch %d, WidthInBytes %d Height %d surface-height %d",
586                 (int)m.dstPitch, (int)m.WidthInBytes, (int)m.Height,
587                 (int)copy_context->src_surface_height);
588 
589     if (mode == MEDIA_H264_COPY_Y_TEXTURE) {  // copy Y data
590         NVDEC_API_CALL(cuMemcpy2D(&m));
591     } else if (mode == MEDIA_H264_COPY_UV_TEXTURE) {  // copy UV data
592         m.srcDevice =
593                 (CUdeviceptr)((uint8_t*)dpSrcFrame +
594                               m.srcPitch * copy_context->src_surface_height);
595         m.Height = m.Height / 2;
596         NVDEC_API_CALL(cuMemcpy2D(&m));
597     }
598     NVDEC_API_CALL(cuGraphicsUnmapResources(1, &CudaRes, 0));
599     NVDEC_API_CALL(cuGraphicsUnregisterResource(CudaRes));
600 }
601 
cuda_nv12_updater(void * privData,uint32_t type,uint32_t * textures,void * callerData)602 void cuda_nv12_updater(void* privData,
603                        uint32_t type,
604                        uint32_t* textures,
605                        void* callerData) {
606     constexpr uint32_t kFRAMEWORK_FORMAT_NV12 = 3;
607     if (type != kFRAMEWORK_FORMAT_NV12) {
608         return;
609     }
610     H264_DPRINT("copyiong Ytex %d", textures[0]);
611     H264_DPRINT("copyiong UVtex %d", textures[1]);
612     cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_Y_TEXTURE, textures[0]);
613     cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_UV_TEXTURE, textures[1]);
614 }
615 
616 }  // end extern C
617 
// Parser callback: a decoded frame is ready in display order.
//
// Maps the cuvid output frame, then either schedules a GPU-side copy into
// renderer textures (mUseGpuTexture) or copies the Y and UV planes into host
// memory, and finally queues the result (plus pts and dimensions) for
// getImage(). Returning 1 tells the parser to continue.
int MediaH264DecoderCuvid::HandlePictureDisplay(
        CUVIDPARSERDISPINFO* pDispInfo) {
    // During snapshot load the replayed packets only rebuild decoder state;
    // frames are restored separately in load().
    if (mIsLoadingFromSnapshot) {
        return 1;
    }

    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field =
            pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = 0;
    uint64_t myOutputPts = pDispInfo->timestamp;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    NVDEC_API_CALL(cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
                                      &dpSrcFrame, &nSrcPitch,
                                      &videoProcessingParameters));

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
    std::vector<uint8_t> myFrame;
    TextureFrame texFrame;
    if (mUseGpuTexture) {
            // Device-to-texture path: the renderer invokes cuda_nv12_updater
            // with this copy context while the mapped frame is still valid.
            h264_cuvid_copy_context my_copy_context{
                    .src_frame = dpSrcFrame,
                    .src_pitch = nSrcPitch,
                    .src_surface_height = mSurfaceHeight,
                    .dest_width = mOutputWidth,
                    .dest_height = mOutputHeight,
            };
            texFrame = mRenderer.getTextureFrame(mOutputWidth, mOutputHeight);
            mRenderer.saveDecodedFrameToTexture(texFrame, &my_copy_context,
                                                (void*)cuda_nv12_updater);
    } else {
        // Device-to-host path: copy the luma plane, then the (half-height)
        // chroma plane, which starts after the padded surface height.
        myFrame.resize(newOutBufferSize);
        uint8_t* pDecodedFrame = &(myFrame[0]);

        CUDA_MEMCPY2D m = {0};
        m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        m.srcDevice = dpSrcFrame;
        m.srcPitch = nSrcPitch;
        m.dstMemoryType = CU_MEMORYTYPE_HOST;
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
        m.dstPitch = mOutputWidth * mBPP;
        m.WidthInBytes = mOutputWidth * mBPP;
        m.Height = mLumaHeight;
        H264_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
                    m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
                    (int)m.Height);

        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));

        m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
                                    m.srcPitch * mSurfaceHeight);
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
                                                m.dstPitch * mLumaHeight);
        m.Height = mChromaHeight;
        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
    }

    // Wait for the async copies before unmapping the source frame.
    NVDEC_API_CALL(cuStreamSynchronize(0));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
    if (!mIsLoadingFromSnapshot) {
        // Queue the frame for getImage(), which pops under the same lock.
        std::lock_guard<std::mutex> g(mFrameLock);
        mSavedFrames.push_back(myFrame);
        mSavedTexFrames.push_back(texFrame);
        mSavedPts.push_back(myOutputPts);
        mSavedW.push_back(mOutputWidth);
        mSavedH.push_back(mOutputHeight);
    }
    mImageReady = true;
    H264_DPRINT("successfully called.");
    return 1;
}
697 
// Replays one saved packet through the decoder; used during snapshot load to
// rebuild decoder state from the recorded SPS/PPS/packet sequence.
void MediaH264DecoderCuvid::oneShotDecode(std::vector<uint8_t>& data,
                                          uint64_t pts) {
    H264_DPRINT("decoding pts %lld", (long long)pts);
    decodeFrameInternal(nullptr, nullptr, data.data(), data.size(), pts);
}
703 
save(base::Stream * stream) const704 void MediaH264DecoderCuvid::save(base::Stream* stream) const {
705     stream->putBe32(mParser.version());
706     const int useGpuTexture = mUseGpuTexture ? 1 : 0;
707     stream->putBe32(useGpuTexture);
708 
709     stream->putBe32(mWidth);
710     stream->putBe32(mHeight);
711     stream->putBe32(mOutputWidth);
712     stream->putBe32(mOutputHeight);
713     stream->putBe32((int)mOutPixFmt);
714 
715     const int hasContext = mCudaContext == nullptr ? 0 : 1;
716     stream->putBe32(hasContext);
717 
718     mSnapshotState.savedFrames.clear();
719     mSnapshotState.savedDecodedFrame.data.clear();
720     for (size_t i = 0; i < mSavedFrames.size(); ++i) {
721         const std::vector<uint8_t>& myFrame = mSavedFrames.front();
722         int myOutputWidth = mSavedW.front();
723         int myOutputHeight = mSavedH.front();
724         int myOutputPts = mSavedPts.front();
725         mSnapshotState.saveDecodedFrame(
726                 myFrame, myOutputWidth, myOutputHeight,
727                 ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
728                              mColorSpace},
729                 myOutputPts);
730         mSavedFrames.pop_front();
731         mSavedTexFrames.pop_front();
732         mSavedW.pop_front();
733         mSavedH.pop_front();
734         mSavedPts.pop_front();
735     }
736     H264_DPRINT("saving packets now %d",
737                 (int)(mSnapshotState.savedPackets.size()));
738     mSnapshotState.save(stream);
739 }
740 
// Restores the decoder from a snapshot: reads back configuration, recreates
// the CUDA context if one existed at save time, replays the recorded
// SPS/PPS/packet stream to rebuild decoder state, and re-queues the frames
// that were pending delivery to the guest.
bool MediaH264DecoderCuvid::load(base::Stream* stream) {
    // Suppresses frame queuing in HandlePictureDisplay during packet replay.
    mIsLoadingFromSnapshot = true;
    uint32_t version = stream->getBe32();
    mParser = H264PingInfoParser{version};
    const int useGpuTexture = stream->getBe32();
    mUseGpuTexture = useGpuTexture ? true : false;

    mWidth = stream->getBe32();
    mHeight = stream->getBe32();
    mOutputWidth = stream->getBe32();
    mOutputHeight = stream->getBe32();
    mOutPixFmt = (PixelFormat)stream->getBe32();

    const int hasContext = stream->getBe32();
    if (hasContext) {
        // NOTE(review): passes mWidth/mHeight (not the mOutputWidth/
        // mOutputHeight read just above) as the output dimensions; the
        // output size is corrected later by HandleVideoSequence and the
        // saved-frame loop below, but confirm this is intentional.
        initH264ContextInternal(mWidth, mHeight, mWidth, mHeight, mOutPixFmt);
    }

    mSnapshotState.load(stream);

    H264_DPRINT("loaded packets %d, now restore decoder",
                (int)(mSnapshotState.savedPackets.size()));
    // Replay order matters: SPS first, then PPS, then the data packets.
    if (hasContext && mSnapshotState.sps.size() > 0) {
        oneShotDecode(mSnapshotState.sps, 0);
        if (mSnapshotState.pps.size() > 0) {
            oneShotDecode(mSnapshotState.pps, 0);
            if (mSnapshotState.savedPackets.size() > 0) {
                for (int i = 0; i < mSnapshotState.savedPackets.size(); ++i) {
                    PacketInfo& pkt = mSnapshotState.savedPackets[i];
                    oneShotDecode(pkt.data, pkt.pts);
                }
            }
        }
    }

    // Re-queue the decoded frames that had not been fetched by the guest;
    // fresh texture frames are requested from the renderer for each.
    mImageReady = false;
    for (size_t i = 0; i < mSnapshotState.savedFrames.size(); ++i) {
        auto& frame = mSnapshotState.savedFrames[i];
        mOutBufferSize = frame.data.size();
        mOutputWidth = frame.width;
        mOutputHeight = frame.height;
        mColorPrimaries = frame.color.primaries;
        mColorRange = frame.color.range;
        mColorTransfer = frame.color.transfer;
        mColorSpace = frame.color.space;
        mOutputPts = frame.pts;
        mSavedFrames.push_back(frame.data);
        TextureFrame texFrame =
                mRenderer.getTextureFrame(mOutputWidth, mOutputHeight);
        mSavedTexFrames.push_back(texFrame);
        mSavedW.push_back(mOutputWidth);
        mSavedH.push_back(mOutputHeight);
        mSavedPts.push_back(mOutputPts);
        mImageReady = true;
    }
    mIsLoadingFromSnapshot = false;
    return true;
}
799 
// Process-wide flag: cuInit/cuvidInit only need to succeed once per process
// (checked and set in initCudaDrivers).
bool MediaH264DecoderCuvid::s_isCudaInitialized = false;
// static
802 
803 }  // namespace emulation
804 }  // namespace android
805