// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "host-common/MediaH264DecoderCuvid.h"

#include "host-common/H264NaluParser.h"
#include "host-common/YuvConverter.h"
#include "android/main-emugl.h"

#include <cstdint>
#include <string>
#include <vector>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN 1
#include <windows.h>
#include <winioctl.h>
#endif

#include <stdio.h>
#include <string.h>

extern "C" {
#define INIT_CUDA_GL 1
#include "host-common/dynlink_cuda.h"
#include "host-common/dynlink_cudaGL.h"
#include "host-common/dynlink_nvcuvid.h"
}

#define MEDIA_H264_DEBUG 0

#if MEDIA_H264_DEBUG
#define H264_DPRINT(fmt, ...)                                              \
    fprintf(stderr, "h264-cuvid-dec: %s:%d " fmt "\n", __func__, __LINE__, \
            ##__VA_ARGS__);
#else
#define H264_DPRINT(fmt, ...)
#endif
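
// When MEDIA_H264_DEBUG is 0 (the default), H264_DPRINT expands to nothing,
// so its arguments are never evaluated in release builds.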

#define NVDEC_API_CALL(cuvidAPI)                                     \
    do {                                                             \
        CUresult errorCode = cuvidAPI;                               \
        if (errorCode != CUDA_SUCCESS) {                             \
            H264_DPRINT("%s failed with error code %d\n", #cuvidAPI, \
                        (int)errorCode);                             \
        }                                                            \
    } while (0)
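
// Note: NVDEC_API_CALL only logs a failure (and only when MEDIA_H264_DEBUG
// is enabled); the CUresult is not propagated, so callers keep going after
// an error.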

namespace android {
namespace emulation {

using InitContextParam = H264PingInfoParser::InitContextParam;
using DecodeFrameParam = H264PingInfoParser::DecodeFrameParam;
using ResetParam = H264PingInfoParser::ResetParam;
using GetImageParam = H264PingInfoParser::GetImageParam;
using TextureFrame = MediaHostRenderer::TextureFrame;

MediaH264DecoderCuvid::MediaH264DecoderCuvid(uint64_t id,
                                             H264PingInfoParser parser)
    : mId(id), mParser(parser) {
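    // GPU-texture output is opt-in: it requires the
    // ANDROID_EMU_CODEC_USE_GPU_TEXTURE environment variable to be set,
    // protocol version 200, and the host-GPU renderer.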
    auto useGpuTextureEnv = android::base::System::getEnvironmentVariable(
            "ANDROID_EMU_CODEC_USE_GPU_TEXTURE");
    if (useGpuTextureEnv != "") {
        if (mParser.version() == 200) {
            if (emuglConfig_get_current_renderer() == SELECTED_RENDERER_HOST) {
                mUseGpuTexture = true;
            } else {
                H264_DPRINT(
                        "cannot use gpu texture to save decoded frame in "
                        "non-host gpu mode");
                if (emuglConfig_get_current_renderer() ==
                    SELECTED_RENDERER_SWIFTSHADER_INDIRECT) {
                    H264_DPRINT("your gpu mode is: swiftshader_indirect");
                }
            }
        }
    }
}

MediaH264DecoderPlugin* MediaH264DecoderCuvid::clone() {
    return new MediaH264DecoderCuvid(mId, mParser);
}

MediaH264DecoderCuvid::~MediaH264DecoderCuvid() {
    destroyH264Context();
}

void MediaH264DecoderCuvid::reset(void* ptr) {
    destroyH264Context();
    ResetParam param{};
    mParser.parseResetParams(ptr, param);
    initH264ContextInternal(param.width, param.height, param.outputWidth,
                            param.outputHeight, param.outputPixelFormat);
}

void MediaH264DecoderCuvid::initH264Context(void* ptr) {
    InitContextParam param{};
    mParser.parseInitContextParams(ptr, param);
    initH264ContextInternal(param.width, param.height, param.outputWidth,
                            param.outputHeight, param.outputPixelFormat);
}

void MediaH264DecoderCuvid::initH264ContextInternal(unsigned int width,
                                                    unsigned int height,
                                                    unsigned int outWidth,
                                                    unsigned int outHeight,
                                                    PixelFormat outPixFmt) {
    if (!initCudaDrivers()) {
        H264_DPRINT("Failed to initH264Context because driver is not working");
        return;
    }

    if (mCudaContext != nullptr) {
        destroyH264Context();
    }
    H264_DPRINT("%s(w=%u h=%u out_w=%u out_h=%u pixfmt=%u)", __func__, width,
                height, outWidth, outHeight, (uint8_t)outPixFmt);
    mWidth = width;
    mHeight = height;

    mOutputWidth = outWidth;
    mOutputHeight = outHeight;
    mOutPixFmt = outPixFmt;
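    // 4:2:0 output: a full-size luma plane plus half as many chroma bytes,
    // i.e. w * h * 3 / 2 bytes total; for example, 1920x1080 needs
    // 1920 * 1080 * 3 / 2 = 3,110,400 bytes.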
    mOutBufferSize = outWidth * outHeight * 3 / 2;

    // CUDA setup: pick a device, then create a context and a cuvid lock.
    const int gpuIndex = 0;
    const int cudaFlags = 0;
    CUdevice cudaDevice = 0;
    CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT("Failed to get cuda device, error code %d", (int)myres);
        return;
    }

    char buf[1024];
    myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
        return;
    }

    H264_DPRINT("using gpu device %s", buf);

    myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT("Failed to create cuda context, error code %d", (int)myres);
    }

    NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));

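    // The parser drives decoding through three callbacks: the sequence
    // callback fires when a new sequence header (SPS) arrives and
    // (re)creates the NVDEC decoder, the decode callback submits one
    // picture's parameters to the decoder, and the display callback hands
    // back decoded frames in display order.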
    CUVIDPARSERPARAMS videoParserParameters = {};
    videoParserParameters.CodecType = cudaVideoCodec_H264;
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulMaxDisplayDelay = 1;
    videoParserParameters.pUserData = this;
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
    videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
    NVDEC_API_CALL(
            cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));

    H264_DPRINT("Successfully created cuda context %p", mCudaContext);
}

void MediaH264DecoderCuvid::destroyH264Context() {
    H264_DPRINT("destroyH264Context called");

    for (auto texFrame : mSavedTexFrames) {
        mRenderer.putTextureFrame(texFrame);
    }
    mRenderer.cleanUpTextures();
    mSavedTexFrames.clear();
    if (mCudaContext != nullptr) {
        NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
        if (mCudaParser != nullptr) {
            NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
            mCudaParser = nullptr;
        }

        if (mCudaDecoder != nullptr) {
            NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
            mCudaDecoder = nullptr;
        }
        NVDEC_API_CALL(cuCtxPopCurrent(NULL));
        NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
    }

    if (mCudaContext != nullptr) {
        CUresult myres = cuCtxDestroy(mCudaContext);
        if (myres != CUDA_SUCCESS) {
            H264_DPRINT("Failed to destroy cuda context; error code %d",
                        (int)myres);
        }
        mCudaContext = nullptr;
    }
}

void MediaH264DecoderCuvid::decodeFrame(void* ptr) {
    DecodeFrameParam param{};
    mParser.parseDecodeFrameParams(ptr, param);

    const uint8_t* frame = param.pData;
    size_t szBytes = param.size;
    uint64_t inputPts = param.pts;

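    // Snapshot support: remember the most recent SPS/PPS and every packet
    // since the last key frame, so that load() can replay them and rebuild
    // the decoder state after a snapshot restore.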
    const bool enableSnapshot = true;
    if (enableSnapshot) {
        std::vector<uint8_t> v;
        v.assign(frame, frame + szBytes);
        bool hasSps = H264NaluParser::checkSpsFrame(frame, szBytes);
        if (hasSps) {
            mSnapshotState = SnapshotState{};
            mSnapshotState.saveSps(v);
        } else {
            bool hasPps = H264NaluParser::checkPpsFrame(frame, szBytes);
            if (hasPps) {
                mSnapshotState.savePps(v);
                mSnapshotState.savedPackets.clear();
                mSnapshotState.savedDecodedFrame.data.clear();
            } else {
                bool isIFrame = H264NaluParser::checkIFrame(frame, szBytes);
                if (isIFrame) {
                    mSnapshotState.savedPackets.clear();
                }
                mSnapshotState.savePacket(std::move(v), inputPts);
                H264_DPRINT("saving packet; total is %d",
                            (int)(mSnapshotState.savedPackets.size()));
            }
        }
    }

    decodeFrameInternal(param.pConsumedBytes, param.pDecoderErrorCode, frame,
                        szBytes, inputPts);
}

void MediaH264DecoderCuvid::decodeFrameInternal(uint64_t* pRetSzBytes,
                                                int32_t* pRetErr,
                                                const uint8_t* frame,
                                                size_t szBytes,
                                                uint64_t inputPts) {
    mIsInFlush = false;
    H264_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);
    Err h264Err = Err::NoErr;

    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = frame;
    packet.payload_size = szBytes;
    packet.flags = CUVID_PKT_TIMESTAMP;
    packet.timestamp = inputPts;
    if (!frame || szBytes == 0) {
        // An empty payload with the end-of-stream flag asks the parser to
        // drain any buffered pictures.
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    }
    NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
    if (pRetSzBytes) {
        *pRetSzBytes = szBytes;
    }
    if (pRetErr) {
        *pRetErr = (int32_t)h264Err;
    }
}

void MediaH264DecoderCuvid::doFlush() {
    if (!mIsInFlush) {
        return;
    }
    H264_DPRINT("started flushing");
    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = NULL;
    packet.payload_size = 0;
    packet.flags |= CUVID_PKT_ENDOFSTREAM;
    NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
    H264_DPRINT("done flushing");
}

void MediaH264DecoderCuvid::flush(void* ptr) {
    mIsInFlush = true;
    doFlush();
}

void MediaH264DecoderCuvid::getImage(void* ptr) {
    H264_DPRINT("getImage %p", ptr);
    GetImageParam param{};
    mParser.parseGetImageParams(ptr, param);

    int* retErr = param.pDecoderErrorCode;
    uint32_t* retWidth = param.pRetWidth;
    uint32_t* retHeight = param.pRetHeight;
    uint64_t* retPts = param.pRetPts;
    uint32_t* retColorPrimaries = param.pRetColorPrimaries;
    uint32_t* retColorRange = param.pRetColorRange;
    uint32_t* retColorTransfer = param.pRetColorTransfer;
    uint32_t* retColorSpace = param.pRetColorSpace;

    static int numbers = 0;
    H264_DPRINT("calling getImage %d colorbuffer %d", numbers++,
                (int)param.hostColorBufferId);
    doFlush();
    uint8_t* dst = param.pDecodedFrame;
    int myOutputWidth = mOutputWidth;
    int myOutputHeight = mOutputHeight;
    std::vector<uint8_t> decodedFrame;
    TextureFrame decodedTexFrame;
    {
        std::lock_guard<std::mutex> g(mFrameLock);
        mImageReady = !mSavedFrames.empty();
        if (!mImageReady) {
            H264_DPRINT("%s: no new frame yet", __func__);
            *retErr = static_cast<int>(Err::NoDecodedFrame);
            return;
        }

        std::vector<uint8_t>& myFrame = mSavedFrames.front();
        std::swap(decodedFrame, myFrame);
        decodedTexFrame = mSavedTexFrames.front();
        mOutputPts = mSavedPts.front();

        myOutputWidth = mSavedW.front();
        myOutputHeight = mSavedH.front();
        *retWidth = myOutputWidth;
        *retHeight = myOutputHeight;

        mSavedFrames.pop_front();
        mSavedTexFrames.pop_front();
        mSavedPts.pop_front();
        mSavedW.pop_front();
        mSavedH.pop_front();
    }

    bool needToCopyToGuest = true;

    if (mUseGpuTexture) {
        needToCopyToGuest = false;
    } else {
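        // The decoder produced NV12 (a luma plane followed by an interleaved
        // UV plane); the guest expects planar YUV420, so de-interleave the
        // chroma in place before the copy below.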
        YuvConverter<uint8_t> convert8(myOutputWidth, myOutputHeight);
        convert8.UVInterleavedToPlanar(decodedFrame.data());
    }

    if (mParser.version() == 200) {
        if (param.hostColorBufferId >= 0) {
            needToCopyToGuest = false;
            if (mUseGpuTexture) {
                mRenderer.renderToHostColorBufferWithTextures(
                        param.hostColorBufferId, myOutputWidth, myOutputHeight,
                        decodedTexFrame);
            } else {
                mRenderer.renderToHostColorBuffer(param.hostColorBufferId,
                                                  myOutputWidth, myOutputHeight,
                                                  decodedFrame.data());
            }
        } else {
            if (mUseGpuTexture) {
                // no colorbuffer to send the textures to; just recycle
                // them back to the renderer
                mRenderer.putTextureFrame(decodedTexFrame);
            }
        }
    }

    if (needToCopyToGuest) {
        memcpy(dst, decodedFrame.data(),
               myOutputHeight * myOutputWidth * 3 / 2);
    }

    mImageReady = false;
    *retErr = myOutputHeight * myOutputWidth * 3 / 2;
    *retPts = mOutputPts;
    *retColorPrimaries = mColorPrimaries;
    *retColorRange = mColorRange;
    *retColorTransfer = mColorTransfer;
    *retColorSpace = mColorSpace;
    H264_DPRINT("Frame primary %d range %d transfer %d space %d",
                (int)mColorPrimaries, (int)mColorRange, (int)mColorTransfer,
                (int)mColorSpace);
    H264_DPRINT("Copying completed pts %lld", (long long)mOutputPts);
}

bool MediaH264DecoderCuvid::initCudaDrivers() {
    if (s_isCudaInitialized) {
        return true;
    }
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    typedef HMODULE CUDADRIVER;
#else
    typedef void* CUDADRIVER;
#endif
    CUDADRIVER hHandleDriver = 0;
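    // This cuInit comes from the dynlink wrappers included above: unlike
    // the one-argument driver-API cuInit(flags), it also loads the CUDA
    // driver library and resolves its entry points.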
    if (CUDA_SUCCESS != cuInit(0, __CUDA_API_VERSION, hHandleDriver)) {
        fprintf(stderr,
                "Failed to call cuInit, cannot use nvidia cuvid decoder for "
                "h264 stream\n");
        return false;
    }
    if (CUDA_SUCCESS != cuvidInit(0)) {
        fprintf(stderr,
                "Failed to call cuvidInit, cannot use nvidia cuvid decoder for "
                "h264 stream\n");
        return false;
    }

    int numGpuCards = 0;
    CUresult myres = cuDeviceGetCount(&numGpuCards);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT(
                "Failed to get number of GPU cards installed on host; error "
                "code %d",
                (int)myres);
        return false;
    }

    if (numGpuCards <= 0) {
        H264_DPRINT("There are no nvidia GPU cards on this host.");
        return false;
    }

    // CUDA and cuvid initialized successfully; only do this once per process.
    s_isCudaInitialized = true;

    return true;
}

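// Parser sequence callback. Per the NVDEC API, the return value is
// meaningful: 0 means failure, 1 means success, and values greater than 1
// override the parser's decode-surface count; note that the capability
// checks below return nDecodeSurface rather than 0 even when they fail.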
int MediaH264DecoderCuvid::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
    int nDecodeSurface = 8;  // need 8 for 4K video

    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = pVideoFormat->codec;
    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    if (!decodecaps.bIsSupported) {
        H264_DPRINT("Codec not supported on this GPU.");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
        (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
        H264_DPRINT("Resolution not supported on this GPU");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
        decodecaps.nMaxMBCount) {
        H264_DPRINT("MBCount not supported on this GPU");
        return nDecodeSurface;
    }

    mLumaWidth =
            pVideoFormat->display_area.right - pVideoFormat->display_area.left;
    mLumaHeight =
            pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
    mChromaHeight = mLumaHeight * 0.5;  // NV12: chroma plane is half height
    mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;

    if (pVideoFormat->video_signal_description.video_full_range_flag)
        mColorRange = 2;
    else
        mColorRange = 0;

    mColorPrimaries = pVideoFormat->video_signal_description.color_primaries;
    mColorTransfer =
            pVideoFormat->video_signal_description.transfer_characteristics;
    mColorSpace = pVideoFormat->video_signal_description.matrix_coefficients;

    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
    H264_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode =
                cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded
    // by NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = mCtxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
        H264_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
        mOutputWidth = mLumaWidth;
        mOutputHeight = mLumaHeight;
        H264_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
        unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
        if (mOutBufferSize < newOutBufferSize) {
            mOutBufferSize = newOutBufferSize;
        }
    }

    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;

    mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    if (mCudaDecoder != nullptr) {
        NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
        mCudaDecoder = nullptr;
    }
    {
        size_t free, total;
        cuMemGetInfo(&free, &total);
        H264_DPRINT("free memory %g M, total %g M", free / 1048576.0,
                    total / 1048576.0);
    }
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    H264_DPRINT("successfully called. decoder %p", mCudaDecoder);
    return nDecodeSurface;
}

int MediaH264DecoderCuvid::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
    NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
    H264_DPRINT("successfully called.");
    return 1;
}

extern "C" {

#define MEDIA_H264_COPY_Y_TEXTURE 1
#define MEDIA_H264_COPY_UV_TEXTURE 2
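
// The NV12 frame is handed over as two GL textures: a full-resolution
// texture for the Y plane and a half-height texture for the interleaved UV
// plane (presumably single-channel and two-channel formats; the actual
// texture formats are owned by MediaHostRenderer, not this file).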

struct h264_cuvid_copy_context {
    CUdeviceptr src_frame;
    unsigned int src_pitch;

    // this is usually >= dest_height due to padding, e.g.
    // src_surface_height: 1088, dest_height: 1080,
    // so when copying UV data the src has to start at
    // offset = src_pitch * src_surface_height
    unsigned int src_surface_height;

    unsigned int dest_width;
    unsigned int dest_height;
};
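
// Worked example with illustrative numbers: for a 1920x1080 stream the
// decoder may allocate a 2048-byte pitch and a 1088-row surface, in which
// case the Y plane is read from src_frame while the UV plane starts at
// src_frame + 2048 * 1088, not at 2048 * 1080.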

void cuda_copy_decoded_frame(void* privData,
                             int mode,
                             uint32_t dest_texture_handle) {
    h264_cuvid_copy_context* copy_context =
            static_cast<h264_cuvid_copy_context*>(privData);

    const unsigned int GL_TEXTURE_2D = 0x0DE1;
    const unsigned int cudaGraphicsMapFlagsNone = 0x0;
    CUgraphicsResource CudaRes{0};
    H264_DPRINT("cuda copy decoded frame texture %d", (int)dest_texture_handle);
    NVDEC_API_CALL(cuGraphicsGLRegisterImage(&CudaRes, dest_texture_handle,
                                             GL_TEXTURE_2D, 0x0));
    CUarray texture_ptr;
    NVDEC_API_CALL(cuGraphicsMapResources(1, &CudaRes, 0));
    NVDEC_API_CALL(
            cuGraphicsSubResourceGetMappedArray(&texture_ptr, CudaRes, 0, 0));
    CUdeviceptr dpSrcFrame = copy_context->src_frame;
    CUDA_MEMCPY2D m = {0};
    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    m.srcDevice = dpSrcFrame;
    m.srcPitch = copy_context->src_pitch;
    m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
    m.dstArray = texture_ptr;
    m.dstPitch = copy_context->dest_width * 1;
    m.WidthInBytes = copy_context->dest_width * 1;
    m.Height = copy_context->dest_height;
    H264_DPRINT("dstPitch %d, WidthInBytes %d Height %d surface-height %d",
                (int)m.dstPitch, (int)m.WidthInBytes, (int)m.Height,
                (int)copy_context->src_surface_height);

    if (mode == MEDIA_H264_COPY_Y_TEXTURE) {  // copy Y data
        NVDEC_API_CALL(cuMemcpy2D(&m));
    } else if (mode == MEDIA_H264_COPY_UV_TEXTURE) {  // copy UV data
        m.srcDevice =
                (CUdeviceptr)((uint8_t*)dpSrcFrame +
                              m.srcPitch * copy_context->src_surface_height);
        m.Height = m.Height / 2;
        NVDEC_API_CALL(cuMemcpy2D(&m));
    }
    NVDEC_API_CALL(cuGraphicsUnmapResources(1, &CudaRes, 0));
    NVDEC_API_CALL(cuGraphicsUnregisterResource(CudaRes));
}

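// Texture-update callback passed (as a void*) to
// MediaHostRenderer::saveDecodedFrameToTexture in HandlePictureDisplay
// below; it receives the GL texture ids of the target color buffer and
// copies the Y and UV planes into them. The value 3 is assumed to match the
// emulator's NV12 framework format.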
void cuda_nv12_updater(void* privData,
                       uint32_t type,
                       uint32_t* textures,
                       void* callerData) {
    constexpr uint32_t kFRAMEWORK_FORMAT_NV12 = 3;
    if (type != kFRAMEWORK_FORMAT_NV12) {
        return;
    }
    H264_DPRINT("copying Ytex %d", textures[0]);
    H264_DPRINT("copying UVtex %d", textures[1]);
    cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_Y_TEXTURE, textures[0]);
    cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_UV_TEXTURE, textures[1]);
}

}  // end extern "C"

int MediaH264DecoderCuvid::HandlePictureDisplay(
        CUVIDPARSERDISPINFO* pDispInfo) {
    if (mIsLoadingFromSnapshot) {
        return 1;
    }

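    // Flow: map the decoded picture to get a CUDA device pointer, copy it
    // either into GL textures (GPU-texture path) or into host memory
    // (CPU path), synchronize, unmap, then queue the result for getImage().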
    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field =
            pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = 0;
    uint64_t myOutputPts = pDispInfo->timestamp;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    NVDEC_API_CALL(cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
                                      &dpSrcFrame, &nSrcPitch,
                                      &videoProcessingParameters));

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
    std::vector<uint8_t> myFrame;
    TextureFrame texFrame;
    if (mUseGpuTexture) {
        h264_cuvid_copy_context my_copy_context{
                .src_frame = dpSrcFrame,
                .src_pitch = nSrcPitch,
                .src_surface_height = mSurfaceHeight,
                .dest_width = mOutputWidth,
                .dest_height = mOutputHeight,
        };
        texFrame = mRenderer.getTextureFrame(mOutputWidth, mOutputHeight);
        mRenderer.saveDecodedFrameToTexture(texFrame, &my_copy_context,
                                            (void*)cuda_nv12_updater);
    } else {
        myFrame.resize(newOutBufferSize);
        uint8_t* pDecodedFrame = &(myFrame[0]);

        CUDA_MEMCPY2D m = {0};
        m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        m.srcDevice = dpSrcFrame;
        m.srcPitch = nSrcPitch;
        m.dstMemoryType = CU_MEMORYTYPE_HOST;
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
        m.dstPitch = mOutputWidth * mBPP;
        m.WidthInBytes = mOutputWidth * mBPP;
        m.Height = mLumaHeight;
        H264_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
                    m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
                    (int)m.Height);

        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));

        m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
                                    m.srcPitch * mSurfaceHeight);
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
                                                m.dstPitch * mLumaHeight);
        m.Height = mChromaHeight;
        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
    }

    NVDEC_API_CALL(cuStreamSynchronize(0));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
    if (!mIsLoadingFromSnapshot) {
        std::lock_guard<std::mutex> g(mFrameLock);
        mSavedFrames.push_back(myFrame);
        mSavedTexFrames.push_back(texFrame);
        mSavedPts.push_back(myOutputPts);
        mSavedW.push_back(mOutputWidth);
        mSavedH.push_back(mOutputHeight);
    }
    mImageReady = true;
    H264_DPRINT("successfully called.");
    return 1;
}

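// Replays one saved packet through the decoder. Used by load() below to
// rebuild the hardware decoder state from the snapshot's SPS/PPS and
// packet history.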
void MediaH264DecoderCuvid::oneShotDecode(std::vector<uint8_t>& data,
                                          uint64_t pts) {
    H264_DPRINT("decoding pts %lld", (long long)pts);
    decodeFrameInternal(nullptr, nullptr, data.data(), data.size(), pts);
}

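// Snapshot save: serialize the decoder configuration, then drain the queues
// of already-decoded frames into mSnapshotState so load() can re-queue
// them. save() is const, so the members it drains are presumably declared
// mutable in the header.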
void MediaH264DecoderCuvid::save(base::Stream* stream) const {
    stream->putBe32(mParser.version());
    const int useGpuTexture = mUseGpuTexture ? 1 : 0;
    stream->putBe32(useGpuTexture);

    stream->putBe32(mWidth);
    stream->putBe32(mHeight);
    stream->putBe32(mOutputWidth);
    stream->putBe32(mOutputHeight);
    stream->putBe32((int)mOutPixFmt);

    const int hasContext = mCudaContext == nullptr ? 0 : 1;
    stream->putBe32(hasContext);

    mSnapshotState.savedFrames.clear();
    mSnapshotState.savedDecodedFrame.data.clear();
    // Drain all pending decoded frames into the snapshot state.
    while (!mSavedFrames.empty()) {
        const std::vector<uint8_t>& myFrame = mSavedFrames.front();
        int myOutputWidth = mSavedW.front();
        int myOutputHeight = mSavedH.front();
        uint64_t myOutputPts = mSavedPts.front();
        mSnapshotState.saveDecodedFrame(
                myFrame, myOutputWidth, myOutputHeight,
                ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
                             mColorSpace},
                myOutputPts);
        mSavedFrames.pop_front();
        mSavedTexFrames.pop_front();
        mSavedW.pop_front();
        mSavedH.pop_front();
        mSavedPts.pop_front();
    }
    H264_DPRINT("saving packets now %d",
                (int)(mSnapshotState.savedPackets.size()));
    mSnapshotState.save(stream);
}

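// Snapshot load: restore the configuration, recreate the CUDA context if
// one existed at save time, replay the saved SPS/PPS and packet history
// through the real decoder, then re-queue the frames that had already been
// decoded when the snapshot was taken.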
bool MediaH264DecoderCuvid::load(base::Stream* stream) {
    mIsLoadingFromSnapshot = true;
    uint32_t version = stream->getBe32();
    mParser = H264PingInfoParser{version};
    const int useGpuTexture = stream->getBe32();
    mUseGpuTexture = useGpuTexture ? true : false;

    mWidth = stream->getBe32();
    mHeight = stream->getBe32();
    mOutputWidth = stream->getBe32();
    mOutputHeight = stream->getBe32();
    mOutPixFmt = (PixelFormat)stream->getBe32();

    const int hasContext = stream->getBe32();
    if (hasContext) {
        initH264ContextInternal(mWidth, mHeight, mWidth, mHeight, mOutPixFmt);
    }

    mSnapshotState.load(stream);

    H264_DPRINT("loaded packets %d, now restore decoder",
                (int)(mSnapshotState.savedPackets.size()));
    if (hasContext && mSnapshotState.sps.size() > 0) {
        oneShotDecode(mSnapshotState.sps, 0);
        if (mSnapshotState.pps.size() > 0) {
            oneShotDecode(mSnapshotState.pps, 0);
            for (size_t i = 0; i < mSnapshotState.savedPackets.size(); ++i) {
                PacketInfo& pkt = mSnapshotState.savedPackets[i];
                oneShotDecode(pkt.data, pkt.pts);
            }
        }
    }

    mImageReady = false;
    for (size_t i = 0; i < mSnapshotState.savedFrames.size(); ++i) {
        auto& frame = mSnapshotState.savedFrames[i];
        mOutBufferSize = frame.data.size();
        mOutputWidth = frame.width;
        mOutputHeight = frame.height;
        mColorPrimaries = frame.color.primaries;
        mColorRange = frame.color.range;
        mColorTransfer = frame.color.transfer;
        mColorSpace = frame.color.space;
        mOutputPts = frame.pts;
        mSavedFrames.push_back(frame.data);
        TextureFrame texFrame =
                mRenderer.getTextureFrame(mOutputWidth, mOutputHeight);
        mSavedTexFrames.push_back(texFrame);
        mSavedW.push_back(mOutputWidth);
        mSavedH.push_back(mOutputHeight);
        mSavedPts.push_back(mOutputPts);
        mImageReady = true;
    }
    mIsLoadingFromSnapshot = false;
    return true;
}

// static
bool MediaH264DecoderCuvid::s_isCudaInitialized = false;

}  // namespace emulation
}  // namespace android