/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 *
 */

#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h"

#if defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)

#include <string>
#include <vector>

#include "libyuv/convert_from.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h"

namespace internal {

// Convenience function for creating a dictionary.
inline CFDictionaryRef CreateCFDictionary(CFTypeRef* keys,
                                          CFTypeRef* values,
                                          size_t size) {
  return CFDictionaryCreate(kCFAllocatorDefault, keys, values, size,
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

// Copies characters from a CFStringRef into a std::string.
std::string CFStringToString(const CFStringRef cf_string) {
  RTC_DCHECK(cf_string);
  std::string std_string;
  // Get the size needed for UTF8 plus terminating character.
  size_t buffer_size =
      CFStringGetMaximumSizeForEncoding(CFStringGetLength(cf_string),
                                        kCFStringEncodingUTF8) +
      1;
  rtc::scoped_ptr<char[]> buffer(new char[buffer_size]);
  if (CFStringGetCString(cf_string, buffer.get(), buffer_size,
                         kCFStringEncodingUTF8)) {
    // Copy over the characters.
    std_string.assign(buffer.get());
  }
  return std_string;
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session,
                          CFStringRef key,
                          int32_t value) {
  CFNumberRef cfNum =
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
  OSStatus status = VTSessionSetProperty(session, key, cfNum);
  CFRelease(cfNum);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << value << ": " << status;
  }
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session, CFStringRef key, bool value) {
  CFBooleanRef cf_bool = (value) ? kCFBooleanTrue : kCFBooleanFalse;
  OSStatus status = VTSessionSetProperty(session, key, cf_bool);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << value << ": " << status;
  }
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session,
                          CFStringRef key,
                          CFStringRef value) {
  OSStatus status = VTSessionSetProperty(session, key, value);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    std::string val_string = CFStringToString(value);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << val_string << ": " << status;
  }
}

// Struct that we pass to the encoder for each frame to encode. We receive it
// again in the encoder callback.
struct FrameEncodeParams {
  FrameEncodeParams(webrtc::EncodedImageCallback* cb,
                    const webrtc::CodecSpecificInfo* csi,
                    int32_t w,
                    int32_t h,
                    int64_t rtms,
                    uint32_t ts)
      : callback(cb), width(w), height(h), render_time_ms(rtms), timestamp(ts) {
    if (csi) {
      codec_specific_info = *csi;
    } else {
      codec_specific_info.codecType = webrtc::kVideoCodecH264;
    }
  }
  webrtc::EncodedImageCallback* callback;
  webrtc::CodecSpecificInfo codec_specific_info;
  int32_t width;
  int32_t height;
  int64_t render_time_ms;
  uint32_t timestamp;
};

// We receive I420Frames as input, but we need to feed CVPixelBuffers into the
// encoder. This performs the copy and format conversion.
// TODO(tkchin): See if encoder will accept i420 frames and compare performance.
bool CopyVideoFrameToPixelBuffer(const webrtc::VideoFrame& frame,
                                 CVPixelBufferRef pixel_buffer) {
  RTC_DCHECK(pixel_buffer);
  RTC_DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer) ==
             kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
  RTC_DCHECK(CVPixelBufferGetHeightOfPlane(pixel_buffer, 0) ==
             static_cast<size_t>(frame.height()));
  RTC_DCHECK(CVPixelBufferGetWidthOfPlane(pixel_buffer, 0) ==
             static_cast<size_t>(frame.width()));

  CVReturn cvRet = CVPixelBufferLockBaseAddress(pixel_buffer, 0);
  if (cvRet != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
    return false;
  }
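  // The destination buffer is bi-planar NV12: plane 0 holds Y samples and
  // plane 1 holds interleaved CbCr samples.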
  uint8_t* dst_y = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 0));
  int dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 0);
  uint8_t* dst_uv = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 1));
  int dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 1);
  // Convert I420 to NV12.
  int ret = libyuv::I420ToNV12(
      frame.buffer(webrtc::kYPlane), frame.stride(webrtc::kYPlane),
      frame.buffer(webrtc::kUPlane), frame.stride(webrtc::kUPlane),
      frame.buffer(webrtc::kVPlane), frame.stride(webrtc::kVPlane), dst_y,
      dst_stride_y, dst_uv, dst_stride_uv, frame.width(), frame.height());
  CVPixelBufferUnlockBaseAddress(pixel_buffer, 0);
  if (ret) {
    LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12: " << ret;
    return false;
  }
  return true;
}

// This is the callback function that VideoToolbox calls when encode is
// complete.
void VTCompressionOutputCallback(void* encoder,
                                 void* params,
                                 OSStatus status,
                                 VTEncodeInfoFlags info_flags,
                                 CMSampleBufferRef sample_buffer) {
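  // |params| is the FrameEncodeParams allocated in Encode(); wrapping it in a
  // scoped_ptr ensures it is freed on every exit path of this callback.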
  rtc::scoped_ptr<FrameEncodeParams> encode_params(
      reinterpret_cast<FrameEncodeParams*>(params));
  if (status != noErr) {
    LOG(LS_ERROR) << "H264 encoding failed.";
    return;
  }
  if (info_flags & kVTEncodeInfo_FrameDropped) {
    LOG(LS_INFO) << "H264 encode dropped frame.";
  }

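  // A sample that lacks the kCMSampleAttachmentKey_NotSync attachment is a
  // sync sample, i.e. a keyframe.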
  bool is_keyframe = false;
  CFArrayRef attachments =
      CMSampleBufferGetSampleAttachmentsArray(sample_buffer, 0);
  if (attachments != nullptr && CFArrayGetCount(attachments)) {
    CFDictionaryRef attachment =
        static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
    is_keyframe =
        !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
  }

  // Convert the sample buffer into a buffer suitable for RTP packetization.
  // TODO(tkchin): Allocate buffers through a pool.
  rtc::scoped_ptr<rtc::Buffer> buffer(new rtc::Buffer());
  rtc::scoped_ptr<webrtc::RTPFragmentationHeader> header;
  if (!H264CMSampleBufferToAnnexBBuffer(sample_buffer, is_keyframe,
                                        buffer.get(), header.accept())) {
    return;
  }
  webrtc::EncodedImage frame(buffer->data(), buffer->size(), buffer->size());
  frame._encodedWidth = encode_params->width;
  frame._encodedHeight = encode_params->height;
  frame._completeFrame = true;
  frame._frameType =
      is_keyframe ? webrtc::kVideoFrameKey : webrtc::kVideoFrameDelta;
  frame.capture_time_ms_ = encode_params->render_time_ms;
  frame._timeStamp = encode_params->timestamp;

  int result = encode_params->callback->Encoded(
      frame, &(encode_params->codec_specific_info), header.get());
  if (result != 0) {
    LOG(LS_ERROR) << "Encoded callback failed: " << result;
  }
}

}  // namespace internal

namespace webrtc {

H264VideoToolboxEncoder::H264VideoToolboxEncoder()
    : callback_(nullptr), compression_session_(nullptr) {}

H264VideoToolboxEncoder::~H264VideoToolboxEncoder() {
  DestroyCompressionSession();
}

int H264VideoToolboxEncoder::InitEncode(const VideoCodec* codec_settings,
                                        int number_of_cores,
                                        size_t max_payload_size) {
  RTC_DCHECK(codec_settings);
  RTC_DCHECK_EQ(codec_settings->codecType, kVideoCodecH264);
  // TODO(tkchin): We may need to enforce width/height dimension restrictions
  // to match what the encoder supports.
  width_ = codec_settings->width;
  height_ = codec_settings->height;
  // We can only set average bitrate on the HW encoder.
  bitrate_ = codec_settings->startBitrate * 1000;

  // TODO(tkchin): Try setting payload size via
  // kVTCompressionPropertyKey_MaxH264SliceBytes.

  return ResetCompressionSession();
}

int H264VideoToolboxEncoder::Encode(
    const VideoFrame& input_image,
    const CodecSpecificInfo* codec_specific_info,
    const std::vector<FrameType>* frame_types) {
  if (input_image.IsZeroSize()) {
    // It's possible to get zero sizes as a signal to produce keyframes (this
    // happens for internal sources). But this shouldn't happen in
    // webrtcvideoengine2.
    RTC_NOTREACHED();
    return WEBRTC_VIDEO_CODEC_OK;
  }
  if (!callback_ || !compression_session_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

  // Get a pixel buffer from the pool and copy frame data over.
  CVPixelBufferPoolRef pixel_buffer_pool =
      VTCompressionSessionGetPixelBufferPool(compression_session_);
  CVPixelBufferRef pixel_buffer = nullptr;
  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool,
                                                    &pixel_buffer);
  if (ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
    // We probably want to drop frames here, since failure probably means
    // that the pool is empty.
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  RTC_DCHECK(pixel_buffer);
  if (!internal::CopyVideoFrameToPixelBuffer(input_image, pixel_buffer)) {
    LOG(LS_ERROR) << "Failed to copy frame data.";
    CVBufferRelease(pixel_buffer);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  // Check if we need a keyframe.
  bool is_keyframe_required = false;
  if (frame_types) {
    for (auto frame_type : *frame_types) {
      if (frame_type == kVideoFrameKey) {
        is_keyframe_required = true;
        break;
      }
    }
  }

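  // CMTimeMake(render_time_ms, 1000) expresses the render time in a
  // millisecond timescale for the presentation timestamp.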
  CMTime presentation_time_stamp =
      CMTimeMake(input_image.render_time_ms(), 1000);
  CFDictionaryRef frame_properties = nullptr;
  if (is_keyframe_required) {
    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
    CFTypeRef values[] = {kCFBooleanTrue};
    frame_properties = internal::CreateCFDictionary(keys, values, 1);
  }
  rtc::scoped_ptr<internal::FrameEncodeParams> encode_params;
  encode_params.reset(new internal::FrameEncodeParams(
      callback_, codec_specific_info, width_, height_,
      input_image.render_time_ms(), input_image.timestamp()));
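  // |encode_params| is released to the encoder as the per-frame refcon;
  // VTCompressionOutputCallback takes ownership of it again.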
  VTCompressionSessionEncodeFrame(
      compression_session_, pixel_buffer, presentation_time_stamp,
      kCMTimeInvalid, frame_properties, encode_params.release(), nullptr);
  if (frame_properties) {
    CFRelease(frame_properties);
  }
  if (pixel_buffer) {
    CVBufferRelease(pixel_buffer);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::SetChannelParameters(uint32_t packet_loss,
                                                  int64_t rtt) {
  // Encoder doesn't know anything about packet loss or rtt so just return.
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::SetRates(uint32_t new_bitrate_kbit,
                                      uint32_t frame_rate) {
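  // |new_bitrate_kbit| is in kbps; kVTCompressionPropertyKey_AverageBitRate
  // expects bits per second.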
  bitrate_ = new_bitrate_kbit * 1000;
  if (compression_session_) {
    internal::SetVTSessionProperty(compression_session_,
                                   kVTCompressionPropertyKey_AverageBitRate,
                                   bitrate_);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::Release() {
  callback_ = nullptr;
  // Need to reset so that the session is invalidated and won't use the
  // callback anymore.
  return ResetCompressionSession();
}

int H264VideoToolboxEncoder::ResetCompressionSession() {
  DestroyCompressionSession();

  // Set source image buffer attributes. These attributes will be present on
  // buffers retrieved from the encoder's pixel buffer pool.
  const size_t attributes_size = 3;
  CFTypeRef keys[attributes_size] = {
#if defined(WEBRTC_IOS)
    kCVPixelBufferOpenGLESCompatibilityKey,
#elif defined(WEBRTC_MAC)
    kCVPixelBufferOpenGLCompatibilityKey,
#endif
    kCVPixelBufferIOSurfacePropertiesKey,
    kCVPixelBufferPixelFormatTypeKey
  };
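  // An empty dictionary for kCVPixelBufferIOSurfacePropertiesKey requests
  // IOSurface-backed pixel buffers with default surface options.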
  CFDictionaryRef io_surface_value =
      internal::CreateCFDictionary(nullptr, nullptr, 0);
  int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
  CFNumberRef pixel_format =
      CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
  CFTypeRef values[attributes_size] = {kCFBooleanTrue, io_surface_value,
                                       pixel_format};
  CFDictionaryRef source_attributes =
      internal::CreateCFDictionary(keys, values, attributes_size);
  if (io_surface_value) {
    CFRelease(io_surface_value);
    io_surface_value = nullptr;
  }
  if (pixel_format) {
    CFRelease(pixel_format);
    pixel_format = nullptr;
  }
  OSStatus status = VTCompressionSessionCreate(
      nullptr,  // use default allocator
      width_, height_, kCMVideoCodecType_H264,
      nullptr,  // use default encoder
      source_attributes,
      nullptr,  // use default compressed data allocator
      internal::VTCompressionOutputCallback, this, &compression_session_);
  if (source_attributes) {
    CFRelease(source_attributes);
    source_attributes = nullptr;
  }
  if (status != noErr) {
    LOG(LS_ERROR) << "Failed to create compression session: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  ConfigureCompressionSession();
  return WEBRTC_VIDEO_CODEC_OK;
}

void H264VideoToolboxEncoder::ConfigureCompressionSession() {
  RTC_DCHECK(compression_session_);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_RealTime, true);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_ProfileLevel,
                                 kVTProfileLevel_H264_Baseline_AutoLevel);
  internal::SetVTSessionProperty(
      compression_session_, kVTCompressionPropertyKey_AverageBitRate, bitrate_);
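  // Disallowing frame reordering prevents B-frames, keeping encode latency low
  // for real-time use.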
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_AllowFrameReordering,
                                 false);
  // TODO(tkchin): Look at entropy mode and colorspace matrices.
  // TODO(tkchin): Investigate to see if there's any way to make this work.
  // May need it to interop with Android. Currently this call just fails.
  // On inspecting encoder output on iOS8, this value is set to 6.
  // internal::SetVTSessionProperty(compression_session_,
  //     kVTCompressionPropertyKey_MaxFrameDelayCount,
  //     1);
  // TODO(tkchin): See if enforcing keyframe frequency is beneficial in any
  // way.
  // internal::SetVTSessionProperty(
  //     compression_session_,
  //     kVTCompressionPropertyKey_MaxKeyFrameInterval, 240);
  // internal::SetVTSessionProperty(
  //     compression_session_,
  //     kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240);
}

void H264VideoToolboxEncoder::DestroyCompressionSession() {
  if (compression_session_) {
    VTCompressionSessionInvalidate(compression_session_);
    CFRelease(compression_session_);
    compression_session_ = nullptr;
  }
}

const char* H264VideoToolboxEncoder::ImplementationName() const {
  return "VideoToolbox";
}

}  // namespace webrtc

#endif  // defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)