1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  *
10  */
11 
12 #include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h"
13 
14 #if defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)
15 
16 #include <string>
17 #include <vector>
18 
19 #include "libyuv/convert_from.h"
20 #include "webrtc/base/checks.h"
21 #include "webrtc/base/logging.h"
22 #include "webrtc/base/scoped_ptr.h"
23 #include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h"
24 
25 namespace internal {
26 
27 // Convenience function for creating a dictionary.
CreateCFDictionary(CFTypeRef * keys,CFTypeRef * values,size_t size)28 inline CFDictionaryRef CreateCFDictionary(CFTypeRef* keys,
29                                           CFTypeRef* values,
30                                           size_t size) {
31   return CFDictionaryCreate(kCFAllocatorDefault, keys, values, size,
32                             &kCFTypeDictionaryKeyCallBacks,
33                             &kCFTypeDictionaryValueCallBacks);
34 }
35 
36 // Copies characters from a CFStringRef into a std::string.
CFStringToString(const CFStringRef cf_string)37 std::string CFStringToString(const CFStringRef cf_string) {
38   RTC_DCHECK(cf_string);
39   std::string std_string;
40   // Get the size needed for UTF8 plus terminating character.
41   size_t buffer_size =
42       CFStringGetMaximumSizeForEncoding(CFStringGetLength(cf_string),
43                                         kCFStringEncodingUTF8) +
44       1;
45   rtc::scoped_ptr<char[]> buffer(new char[buffer_size]);
46   if (CFStringGetCString(cf_string, buffer.get(), buffer_size,
47                          kCFStringEncodingUTF8)) {
48     // Copy over the characters.
49     std_string.assign(buffer.get());
50   }
51   return std_string;
52 }
53 
54 // Convenience function for setting a VT property.
SetVTSessionProperty(VTSessionRef session,CFStringRef key,int32_t value)55 void SetVTSessionProperty(VTSessionRef session,
56                           CFStringRef key,
57                           int32_t value) {
58   CFNumberRef cfNum =
59       CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
60   OSStatus status = VTSessionSetProperty(session, key, cfNum);
61   CFRelease(cfNum);
62   if (status != noErr) {
63     std::string key_string = CFStringToString(key);
64     LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
65                   << " to " << value << ": " << status;
66   }
67 }
68 
69 // Convenience function for setting a VT property.
SetVTSessionProperty(VTSessionRef session,CFStringRef key,bool value)70 void SetVTSessionProperty(VTSessionRef session, CFStringRef key, bool value) {
71   CFBooleanRef cf_bool = (value) ? kCFBooleanTrue : kCFBooleanFalse;
72   OSStatus status = VTSessionSetProperty(session, key, cf_bool);
73   if (status != noErr) {
74     std::string key_string = CFStringToString(key);
75     LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
76                   << " to " << value << ": " << status;
77   }
78 }
79 
80 // Convenience function for setting a VT property.
SetVTSessionProperty(VTSessionRef session,CFStringRef key,CFStringRef value)81 void SetVTSessionProperty(VTSessionRef session,
82                           CFStringRef key,
83                           CFStringRef value) {
84   OSStatus status = VTSessionSetProperty(session, key, value);
85   if (status != noErr) {
86     std::string key_string = CFStringToString(key);
87     std::string val_string = CFStringToString(value);
88     LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
89                   << " to " << val_string << ": " << status;
90   }
91 }
92 
93 // Struct that we pass to the encoder per frame to encode. We receive it again
94 // in the encoder callback.
95 struct FrameEncodeParams {
FrameEncodeParamsinternal::FrameEncodeParams96   FrameEncodeParams(webrtc::EncodedImageCallback* cb,
97                     const webrtc::CodecSpecificInfo* csi,
98                     int32_t w,
99                     int32_t h,
100                     int64_t rtms,
101                     uint32_t ts)
102       : callback(cb), width(w), height(h), render_time_ms(rtms), timestamp(ts) {
103     if (csi) {
104       codec_specific_info = *csi;
105     } else {
106       codec_specific_info.codecType = webrtc::kVideoCodecH264;
107     }
108   }
109   webrtc::EncodedImageCallback* callback;
110   webrtc::CodecSpecificInfo codec_specific_info;
111   int32_t width;
112   int32_t height;
113   int64_t render_time_ms;
114   uint32_t timestamp;
115 };
116 
117 // We receive I420Frames as input, but we need to feed CVPixelBuffers into the
118 // encoder. This performs the copy and format conversion.
119 // TODO(tkchin): See if encoder will accept i420 frames and compare performance.
CopyVideoFrameToPixelBuffer(const webrtc::VideoFrame & frame,CVPixelBufferRef pixel_buffer)120 bool CopyVideoFrameToPixelBuffer(const webrtc::VideoFrame& frame,
121                                  CVPixelBufferRef pixel_buffer) {
122   RTC_DCHECK(pixel_buffer);
123   RTC_DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer) ==
124              kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
125   RTC_DCHECK(CVPixelBufferGetHeightOfPlane(pixel_buffer, 0) ==
126              static_cast<size_t>(frame.height()));
127   RTC_DCHECK(CVPixelBufferGetWidthOfPlane(pixel_buffer, 0) ==
128              static_cast<size_t>(frame.width()));
129 
130   CVReturn cvRet = CVPixelBufferLockBaseAddress(pixel_buffer, 0);
131   if (cvRet != kCVReturnSuccess) {
132     LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
133     return false;
134   }
135   uint8_t* dst_y = reinterpret_cast<uint8_t*>(
136       CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 0));
137   int dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 0);
138   uint8_t* dst_uv = reinterpret_cast<uint8_t*>(
139       CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 1));
140   int dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 1);
141   // Convert I420 to NV12.
142   int ret = libyuv::I420ToNV12(
143       frame.buffer(webrtc::kYPlane), frame.stride(webrtc::kYPlane),
144       frame.buffer(webrtc::kUPlane), frame.stride(webrtc::kUPlane),
145       frame.buffer(webrtc::kVPlane), frame.stride(webrtc::kVPlane), dst_y,
146       dst_stride_y, dst_uv, dst_stride_uv, frame.width(), frame.height());
147   CVPixelBufferUnlockBaseAddress(pixel_buffer, 0);
148   if (ret) {
149     LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret;
150     return false;
151   }
152   return true;
153 }
154 
155 // This is the callback function that VideoToolbox calls when encode is
156 // complete.
VTCompressionOutputCallback(void * encoder,void * params,OSStatus status,VTEncodeInfoFlags info_flags,CMSampleBufferRef sample_buffer)157 void VTCompressionOutputCallback(void* encoder,
158                                  void* params,
159                                  OSStatus status,
160                                  VTEncodeInfoFlags info_flags,
161                                  CMSampleBufferRef sample_buffer) {
162   rtc::scoped_ptr<FrameEncodeParams> encode_params(
163       reinterpret_cast<FrameEncodeParams*>(params));
164   if (status != noErr) {
165     LOG(LS_ERROR) << "H264 encoding failed.";
166     return;
167   }
168   if (info_flags & kVTEncodeInfo_FrameDropped) {
169     LOG(LS_INFO) << "H264 encode dropped frame.";
170   }
171 
172   bool is_keyframe = false;
173   CFArrayRef attachments =
174       CMSampleBufferGetSampleAttachmentsArray(sample_buffer, 0);
175   if (attachments != nullptr && CFArrayGetCount(attachments)) {
176     CFDictionaryRef attachment =
177         static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
178     is_keyframe =
179         !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
180   }
181 
182   // Convert the sample buffer into a buffer suitable for RTP packetization.
183   // TODO(tkchin): Allocate buffers through a pool.
184   rtc::scoped_ptr<rtc::Buffer> buffer(new rtc::Buffer());
185   rtc::scoped_ptr<webrtc::RTPFragmentationHeader> header;
186   if (!H264CMSampleBufferToAnnexBBuffer(sample_buffer, is_keyframe,
187                                         buffer.get(), header.accept())) {
188     return;
189   }
190   webrtc::EncodedImage frame(buffer->data(), buffer->size(), buffer->size());
191   frame._encodedWidth = encode_params->width;
192   frame._encodedHeight = encode_params->height;
193   frame._completeFrame = true;
194   frame._frameType =
195       is_keyframe ? webrtc::kVideoFrameKey : webrtc::kVideoFrameDelta;
196   frame.capture_time_ms_ = encode_params->render_time_ms;
197   frame._timeStamp = encode_params->timestamp;
198 
199   int result = encode_params->callback->Encoded(
200       frame, &(encode_params->codec_specific_info), header.get());
201   if (result != 0) {
202     LOG(LS_ERROR) << "Encoded callback failed: " << result;
203   }
204 }
205 
206 }  // namespace internal
207 
208 namespace webrtc {
209 
H264VideoToolboxEncoder()210 H264VideoToolboxEncoder::H264VideoToolboxEncoder()
211     : callback_(nullptr), compression_session_(nullptr) {}
212 
~H264VideoToolboxEncoder()213 H264VideoToolboxEncoder::~H264VideoToolboxEncoder() {
214   DestroyCompressionSession();
215 }
216 
InitEncode(const VideoCodec * codec_settings,int number_of_cores,size_t max_payload_size)217 int H264VideoToolboxEncoder::InitEncode(const VideoCodec* codec_settings,
218                                         int number_of_cores,
219                                         size_t max_payload_size) {
220   RTC_DCHECK(codec_settings);
221   RTC_DCHECK_EQ(codec_settings->codecType, kVideoCodecH264);
222   // TODO(tkchin): We may need to enforce width/height dimension restrictions
223   // to match what the encoder supports.
224   width_ = codec_settings->width;
225   height_ = codec_settings->height;
226   // We can only set average bitrate on the HW encoder.
227   bitrate_ = codec_settings->startBitrate * 1000;
228 
229   // TODO(tkchin): Try setting payload size via
230   // kVTCompressionPropertyKey_MaxH264SliceBytes.
231 
232   return ResetCompressionSession();
233 }
234 
Encode(const VideoFrame & input_image,const CodecSpecificInfo * codec_specific_info,const std::vector<FrameType> * frame_types)235 int H264VideoToolboxEncoder::Encode(
236     const VideoFrame& input_image,
237     const CodecSpecificInfo* codec_specific_info,
238     const std::vector<FrameType>* frame_types) {
239   if (input_image.IsZeroSize()) {
240     // It's possible to get zero sizes as a signal to produce keyframes (this
241     // happens for internal sources). But this shouldn't happen in
242     // webrtcvideoengine2.
243     RTC_NOTREACHED();
244     return WEBRTC_VIDEO_CODEC_OK;
245   }
246   if (!callback_ || !compression_session_) {
247     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
248   }
249 
250   // Get a pixel buffer from the pool and copy frame data over.
251   CVPixelBufferPoolRef pixel_buffer_pool =
252       VTCompressionSessionGetPixelBufferPool(compression_session_);
253   CVPixelBufferRef pixel_buffer = nullptr;
254   CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool,
255                                                     &pixel_buffer);
256   if (ret != kCVReturnSuccess) {
257     LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
258     // We probably want to drop frames here, since failure probably means
259     // that the pool is empty.
260     return WEBRTC_VIDEO_CODEC_ERROR;
261   }
262   RTC_DCHECK(pixel_buffer);
263   if (!internal::CopyVideoFrameToPixelBuffer(input_image, pixel_buffer)) {
264     LOG(LS_ERROR) << "Failed to copy frame data.";
265     CVBufferRelease(pixel_buffer);
266     return WEBRTC_VIDEO_CODEC_ERROR;
267   }
268 
269   // Check if we need a keyframe.
270   bool is_keyframe_required = false;
271   if (frame_types) {
272     for (auto frame_type : *frame_types) {
273       if (frame_type == kVideoFrameKey) {
274         is_keyframe_required = true;
275         break;
276       }
277     }
278   }
279 
280   CMTime presentation_time_stamp =
281       CMTimeMake(input_image.render_time_ms(), 1000);
282   CFDictionaryRef frame_properties = nullptr;
283   if (is_keyframe_required) {
284     CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
285     CFTypeRef values[] = {kCFBooleanTrue};
286     frame_properties = internal::CreateCFDictionary(keys, values, 1);
287   }
288   rtc::scoped_ptr<internal::FrameEncodeParams> encode_params;
289   encode_params.reset(new internal::FrameEncodeParams(
290       callback_, codec_specific_info, width_, height_,
291       input_image.render_time_ms(), input_image.timestamp()));
292   VTCompressionSessionEncodeFrame(
293       compression_session_, pixel_buffer, presentation_time_stamp,
294       kCMTimeInvalid, frame_properties, encode_params.release(), nullptr);
295   if (frame_properties) {
296     CFRelease(frame_properties);
297   }
298   if (pixel_buffer) {
299     CVBufferRelease(pixel_buffer);
300   }
301   return WEBRTC_VIDEO_CODEC_OK;
302 }
303 
RegisterEncodeCompleteCallback(EncodedImageCallback * callback)304 int H264VideoToolboxEncoder::RegisterEncodeCompleteCallback(
305     EncodedImageCallback* callback) {
306   callback_ = callback;
307   return WEBRTC_VIDEO_CODEC_OK;
308 }
309 
SetChannelParameters(uint32_t packet_loss,int64_t rtt)310 int H264VideoToolboxEncoder::SetChannelParameters(uint32_t packet_loss,
311                                                   int64_t rtt) {
312   // Encoder doesn't know anything about packet loss or rtt so just return.
313   return WEBRTC_VIDEO_CODEC_OK;
314 }
315 
SetRates(uint32_t new_bitrate_kbit,uint32_t frame_rate)316 int H264VideoToolboxEncoder::SetRates(uint32_t new_bitrate_kbit,
317                                       uint32_t frame_rate) {
318   bitrate_ = new_bitrate_kbit * 1000;
319   if (compression_session_) {
320     internal::SetVTSessionProperty(compression_session_,
321                                    kVTCompressionPropertyKey_AverageBitRate,
322                                    bitrate_);
323   }
324   return WEBRTC_VIDEO_CODEC_OK;
325 }
326 
Release()327 int H264VideoToolboxEncoder::Release() {
328   callback_ = nullptr;
329   // Need to reset to that the session is invalidated and won't use the
330   // callback anymore.
331   return ResetCompressionSession();
332 }
333 
ResetCompressionSession()334 int H264VideoToolboxEncoder::ResetCompressionSession() {
335   DestroyCompressionSession();
336 
337   // Set source image buffer attributes. These attributes will be present on
338   // buffers retrieved from the encoder's pixel buffer pool.
339   const size_t attributes_size = 3;
340   CFTypeRef keys[attributes_size] = {
341 #if defined(WEBRTC_IOS)
342     kCVPixelBufferOpenGLESCompatibilityKey,
343 #elif defined(WEBRTC_MAC)
344     kCVPixelBufferOpenGLCompatibilityKey,
345 #endif
346     kCVPixelBufferIOSurfacePropertiesKey,
347     kCVPixelBufferPixelFormatTypeKey
348   };
349   CFDictionaryRef io_surface_value =
350       internal::CreateCFDictionary(nullptr, nullptr, 0);
351   int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
352   CFNumberRef pixel_format =
353       CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
354   CFTypeRef values[attributes_size] = {kCFBooleanTrue, io_surface_value,
355                                        pixel_format};
356   CFDictionaryRef source_attributes =
357       internal::CreateCFDictionary(keys, values, attributes_size);
358   if (io_surface_value) {
359     CFRelease(io_surface_value);
360     io_surface_value = nullptr;
361   }
362   if (pixel_format) {
363     CFRelease(pixel_format);
364     pixel_format = nullptr;
365   }
366   OSStatus status = VTCompressionSessionCreate(
367       nullptr,  // use default allocator
368       width_, height_, kCMVideoCodecType_H264,
369       nullptr,  // use default encoder
370       source_attributes,
371       nullptr,  // use default compressed data allocator
372       internal::VTCompressionOutputCallback, this, &compression_session_);
373   if (source_attributes) {
374     CFRelease(source_attributes);
375     source_attributes = nullptr;
376   }
377   if (status != noErr) {
378     LOG(LS_ERROR) << "Failed to create compression session: " << status;
379     return WEBRTC_VIDEO_CODEC_ERROR;
380   }
381   ConfigureCompressionSession();
382   return WEBRTC_VIDEO_CODEC_OK;
383 }
384 
ConfigureCompressionSession()385 void H264VideoToolboxEncoder::ConfigureCompressionSession() {
386   RTC_DCHECK(compression_session_);
387   internal::SetVTSessionProperty(compression_session_,
388                                  kVTCompressionPropertyKey_RealTime, true);
389   internal::SetVTSessionProperty(compression_session_,
390                                  kVTCompressionPropertyKey_ProfileLevel,
391                                  kVTProfileLevel_H264_Baseline_AutoLevel);
392   internal::SetVTSessionProperty(
393       compression_session_, kVTCompressionPropertyKey_AverageBitRate, bitrate_);
394   internal::SetVTSessionProperty(compression_session_,
395                                  kVTCompressionPropertyKey_AllowFrameReordering,
396                                  false);
397   // TODO(tkchin): Look at entropy mode and colorspace matrices.
398   // TODO(tkchin): Investigate to see if there's any way to make this work.
399   // May need it to interop with Android. Currently this call just fails.
400   // On inspecting encoder output on iOS8, this value is set to 6.
401   // internal::SetVTSessionProperty(compression_session_,
402   //     kVTCompressionPropertyKey_MaxFrameDelayCount,
403   //     1);
404   // TODO(tkchin): See if enforcing keyframe frequency is beneficial in any
405   // way.
406   // internal::SetVTSessionProperty(
407   //     compression_session_,
408   //     kVTCompressionPropertyKey_MaxKeyFrameInterval, 240);
409   // internal::SetVTSessionProperty(
410   //     compression_session_,
411   //     kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240);
412 }
413 
DestroyCompressionSession()414 void H264VideoToolboxEncoder::DestroyCompressionSession() {
415   if (compression_session_) {
416     VTCompressionSessionInvalidate(compression_session_);
417     CFRelease(compression_session_);
418     compression_session_ = nullptr;
419   }
420 }
421 
ImplementationName() const422 const char* H264VideoToolboxEncoder::ImplementationName() const {
423   return "VideoToolbox";
424 }
425 
426 }  // namespace webrtc
427 
428 #endif  // defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)
429