1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/video_coding/qm_select.h"
12 
13 #include <math.h>
14 
15 #include "webrtc/modules/include/module_common_types.h"
16 #include "webrtc/modules/video_coding/include/video_coding_defines.h"
17 #include "webrtc/modules/video_coding/internal_defines.h"
18 #include "webrtc/modules/video_coding/qm_select_data.h"
19 #include "webrtc/system_wrappers/include/trace.h"
20 
21 namespace webrtc {
22 
23 // QM-METHOD class
24 
VCMQmMethod()25 VCMQmMethod::VCMQmMethod()
26     : content_metrics_(NULL),
27       width_(0),
28       height_(0),
29       user_frame_rate_(0.0f),
30       native_width_(0),
31       native_height_(0),
32       native_frame_rate_(0.0f),
33       image_type_(kVGA),
34       framerate_level_(kFrameRateHigh),
35       init_(false) {
36   ResetQM();
37 }
38 
~VCMQmMethod()39 VCMQmMethod::~VCMQmMethod() {}
40 
ResetQM()41 void VCMQmMethod::ResetQM() {
42   aspect_ratio_ = 1.0f;
43   motion_.Reset();
44   spatial_.Reset();
45   content_class_ = 0;
46 }
47 
ComputeContentClass()48 uint8_t VCMQmMethod::ComputeContentClass() {
49   ComputeMotionNFD();
50   ComputeSpatial();
51   return content_class_ = 3 * motion_.level + spatial_.level;
52 }
53 
UpdateContent(const VideoContentMetrics * contentMetrics)54 void VCMQmMethod::UpdateContent(const VideoContentMetrics* contentMetrics) {
55   content_metrics_ = contentMetrics;
56 }
57 
ComputeMotionNFD()58 void VCMQmMethod::ComputeMotionNFD() {
59   if (content_metrics_) {
60     motion_.value = content_metrics_->motion_magnitude;
61   }
62   // Determine motion level.
63   if (motion_.value < kLowMotionNfd) {
64     motion_.level = kLow;
65   } else if (motion_.value > kHighMotionNfd) {
66     motion_.level = kHigh;
67   } else {
68     motion_.level = kDefault;
69   }
70 }
71 
ComputeSpatial()72 void VCMQmMethod::ComputeSpatial() {
73   float spatial_err = 0.0;
74   float spatial_err_h = 0.0;
75   float spatial_err_v = 0.0;
76   if (content_metrics_) {
77     spatial_err = content_metrics_->spatial_pred_err;
78     spatial_err_h = content_metrics_->spatial_pred_err_h;
79     spatial_err_v = content_metrics_->spatial_pred_err_v;
80   }
81   // Spatial measure: take average of 3 prediction errors.
82   spatial_.value = (spatial_err + spatial_err_h + spatial_err_v) / 3.0f;
83 
84   // Reduce thresholds for large scenes/higher pixel correlation.
85   float scale2 = image_type_ > kVGA ? kScaleTexture : 1.0;
86 
87   if (spatial_.value > scale2 * kHighTexture) {
88     spatial_.level = kHigh;
89   } else if (spatial_.value < scale2 * kLowTexture) {
90     spatial_.level = kLow;
91   } else {
92     spatial_.level = kDefault;
93   }
94 }
95 
GetImageType(uint16_t width,uint16_t height)96 ImageType VCMQmMethod::GetImageType(uint16_t width, uint16_t height) {
97   // Get the image type for the encoder frame size.
98   uint32_t image_size = width * height;
99   if (image_size == kSizeOfImageType[kQCIF]) {
100     return kQCIF;
101   } else if (image_size == kSizeOfImageType[kHCIF]) {
102     return kHCIF;
103   } else if (image_size == kSizeOfImageType[kQVGA]) {
104     return kQVGA;
105   } else if (image_size == kSizeOfImageType[kCIF]) {
106     return kCIF;
107   } else if (image_size == kSizeOfImageType[kHVGA]) {
108     return kHVGA;
109   } else if (image_size == kSizeOfImageType[kVGA]) {
110     return kVGA;
111   } else if (image_size == kSizeOfImageType[kQFULLHD]) {
112     return kQFULLHD;
113   } else if (image_size == kSizeOfImageType[kWHD]) {
114     return kWHD;
115   } else if (image_size == kSizeOfImageType[kFULLHD]) {
116     return kFULLHD;
117   } else {
118     // No exact match, find closet one.
119     return FindClosestImageType(width, height);
120   }
121 }
122 
FindClosestImageType(uint16_t width,uint16_t height)123 ImageType VCMQmMethod::FindClosestImageType(uint16_t width, uint16_t height) {
124   float size = static_cast<float>(width * height);
125   float min = size;
126   int isel = 0;
127   for (int i = 0; i < kNumImageTypes; ++i) {
128     float dist = fabs(size - kSizeOfImageType[i]);
129     if (dist < min) {
130       min = dist;
131       isel = i;
132     }
133   }
134   return static_cast<ImageType>(isel);
135 }
136 
FrameRateLevel(float avg_framerate)137 FrameRateLevelClass VCMQmMethod::FrameRateLevel(float avg_framerate) {
138   if (avg_framerate <= kLowFrameRate) {
139     return kFrameRateLow;
140   } else if (avg_framerate <= kMiddleFrameRate) {
141     return kFrameRateMiddle1;
142   } else if (avg_framerate <= kHighFrameRate) {
143     return kFrameRateMiddle2;
144   } else {
145     return kFrameRateHigh;
146   }
147 }
148 
149 // RESOLUTION CLASS
150 
VCMQmResolution()151 VCMQmResolution::VCMQmResolution() : qm_(new VCMResolutionScale()) {
152   Reset();
153 }
154 
~VCMQmResolution()155 VCMQmResolution::~VCMQmResolution() {
156   delete qm_;
157 }
158 
ResetRates()159 void VCMQmResolution::ResetRates() {
160   sum_target_rate_ = 0.0f;
161   sum_incoming_framerate_ = 0.0f;
162   sum_rate_MM_ = 0.0f;
163   sum_rate_MM_sgn_ = 0.0f;
164   sum_packet_loss_ = 0.0f;
165   buffer_level_ = kInitBufferLevel * target_bitrate_;
166   frame_cnt_ = 0;
167   frame_cnt_delta_ = 0;
168   low_buffer_cnt_ = 0;
169   update_rate_cnt_ = 0;
170 }
171 
ResetDownSamplingState()172 void VCMQmResolution::ResetDownSamplingState() {
173   state_dec_factor_spatial_ = 1.0;
174   state_dec_factor_temporal_ = 1.0;
175   for (int i = 0; i < kDownActionHistorySize; i++) {
176     down_action_history_[i].spatial = kNoChangeSpatial;
177     down_action_history_[i].temporal = kNoChangeTemporal;
178   }
179 }
180 
Reset()181 void VCMQmResolution::Reset() {
182   target_bitrate_ = 0.0f;
183   incoming_framerate_ = 0.0f;
184   buffer_level_ = 0.0f;
185   per_frame_bandwidth_ = 0.0f;
186   avg_target_rate_ = 0.0f;
187   avg_incoming_framerate_ = 0.0f;
188   avg_ratio_buffer_low_ = 0.0f;
189   avg_rate_mismatch_ = 0.0f;
190   avg_rate_mismatch_sgn_ = 0.0f;
191   avg_packet_loss_ = 0.0f;
192   encoder_state_ = kStableEncoding;
193   num_layers_ = 1;
194   ResetRates();
195   ResetDownSamplingState();
196   ResetQM();
197 }
198 
GetEncoderState()199 EncoderState VCMQmResolution::GetEncoderState() {
200   return encoder_state_;
201 }
202 
203 // Initialize state after re-initializing the encoder,
204 // i.e., after SetEncodingData() in mediaOpt.
Initialize(float bitrate,float user_framerate,uint16_t width,uint16_t height,int num_layers)205 int VCMQmResolution::Initialize(float bitrate,
206                                 float user_framerate,
207                                 uint16_t width,
208                                 uint16_t height,
209                                 int num_layers) {
210   if (user_framerate == 0.0f || width == 0 || height == 0) {
211     return VCM_PARAMETER_ERROR;
212   }
213   Reset();
214   target_bitrate_ = bitrate;
215   incoming_framerate_ = user_framerate;
216   UpdateCodecParameters(user_framerate, width, height);
217   native_width_ = width;
218   native_height_ = height;
219   native_frame_rate_ = user_framerate;
220   num_layers_ = num_layers;
221   // Initial buffer level.
222   buffer_level_ = kInitBufferLevel * target_bitrate_;
223   // Per-frame bandwidth.
224   per_frame_bandwidth_ = target_bitrate_ / user_framerate;
225   init_ = true;
226   return VCM_OK;
227 }
228 
UpdateCodecParameters(float frame_rate,uint16_t width,uint16_t height)229 void VCMQmResolution::UpdateCodecParameters(float frame_rate,
230                                             uint16_t width,
231                                             uint16_t height) {
232   width_ = width;
233   height_ = height;
234   // |user_frame_rate| is the target frame rate for VPM frame dropper.
235   user_frame_rate_ = frame_rate;
236   image_type_ = GetImageType(width, height);
237 }
238 
239 // Update rate data after every encoded frame.
UpdateEncodedSize(size_t encoded_size)240 void VCMQmResolution::UpdateEncodedSize(size_t encoded_size) {
241   frame_cnt_++;
242   // Convert to Kbps.
243   float encoded_size_kbits = 8.0f * static_cast<float>(encoded_size) / 1000.0f;
244 
245   // Update the buffer level:
246   // Note this is not the actual encoder buffer level.
247   // |buffer_level_| is reset to an initial value after SelectResolution is
248   // called, and does not account for frame dropping by encoder or VCM.
249   buffer_level_ += per_frame_bandwidth_ - encoded_size_kbits;
250 
251   // Counter for occurrences of low buffer level:
252   // low/negative values means encoder is likely dropping frames.
253   if (buffer_level_ <= kPercBufferThr * kInitBufferLevel * target_bitrate_) {
254     low_buffer_cnt_++;
255   }
256 }
257 
258 // Update various quantities after SetTargetRates in MediaOpt.
UpdateRates(float target_bitrate,float encoder_sent_rate,float incoming_framerate,uint8_t packet_loss)259 void VCMQmResolution::UpdateRates(float target_bitrate,
260                                   float encoder_sent_rate,
261                                   float incoming_framerate,
262                                   uint8_t packet_loss) {
263   // Sum the target bitrate: this is the encoder rate from previous update
264   // (~1sec), i.e, before the update for next ~1sec.
265   sum_target_rate_ += target_bitrate_;
266   update_rate_cnt_++;
267 
268   // Sum the received (from RTCP reports) packet loss rates.
269   sum_packet_loss_ += static_cast<float>(packet_loss / 255.0);
270 
271   // Sum the sequence rate mismatch:
272   // Mismatch here is based on the difference between the target rate
273   // used (in previous ~1sec) and the average actual encoding rate measured
274   // at previous ~1sec.
275   float diff = target_bitrate_ - encoder_sent_rate;
276   if (target_bitrate_ > 0.0)
277     sum_rate_MM_ += fabs(diff) / target_bitrate_;
278   int sgnDiff = diff > 0 ? 1 : (diff < 0 ? -1 : 0);
279   // To check for consistent under(+)/over_shooting(-) of target rate.
280   sum_rate_MM_sgn_ += sgnDiff;
281 
282   // Update with the current new target and frame rate:
283   // these values are ones the encoder will use for the current/next ~1sec.
284   target_bitrate_ = target_bitrate;
285   incoming_framerate_ = incoming_framerate;
286   sum_incoming_framerate_ += incoming_framerate_;
287   // Update the per_frame_bandwidth:
288   // this is the per_frame_bw for the current/next ~1sec.
289   per_frame_bandwidth_ = 0.0f;
290   if (incoming_framerate_ > 0.0f) {
291     per_frame_bandwidth_ = target_bitrate_ / incoming_framerate_;
292   }
293 }
294 
295 // Select the resolution factors: frame size and frame rate change (qm scales).
296 // Selection is for going down in resolution, or for going back up
297 // (if a previous down-sampling action was taken).
298 
299 // In the current version the following constraints are imposed:
300 // 1) We only allow for one action, either down or up, at a given time.
301 // 2) The possible down-sampling actions are: spatial by 1/2x1/2, 3/4x3/4;
302 //    temporal/frame rate reduction by 1/2 and 2/3.
303 // 3) The action for going back up is the reverse of last (spatial or temporal)
304 //    down-sampling action. The list of down-sampling actions from the
305 //    Initialize() state are kept in |down_action_history_|.
306 // 4) The total amount of down-sampling (spatial and/or temporal) from the
307 //    Initialize() state (native resolution) is limited by various factors.
SelectResolution(VCMResolutionScale ** qm)308 int VCMQmResolution::SelectResolution(VCMResolutionScale** qm) {
309   if (!init_) {
310     return VCM_UNINITIALIZED;
311   }
312   if (content_metrics_ == NULL) {
313     Reset();
314     *qm = qm_;
315     return VCM_OK;
316   }
317 
318   // Check conditions on down-sampling state.
319   assert(state_dec_factor_spatial_ >= 1.0f);
320   assert(state_dec_factor_temporal_ >= 1.0f);
321   assert(state_dec_factor_spatial_ <= kMaxSpatialDown);
322   assert(state_dec_factor_temporal_ <= kMaxTempDown);
323   assert(state_dec_factor_temporal_ * state_dec_factor_spatial_ <=
324          kMaxTotalDown);
325 
326   // Compute content class for selection.
327   content_class_ = ComputeContentClass();
328   // Compute various rate quantities for selection.
329   ComputeRatesForSelection();
330 
331   // Get the encoder state.
332   ComputeEncoderState();
333 
334   // Default settings: no action.
335   SetDefaultAction();
336   *qm = qm_;
337 
338   // Check for going back up in resolution, if we have had some down-sampling
339   // relative to native state in Initialize().
340   if (down_action_history_[0].spatial != kNoChangeSpatial ||
341       down_action_history_[0].temporal != kNoChangeTemporal) {
342     if (GoingUpResolution()) {
343       *qm = qm_;
344       return VCM_OK;
345     }
346   }
347 
348   // Check for going down in resolution.
349   if (GoingDownResolution()) {
350     *qm = qm_;
351     return VCM_OK;
352   }
353   return VCM_OK;
354 }
355 
SetDefaultAction()356 void VCMQmResolution::SetDefaultAction() {
357   qm_->codec_width = width_;
358   qm_->codec_height = height_;
359   qm_->frame_rate = user_frame_rate_;
360   qm_->change_resolution_spatial = false;
361   qm_->change_resolution_temporal = false;
362   qm_->spatial_width_fact = 1.0f;
363   qm_->spatial_height_fact = 1.0f;
364   qm_->temporal_fact = 1.0f;
365   action_.spatial = kNoChangeSpatial;
366   action_.temporal = kNoChangeTemporal;
367 }
368 
ComputeRatesForSelection()369 void VCMQmResolution::ComputeRatesForSelection() {
370   avg_target_rate_ = 0.0f;
371   avg_incoming_framerate_ = 0.0f;
372   avg_ratio_buffer_low_ = 0.0f;
373   avg_rate_mismatch_ = 0.0f;
374   avg_rate_mismatch_sgn_ = 0.0f;
375   avg_packet_loss_ = 0.0f;
376   if (frame_cnt_ > 0) {
377     avg_ratio_buffer_low_ =
378         static_cast<float>(low_buffer_cnt_) / static_cast<float>(frame_cnt_);
379   }
380   if (update_rate_cnt_ > 0) {
381     avg_rate_mismatch_ =
382         static_cast<float>(sum_rate_MM_) / static_cast<float>(update_rate_cnt_);
383     avg_rate_mismatch_sgn_ = static_cast<float>(sum_rate_MM_sgn_) /
384                              static_cast<float>(update_rate_cnt_);
385     avg_target_rate_ = static_cast<float>(sum_target_rate_) /
386                        static_cast<float>(update_rate_cnt_);
387     avg_incoming_framerate_ = static_cast<float>(sum_incoming_framerate_) /
388                               static_cast<float>(update_rate_cnt_);
389     avg_packet_loss_ = static_cast<float>(sum_packet_loss_) /
390                        static_cast<float>(update_rate_cnt_);
391   }
392   // For selection we may want to weight some quantities more heavily
393   // with the current (i.e., next ~1sec) rate values.
394   avg_target_rate_ =
395       kWeightRate * avg_target_rate_ + (1.0 - kWeightRate) * target_bitrate_;
396   avg_incoming_framerate_ = kWeightRate * avg_incoming_framerate_ +
397                             (1.0 - kWeightRate) * incoming_framerate_;
398   // Use base layer frame rate for temporal layers: this will favor spatial.
399   assert(num_layers_ > 0);
400   framerate_level_ = FrameRateLevel(avg_incoming_framerate_ /
401                                     static_cast<float>(1 << (num_layers_ - 1)));
402 }
403 
ComputeEncoderState()404 void VCMQmResolution::ComputeEncoderState() {
405   // Default.
406   encoder_state_ = kStableEncoding;
407 
408   // Assign stressed state if:
409   // 1) occurrences of low buffer levels is high, or
410   // 2) rate mis-match is high, and consistent over-shooting by encoder.
411   if ((avg_ratio_buffer_low_ > kMaxBufferLow) ||
412       ((avg_rate_mismatch_ > kMaxRateMisMatch) &&
413        (avg_rate_mismatch_sgn_ < -kRateOverShoot))) {
414     encoder_state_ = kStressedEncoding;
415   }
416   // Assign easy state if:
417   // 1) rate mis-match is high, and
418   // 2) consistent under-shooting by encoder.
419   if ((avg_rate_mismatch_ > kMaxRateMisMatch) &&
420       (avg_rate_mismatch_sgn_ > kRateUnderShoot)) {
421     encoder_state_ = kEasyEncoding;
422   }
423 }
424 
GoingUpResolution()425 bool VCMQmResolution::GoingUpResolution() {
426   // For going up, we check for undoing the previous down-sampling action.
427 
428   float fac_width = kFactorWidthSpatial[down_action_history_[0].spatial];
429   float fac_height = kFactorHeightSpatial[down_action_history_[0].spatial];
430   float fac_temp = kFactorTemporal[down_action_history_[0].temporal];
431   // For going up spatially, we allow for going up by 3/4x3/4 at each stage.
432   // So if the last spatial action was 1/2x1/2 it would be undone in 2 stages.
433   // Modify the fac_width/height for this case.
434   if (down_action_history_[0].spatial == kOneQuarterSpatialUniform) {
435     fac_width = kFactorWidthSpatial[kOneQuarterSpatialUniform] /
436                 kFactorWidthSpatial[kOneHalfSpatialUniform];
437     fac_height = kFactorHeightSpatial[kOneQuarterSpatialUniform] /
438                  kFactorHeightSpatial[kOneHalfSpatialUniform];
439   }
440 
441   // Check if we should go up both spatially and temporally.
442   if (down_action_history_[0].spatial != kNoChangeSpatial &&
443       down_action_history_[0].temporal != kNoChangeTemporal) {
444     if (ConditionForGoingUp(fac_width, fac_height, fac_temp,
445                             kTransRateScaleUpSpatialTemp)) {
446       action_.spatial = down_action_history_[0].spatial;
447       action_.temporal = down_action_history_[0].temporal;
448       UpdateDownsamplingState(kUpResolution);
449       return true;
450     }
451   }
452   // Check if we should go up either spatially or temporally.
453   bool selected_up_spatial = false;
454   bool selected_up_temporal = false;
455   if (down_action_history_[0].spatial != kNoChangeSpatial) {
456     selected_up_spatial = ConditionForGoingUp(fac_width, fac_height, 1.0f,
457                                               kTransRateScaleUpSpatial);
458   }
459   if (down_action_history_[0].temporal != kNoChangeTemporal) {
460     selected_up_temporal =
461         ConditionForGoingUp(1.0f, 1.0f, fac_temp, kTransRateScaleUpTemp);
462   }
463   if (selected_up_spatial && !selected_up_temporal) {
464     action_.spatial = down_action_history_[0].spatial;
465     action_.temporal = kNoChangeTemporal;
466     UpdateDownsamplingState(kUpResolution);
467     return true;
468   } else if (!selected_up_spatial && selected_up_temporal) {
469     action_.spatial = kNoChangeSpatial;
470     action_.temporal = down_action_history_[0].temporal;
471     UpdateDownsamplingState(kUpResolution);
472     return true;
473   } else if (selected_up_spatial && selected_up_temporal) {
474     PickSpatialOrTemporal();
475     UpdateDownsamplingState(kUpResolution);
476     return true;
477   }
478   return false;
479 }
480 
ConditionForGoingUp(float fac_width,float fac_height,float fac_temp,float scale_fac)481 bool VCMQmResolution::ConditionForGoingUp(float fac_width,
482                                           float fac_height,
483                                           float fac_temp,
484                                           float scale_fac) {
485   float estimated_transition_rate_up =
486       GetTransitionRate(fac_width, fac_height, fac_temp, scale_fac);
487   // Go back up if:
488   // 1) target rate is above threshold and current encoder state is stable, or
489   // 2) encoder state is easy (encoder is significantly under-shooting target).
490   if (((avg_target_rate_ > estimated_transition_rate_up) &&
491        (encoder_state_ == kStableEncoding)) ||
492       (encoder_state_ == kEasyEncoding)) {
493     return true;
494   } else {
495     return false;
496   }
497 }
498 
GoingDownResolution()499 bool VCMQmResolution::GoingDownResolution() {
500   float estimated_transition_rate_down =
501       GetTransitionRate(1.0f, 1.0f, 1.0f, 1.0f);
502   float max_rate = kFrameRateFac[framerate_level_] * kMaxRateQm[image_type_];
503   // Resolution reduction if:
504   // (1) target rate is below transition rate, or
505   // (2) encoder is in stressed state and target rate below a max threshold.
506   if ((avg_target_rate_ < estimated_transition_rate_down) ||
507       (encoder_state_ == kStressedEncoding && avg_target_rate_ < max_rate)) {
508     // Get the down-sampling action: based on content class, and how low
509     // average target rate is relative to transition rate.
510     uint8_t spatial_fact =
511         kSpatialAction[content_class_ +
512                        9 * RateClass(estimated_transition_rate_down)];
513     uint8_t temp_fact =
514         kTemporalAction[content_class_ +
515                         9 * RateClass(estimated_transition_rate_down)];
516 
517     switch (spatial_fact) {
518       case 4: {
519         action_.spatial = kOneQuarterSpatialUniform;
520         break;
521       }
522       case 2: {
523         action_.spatial = kOneHalfSpatialUniform;
524         break;
525       }
526       case 1: {
527         action_.spatial = kNoChangeSpatial;
528         break;
529       }
530       default: { assert(false); }
531     }
532     switch (temp_fact) {
533       case 3: {
534         action_.temporal = kTwoThirdsTemporal;
535         break;
536       }
537       case 2: {
538         action_.temporal = kOneHalfTemporal;
539         break;
540       }
541       case 1: {
542         action_.temporal = kNoChangeTemporal;
543         break;
544       }
545       default: { assert(false); }
546     }
547     // Only allow for one action (spatial or temporal) at a given time.
548     assert(action_.temporal == kNoChangeTemporal ||
549            action_.spatial == kNoChangeSpatial);
550 
551     // Adjust cases not captured in tables, mainly based on frame rate, and
552     // also check for odd frame sizes.
553     AdjustAction();
554 
555     // Update down-sampling state.
556     if (action_.spatial != kNoChangeSpatial ||
557         action_.temporal != kNoChangeTemporal) {
558       UpdateDownsamplingState(kDownResolution);
559       return true;
560     }
561   }
562   return false;
563 }
564 
GetTransitionRate(float fac_width,float fac_height,float fac_temp,float scale_fac)565 float VCMQmResolution::GetTransitionRate(float fac_width,
566                                          float fac_height,
567                                          float fac_temp,
568                                          float scale_fac) {
569   ImageType image_type =
570       GetImageType(static_cast<uint16_t>(fac_width * width_),
571                    static_cast<uint16_t>(fac_height * height_));
572 
573   FrameRateLevelClass framerate_level =
574       FrameRateLevel(fac_temp * avg_incoming_framerate_);
575   // If we are checking for going up temporally, and this is the last
576   // temporal action, then use native frame rate.
577   if (down_action_history_[1].temporal == kNoChangeTemporal &&
578       fac_temp > 1.0f) {
579     framerate_level = FrameRateLevel(native_frame_rate_);
580   }
581 
582   // The maximum allowed rate below which down-sampling is allowed:
583   // Nominal values based on image format (frame size and frame rate).
584   float max_rate = kFrameRateFac[framerate_level] * kMaxRateQm[image_type];
585 
586   uint8_t image_class = image_type > kVGA ? 1 : 0;
587   uint8_t table_index = image_class * 9 + content_class_;
588   // Scale factor for down-sampling transition threshold:
589   // factor based on the content class and the image size.
590   float scaleTransRate = kScaleTransRateQm[table_index];
591   // Threshold bitrate for resolution action.
592   return static_cast<float>(scale_fac * scaleTransRate * max_rate);
593 }
594 
UpdateDownsamplingState(UpDownAction up_down)595 void VCMQmResolution::UpdateDownsamplingState(UpDownAction up_down) {
596   if (up_down == kUpResolution) {
597     qm_->spatial_width_fact = 1.0f / kFactorWidthSpatial[action_.spatial];
598     qm_->spatial_height_fact = 1.0f / kFactorHeightSpatial[action_.spatial];
599     // If last spatial action was 1/2x1/2, we undo it in two steps, so the
600     // spatial scale factor in this first step is modified as (4.0/3.0 / 2.0).
601     if (action_.spatial == kOneQuarterSpatialUniform) {
602       qm_->spatial_width_fact = 1.0f *
603                                 kFactorWidthSpatial[kOneHalfSpatialUniform] /
604                                 kFactorWidthSpatial[kOneQuarterSpatialUniform];
605       qm_->spatial_height_fact =
606           1.0f * kFactorHeightSpatial[kOneHalfSpatialUniform] /
607           kFactorHeightSpatial[kOneQuarterSpatialUniform];
608     }
609     qm_->temporal_fact = 1.0f / kFactorTemporal[action_.temporal];
610     RemoveLastDownAction();
611   } else if (up_down == kDownResolution) {
612     ConstrainAmountOfDownSampling();
613     ConvertSpatialFractionalToWhole();
614     qm_->spatial_width_fact = kFactorWidthSpatial[action_.spatial];
615     qm_->spatial_height_fact = kFactorHeightSpatial[action_.spatial];
616     qm_->temporal_fact = kFactorTemporal[action_.temporal];
617     InsertLatestDownAction();
618   } else {
619     // This function should only be called if either the Up or Down action
620     // has been selected.
621     assert(false);
622   }
623   UpdateCodecResolution();
624   state_dec_factor_spatial_ = state_dec_factor_spatial_ *
625                               qm_->spatial_width_fact *
626                               qm_->spatial_height_fact;
627   state_dec_factor_temporal_ = state_dec_factor_temporal_ * qm_->temporal_fact;
628 }
629 
UpdateCodecResolution()630 void VCMQmResolution::UpdateCodecResolution() {
631   if (action_.spatial != kNoChangeSpatial) {
632     qm_->change_resolution_spatial = true;
633     qm_->codec_width =
634         static_cast<uint16_t>(width_ / qm_->spatial_width_fact + 0.5f);
635     qm_->codec_height =
636         static_cast<uint16_t>(height_ / qm_->spatial_height_fact + 0.5f);
637     // Size should not exceed native sizes.
638     assert(qm_->codec_width <= native_width_);
639     assert(qm_->codec_height <= native_height_);
640     // New sizes should be multiple of 2, otherwise spatial should not have
641     // been selected.
642     assert(qm_->codec_width % 2 == 0);
643     assert(qm_->codec_height % 2 == 0);
644   }
645   if (action_.temporal != kNoChangeTemporal) {
646     qm_->change_resolution_temporal = true;
647     // Update the frame rate based on the average incoming frame rate.
648     qm_->frame_rate = avg_incoming_framerate_ / qm_->temporal_fact + 0.5f;
649     if (down_action_history_[0].temporal == 0) {
650       // When we undo the last temporal-down action, make sure we go back up
651       // to the native frame rate. Since the incoming frame rate may
652       // fluctuate over time, |avg_incoming_framerate_| scaled back up may
653       // be smaller than |native_frame rate_|.
654       qm_->frame_rate = native_frame_rate_;
655     }
656   }
657 }
658 
RateClass(float transition_rate)659 uint8_t VCMQmResolution::RateClass(float transition_rate) {
660   return avg_target_rate_ < (kFacLowRate * transition_rate)
661              ? 0
662              : (avg_target_rate_ >= transition_rate ? 2 : 1);
663 }
664 
665 // TODO(marpan): Would be better to capture these frame rate adjustments by
666 // extending the table data (qm_select_data.h).
AdjustAction()667 void VCMQmResolution::AdjustAction() {
668   // If the spatial level is default state (neither low or high), motion level
669   // is not high, and spatial action was selected, switch to 2/3 frame rate
670   // reduction if the average incoming frame rate is high.
671   if (spatial_.level == kDefault && motion_.level != kHigh &&
672       action_.spatial != kNoChangeSpatial &&
673       framerate_level_ == kFrameRateHigh) {
674     action_.spatial = kNoChangeSpatial;
675     action_.temporal = kTwoThirdsTemporal;
676   }
677   // If both motion and spatial level are low, and temporal down action was
678   // selected, switch to spatial 3/4x3/4 if the frame rate is not above the
679   // lower middle level (|kFrameRateMiddle1|).
680   if (motion_.level == kLow && spatial_.level == kLow &&
681       framerate_level_ <= kFrameRateMiddle1 &&
682       action_.temporal != kNoChangeTemporal) {
683     action_.spatial = kOneHalfSpatialUniform;
684     action_.temporal = kNoChangeTemporal;
685   }
686   // If spatial action is selected, and there has been too much spatial
687   // reduction already (i.e., 1/4), then switch to temporal action if the
688   // average frame rate is not low.
689   if (action_.spatial != kNoChangeSpatial &&
690       down_action_history_[0].spatial == kOneQuarterSpatialUniform &&
691       framerate_level_ != kFrameRateLow) {
692     action_.spatial = kNoChangeSpatial;
693     action_.temporal = kTwoThirdsTemporal;
694   }
695   // Never use temporal action if number of temporal layers is above 2.
696   if (num_layers_ > 2) {
697     if (action_.temporal != kNoChangeTemporal) {
698       action_.spatial = kOneHalfSpatialUniform;
699     }
700     action_.temporal = kNoChangeTemporal;
701   }
702   // If spatial action was selected, we need to make sure the frame sizes
703   // are multiples of two. Otherwise switch to 2/3 temporal.
704   if (action_.spatial != kNoChangeSpatial && !EvenFrameSize()) {
705     action_.spatial = kNoChangeSpatial;
706     // Only one action (spatial or temporal) is allowed at a given time, so need
707     // to check whether temporal action is currently selected.
708     action_.temporal = kTwoThirdsTemporal;
709   }
710 }
711 
ConvertSpatialFractionalToWhole()712 void VCMQmResolution::ConvertSpatialFractionalToWhole() {
713   // If 3/4 spatial is selected, check if there has been another 3/4,
714   // and if so, combine them into 1/2. 1/2 scaling is more efficient than 9/16.
715   // Note we define 3/4x3/4 spatial as kOneHalfSpatialUniform.
716   if (action_.spatial == kOneHalfSpatialUniform) {
717     bool found = false;
718     int isel = kDownActionHistorySize;
719     for (int i = 0; i < kDownActionHistorySize; ++i) {
720       if (down_action_history_[i].spatial == kOneHalfSpatialUniform) {
721         isel = i;
722         found = true;
723         break;
724       }
725     }
726     if (found) {
727       action_.spatial = kOneQuarterSpatialUniform;
728       state_dec_factor_spatial_ =
729           state_dec_factor_spatial_ /
730           (kFactorWidthSpatial[kOneHalfSpatialUniform] *
731            kFactorHeightSpatial[kOneHalfSpatialUniform]);
732       // Check if switching to 1/2x1/2 (=1/4) spatial is allowed.
733       ConstrainAmountOfDownSampling();
734       if (action_.spatial == kNoChangeSpatial) {
735         // Not allowed. Go back to 3/4x3/4 spatial.
736         action_.spatial = kOneHalfSpatialUniform;
737         state_dec_factor_spatial_ =
738             state_dec_factor_spatial_ *
739             kFactorWidthSpatial[kOneHalfSpatialUniform] *
740             kFactorHeightSpatial[kOneHalfSpatialUniform];
741       } else {
742         // Switching is allowed. Remove 3/4x3/4 from the history, and update
743         // the frame size.
744         for (int i = isel; i < kDownActionHistorySize - 1; ++i) {
745           down_action_history_[i].spatial = down_action_history_[i + 1].spatial;
746         }
747         width_ = width_ * kFactorWidthSpatial[kOneHalfSpatialUniform];
748         height_ = height_ * kFactorHeightSpatial[kOneHalfSpatialUniform];
749       }
750     }
751   }
752 }
753 
754 // Returns false if the new frame sizes, under the current spatial action,
755 // are not multiples of two.
EvenFrameSize()756 bool VCMQmResolution::EvenFrameSize() {
757   if (action_.spatial == kOneHalfSpatialUniform) {
758     if ((width_ * 3 / 4) % 2 != 0 || (height_ * 3 / 4) % 2 != 0) {
759       return false;
760     }
761   } else if (action_.spatial == kOneQuarterSpatialUniform) {
762     if ((width_ * 1 / 2) % 2 != 0 || (height_ * 1 / 2) % 2 != 0) {
763       return false;
764     }
765   }
766   return true;
767 }
768 
InsertLatestDownAction()769 void VCMQmResolution::InsertLatestDownAction() {
770   if (action_.spatial != kNoChangeSpatial) {
771     for (int i = kDownActionHistorySize - 1; i > 0; --i) {
772       down_action_history_[i].spatial = down_action_history_[i - 1].spatial;
773     }
774     down_action_history_[0].spatial = action_.spatial;
775   }
776   if (action_.temporal != kNoChangeTemporal) {
777     for (int i = kDownActionHistorySize - 1; i > 0; --i) {
778       down_action_history_[i].temporal = down_action_history_[i - 1].temporal;
779     }
780     down_action_history_[0].temporal = action_.temporal;
781   }
782 }
783 
RemoveLastDownAction()784 void VCMQmResolution::RemoveLastDownAction() {
785   if (action_.spatial != kNoChangeSpatial) {
786     // If the last spatial action was 1/2x1/2 we replace it with 3/4x3/4.
787     if (action_.spatial == kOneQuarterSpatialUniform) {
788       down_action_history_[0].spatial = kOneHalfSpatialUniform;
789     } else {
790       for (int i = 0; i < kDownActionHistorySize - 1; ++i) {
791         down_action_history_[i].spatial = down_action_history_[i + 1].spatial;
792       }
793       down_action_history_[kDownActionHistorySize - 1].spatial =
794           kNoChangeSpatial;
795     }
796   }
797   if (action_.temporal != kNoChangeTemporal) {
798     for (int i = 0; i < kDownActionHistorySize - 1; ++i) {
799       down_action_history_[i].temporal = down_action_history_[i + 1].temporal;
800     }
801     down_action_history_[kDownActionHistorySize - 1].temporal =
802         kNoChangeTemporal;
803   }
804 }
805 
ConstrainAmountOfDownSampling()806 void VCMQmResolution::ConstrainAmountOfDownSampling() {
807   // Sanity checks on down-sampling selection:
808   // override the settings for too small image size and/or frame rate.
809   // Also check the limit on current down-sampling states.
810 
811   float spatial_width_fact = kFactorWidthSpatial[action_.spatial];
812   float spatial_height_fact = kFactorHeightSpatial[action_.spatial];
813   float temporal_fact = kFactorTemporal[action_.temporal];
814   float new_dec_factor_spatial =
815       state_dec_factor_spatial_ * spatial_width_fact * spatial_height_fact;
816   float new_dec_factor_temp = state_dec_factor_temporal_ * temporal_fact;
817 
818   // No spatial sampling if current frame size is too small, or if the
819   // amount of spatial down-sampling is above maximum spatial down-action.
820   if ((width_ * height_) <= kMinImageSize ||
821       new_dec_factor_spatial > kMaxSpatialDown) {
822     action_.spatial = kNoChangeSpatial;
823     new_dec_factor_spatial = state_dec_factor_spatial_;
824   }
825   // No frame rate reduction if average frame rate is below some point, or if
826   // the amount of temporal down-sampling is above maximum temporal down-action.
827   if (avg_incoming_framerate_ <= kMinFrameRate ||
828       new_dec_factor_temp > kMaxTempDown) {
829     action_.temporal = kNoChangeTemporal;
830     new_dec_factor_temp = state_dec_factor_temporal_;
831   }
832   // Check if the total (spatial-temporal) down-action is above maximum allowed,
833   // if so, disallow the current selected down-action.
834   if (new_dec_factor_spatial * new_dec_factor_temp > kMaxTotalDown) {
835     if (action_.spatial != kNoChangeSpatial) {
836       action_.spatial = kNoChangeSpatial;
837     } else if (action_.temporal != kNoChangeTemporal) {
838       action_.temporal = kNoChangeTemporal;
839     } else {
840       // We only allow for one action (spatial or temporal) at a given time, so
841       // either spatial or temporal action is selected when this function is
842       // called. If the selected action is disallowed from one of the above
843       // 2 prior conditions (on spatial & temporal max down-action), then this
844       // condition "total down-action > |kMaxTotalDown|" would not be entered.
845       assert(false);
846     }
847   }
848 }
849 
PickSpatialOrTemporal()850 void VCMQmResolution::PickSpatialOrTemporal() {
851   // Pick the one that has had the most down-sampling thus far.
852   if (state_dec_factor_spatial_ > state_dec_factor_temporal_) {
853     action_.spatial = down_action_history_[0].spatial;
854     action_.temporal = kNoChangeTemporal;
855   } else {
856     action_.spatial = kNoChangeSpatial;
857     action_.temporal = down_action_history_[0].temporal;
858   }
859 }
860 
861 // TODO(marpan): Update when we allow for directional spatial down-sampling.
SelectSpatialDirectionMode(float transition_rate)862 void VCMQmResolution::SelectSpatialDirectionMode(float transition_rate) {
863   // Default is 4/3x4/3
864   // For bit rates well below transitional rate, we select 2x2.
865   if (avg_target_rate_ < transition_rate * kRateRedSpatial2X2) {
866     qm_->spatial_width_fact = 2.0f;
867     qm_->spatial_height_fact = 2.0f;
868   }
869   // Otherwise check prediction errors and aspect ratio.
870   float spatial_err = 0.0f;
871   float spatial_err_h = 0.0f;
872   float spatial_err_v = 0.0f;
873   if (content_metrics_) {
874     spatial_err = content_metrics_->spatial_pred_err;
875     spatial_err_h = content_metrics_->spatial_pred_err_h;
876     spatial_err_v = content_metrics_->spatial_pred_err_v;
877   }
878 
879   // Favor 1x2 if aspect_ratio is 16:9.
880   if (aspect_ratio_ >= 16.0f / 9.0f) {
881     // Check if 1x2 has lowest prediction error.
882     if (spatial_err_h < spatial_err && spatial_err_h < spatial_err_v) {
883       qm_->spatial_width_fact = 2.0f;
884       qm_->spatial_height_fact = 1.0f;
885     }
886   }
887   // Check for 4/3x4/3 selection: favor 2x2 over 1x2 and 2x1.
888   if (spatial_err < spatial_err_h * (1.0f + kSpatialErr2x2VsHoriz) &&
889       spatial_err < spatial_err_v * (1.0f + kSpatialErr2X2VsVert)) {
890     qm_->spatial_width_fact = 4.0f / 3.0f;
891     qm_->spatial_height_fact = 4.0f / 3.0f;
892   }
893   // Check for 2x1 selection.
894   if (spatial_err_v < spatial_err_h * (1.0f - kSpatialErrVertVsHoriz) &&
895       spatial_err_v < spatial_err * (1.0f - kSpatialErr2X2VsVert)) {
896     qm_->spatial_width_fact = 1.0f;
897     qm_->spatial_height_fact = 2.0f;
898   }
899 }
900 
901 // ROBUSTNESS CLASS
902 
VCMQmRobustness()903 VCMQmRobustness::VCMQmRobustness() {
904   Reset();
905 }
906 
~VCMQmRobustness()907 VCMQmRobustness::~VCMQmRobustness() {}
908 
Reset()909 void VCMQmRobustness::Reset() {
910   prev_total_rate_ = 0.0f;
911   prev_rtt_time_ = 0;
912   prev_packet_loss_ = 0;
913   prev_code_rate_delta_ = 0;
914   ResetQM();
915 }
916 
917 // Adjust the FEC rate based on the content and the network state
918 // (packet loss rate, total rate/bandwidth, round trip time).
919 // Note that packetLoss here is the filtered loss value.
AdjustFecFactor(uint8_t code_rate_delta,float total_rate,float framerate,int64_t rtt_time,uint8_t packet_loss)920 float VCMQmRobustness::AdjustFecFactor(uint8_t code_rate_delta,
921                                        float total_rate,
922                                        float framerate,
923                                        int64_t rtt_time,
924                                        uint8_t packet_loss) {
925   // Default: no adjustment
926   float adjust_fec = 1.0f;
927   if (content_metrics_ == NULL) {
928     return adjust_fec;
929   }
930   // Compute class state of the content.
931   ComputeMotionNFD();
932   ComputeSpatial();
933 
934   // TODO(marpan): Set FEC adjustment factor.
935 
936   // Keep track of previous values of network state:
937   // adjustment may be also based on pattern of changes in network state.
938   prev_total_rate_ = total_rate;
939   prev_rtt_time_ = rtt_time;
940   prev_packet_loss_ = packet_loss;
941   prev_code_rate_delta_ = code_rate_delta;
942   return adjust_fec;
943 }
944 
945 // Set the UEP (unequal-protection across packets) on/off for the FEC.
SetUepProtection(uint8_t code_rate_delta,float total_rate,uint8_t packet_loss,bool frame_type)946 bool VCMQmRobustness::SetUepProtection(uint8_t code_rate_delta,
947                                        float total_rate,
948                                        uint8_t packet_loss,
949                                        bool frame_type) {
950   // Default.
951   return false;
952 }
953 }  // namespace webrtc
954