1 /*
2  *  Copyright (c) 2010 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "media/base/video_adapter.h"
12 
13 #include <algorithm>
14 #include <cmath>
15 #include <cstdlib>
16 #include <limits>
17 #include <utility>
18 
19 #include "absl/types/optional.h"
20 #include "media/base/video_common.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23 #include "rtc_base/time_utils.h"
24 #include "system_wrappers/include/field_trial.h"
25 
26 namespace {
27 
28 struct Fraction {
29   int numerator;
30   int denominator;
31 
DivideByGcd__anon53d5046b0111::Fraction32   void DivideByGcd() {
33     int g = cricket::GreatestCommonDivisor(numerator, denominator);
34     numerator /= g;
35     denominator /= g;
36   }
37 
38   // Determines number of output pixels if both width and height of an input of
39   // |input_pixels| pixels is scaled with the fraction numerator / denominator.
scale_pixel_count__anon53d5046b0111::Fraction40   int scale_pixel_count(int input_pixels) {
41     return (numerator * numerator * input_pixels) / (denominator * denominator);
42   }
43 };
44 
45 // Round |value_to_round| to a multiple of |multiple|. Prefer rounding upwards,
46 // but never more than |max_value|.
int roundUp(int value_to_round, int multiple, int max_value) {
  // Smallest multiple of |multiple| that is not below |value_to_round|.
  const int steps_up = (value_to_round + multiple - 1) / multiple;
  const int ceiling = steps_up * multiple;
  if (ceiling > max_value) {
    // Rounding up would exceed the limit; fall back to the largest multiple
    // that still fits within |max_value|.
    return (max_value / multiple) * multiple;
  }
  return ceiling;
}
53 
54 // Generates a scale factor that makes |input_pixels| close to |target_pixels|,
55 // but no higher than |max_pixels|.
FindScale(int input_width,int input_height,int target_pixels,int max_pixels,bool variable_start_scale_factor)56 Fraction FindScale(int input_width,
57                    int input_height,
58                    int target_pixels,
59                    int max_pixels,
60                    bool variable_start_scale_factor) {
61   // This function only makes sense for a positive target.
62   RTC_DCHECK_GT(target_pixels, 0);
63   RTC_DCHECK_GT(max_pixels, 0);
64   RTC_DCHECK_GE(max_pixels, target_pixels);
65 
66   const int input_pixels = input_width * input_height;
67 
68   // Don't scale up original.
69   if (target_pixels >= input_pixels)
70     return Fraction{1, 1};
71 
72   Fraction current_scale = Fraction{1, 1};
73   Fraction best_scale = Fraction{1, 1};
74 
75   if (variable_start_scale_factor) {
76     // Start scaling down by 2/3 depending on |input_width| and |input_height|.
77     if (input_width % 3 == 0 && input_height % 3 == 0) {
78       // 2/3 (then alternates 3/4, 2/3, 3/4,...).
79       current_scale = Fraction{6, 6};
80     }
81     if (input_width % 9 == 0 && input_height % 9 == 0) {
82       // 2/3, 2/3 (then alternates 3/4, 2/3, 3/4,...).
83       current_scale = Fraction{36, 36};
84     }
85   }
86 
87   // The minimum (absolute) difference between the number of output pixels and
88   // the target pixel count.
89   int min_pixel_diff = std::numeric_limits<int>::max();
90   if (input_pixels <= max_pixels) {
91     // Start condition for 1/1 case, if it is less than max.
92     min_pixel_diff = std::abs(input_pixels - target_pixels);
93   }
94 
95   // Alternately scale down by 3/4 and 2/3. This results in fractions which are
96   // effectively scalable. For instance, starting at 1280x720 will result in
97   // the series (3/4) => 960x540, (1/2) => 640x360, (3/8) => 480x270,
98   // (1/4) => 320x180, (3/16) => 240x125, (1/8) => 160x90.
99   while (current_scale.scale_pixel_count(input_pixels) > target_pixels) {
100     if (current_scale.numerator % 3 == 0 &&
101         current_scale.denominator % 2 == 0) {
102       // Multiply by 2/3.
103       current_scale.numerator /= 3;
104       current_scale.denominator /= 2;
105     } else {
106       // Multiply by 3/4.
107       current_scale.numerator *= 3;
108       current_scale.denominator *= 4;
109     }
110 
111     int output_pixels = current_scale.scale_pixel_count(input_pixels);
112     if (output_pixels <= max_pixels) {
113       int diff = std::abs(target_pixels - output_pixels);
114       if (diff < min_pixel_diff) {
115         min_pixel_diff = diff;
116         best_scale = current_scale;
117       }
118     }
119   }
120   best_scale.DivideByGcd();
121 
122   return best_scale;
123 }
124 }  // namespace
125 
126 namespace cricket {
127 
// Constructs an adapter whose resolution alignment starts out equal to
// |source_resolution_alignment|. All frame statistics start at zero, and the
// sink-requested pixel counts and framerate start at the maximum int value
// until OnSinkWants() replaces them.
VideoAdapter::VideoAdapter(int source_resolution_alignment)
    : frames_in_(0),
      frames_out_(0),
      frames_scaled_(0),
      adaption_changes_(0),
      previous_width_(0),
      previous_height_(0),
      // Read once at construction from the field trial of the same name.
      variable_start_scale_factor_(webrtc::field_trial::IsEnabled(
          "WebRTC-Video-VariableStartScaleFactor")),
      source_resolution_alignment_(source_resolution_alignment),
      // Recomputed in OnSinkWants() from the source and sink alignments.
      resolution_alignment_(source_resolution_alignment),
      resolution_request_target_pixel_count_(std::numeric_limits<int>::max()),
      resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
      max_framerate_request_(std::numeric_limits<int>::max()) {}
142 
// Default construction delegates to the alignment-taking constructor with a
// source resolution alignment of 1.
VideoAdapter::VideoAdapter() : VideoAdapter(1) {}
144 
~VideoAdapter()145 VideoAdapter::~VideoAdapter() {}
146 
KeepFrame(int64_t in_timestamp_ns)147 bool VideoAdapter::KeepFrame(int64_t in_timestamp_ns) {
148   int max_fps = max_framerate_request_;
149   if (max_fps_)
150     max_fps = std::min(max_fps, *max_fps_);
151 
152   if (max_fps <= 0)
153     return false;
154 
155   // If |max_framerate_request_| is not set, it will default to maxint, which
156   // will lead to a frame_interval_ns rounded to 0.
157   int64_t frame_interval_ns = rtc::kNumNanosecsPerSec / max_fps;
158   if (frame_interval_ns <= 0) {
159     // Frame rate throttling not enabled.
160     return true;
161   }
162 
163   if (next_frame_timestamp_ns_) {
164     // Time until next frame should be outputted.
165     const int64_t time_until_next_frame_ns =
166         (*next_frame_timestamp_ns_ - in_timestamp_ns);
167 
168     // Continue if timestamp is within expected range.
169     if (std::abs(time_until_next_frame_ns) < 2 * frame_interval_ns) {
170       // Drop if a frame shouldn't be outputted yet.
171       if (time_until_next_frame_ns > 0)
172         return false;
173       // Time to output new frame.
174       *next_frame_timestamp_ns_ += frame_interval_ns;
175       return true;
176     }
177   }
178 
179   // First timestamp received or timestamp is way outside expected range, so
180   // reset. Set first timestamp target to just half the interval to prefer
181   // keeping frames in case of jitter.
182   next_frame_timestamp_ns_ = in_timestamp_ns + frame_interval_ns / 2;
183   return true;
184 }
185 
AdaptFrameResolution(int in_width,int in_height,int64_t in_timestamp_ns,int * cropped_width,int * cropped_height,int * out_width,int * out_height)186 bool VideoAdapter::AdaptFrameResolution(int in_width,
187                                         int in_height,
188                                         int64_t in_timestamp_ns,
189                                         int* cropped_width,
190                                         int* cropped_height,
191                                         int* out_width,
192                                         int* out_height) {
193   webrtc::MutexLock lock(&mutex_);
194   ++frames_in_;
195 
196   // The max output pixel count is the minimum of the requests from
197   // OnOutputFormatRequest and OnResolutionFramerateRequest.
198   int max_pixel_count = resolution_request_max_pixel_count_;
199 
200   // Select target aspect ratio and max pixel count depending on input frame
201   // orientation.
202   absl::optional<std::pair<int, int>> target_aspect_ratio;
203   if (in_width > in_height) {
204     target_aspect_ratio = target_landscape_aspect_ratio_;
205     if (max_landscape_pixel_count_)
206       max_pixel_count = std::min(max_pixel_count, *max_landscape_pixel_count_);
207   } else {
208     target_aspect_ratio = target_portrait_aspect_ratio_;
209     if (max_portrait_pixel_count_)
210       max_pixel_count = std::min(max_pixel_count, *max_portrait_pixel_count_);
211   }
212 
213   int target_pixel_count =
214       std::min(resolution_request_target_pixel_count_, max_pixel_count);
215 
216   // Drop the input frame if necessary.
217   if (max_pixel_count <= 0 || !KeepFrame(in_timestamp_ns)) {
218     // Show VAdapt log every 90 frames dropped. (3 seconds)
219     if ((frames_in_ - frames_out_) % 90 == 0) {
220       // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
221       // in default calls.
222       RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_
223                        << " / out " << frames_out_ << " / in " << frames_in_
224                        << " Changes: " << adaption_changes_
225                        << " Input: " << in_width << "x" << in_height
226                        << " timestamp: " << in_timestamp_ns
227                        << " Output fps: " << max_framerate_request_ << "/"
228                        << max_fps_.value_or(-1)
229                        << " alignment: " << resolution_alignment_;
230     }
231 
232     // Drop frame.
233     return false;
234   }
235 
236   // Calculate how the input should be cropped.
237   if (!target_aspect_ratio || target_aspect_ratio->first <= 0 ||
238       target_aspect_ratio->second <= 0) {
239     *cropped_width = in_width;
240     *cropped_height = in_height;
241   } else {
242     const float requested_aspect =
243         target_aspect_ratio->first /
244         static_cast<float>(target_aspect_ratio->second);
245     *cropped_width =
246         std::min(in_width, static_cast<int>(in_height * requested_aspect));
247     *cropped_height =
248         std::min(in_height, static_cast<int>(in_width / requested_aspect));
249   }
250   const Fraction scale =
251       FindScale(*cropped_width, *cropped_height, target_pixel_count,
252                 max_pixel_count, variable_start_scale_factor_);
253   // Adjust cropping slightly to get correctly aligned output size and a perfect
254   // scale factor.
255   *cropped_width = roundUp(*cropped_width,
256                            scale.denominator * resolution_alignment_, in_width);
257   *cropped_height = roundUp(
258       *cropped_height, scale.denominator * resolution_alignment_, in_height);
259   RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
260   RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);
261 
262   // Calculate final output size.
263   *out_width = *cropped_width / scale.denominator * scale.numerator;
264   *out_height = *cropped_height / scale.denominator * scale.numerator;
265   RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
266   RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);
267 
268   ++frames_out_;
269   if (scale.numerator != scale.denominator)
270     ++frames_scaled_;
271 
272   if (previous_width_ &&
273       (previous_width_ != *out_width || previous_height_ != *out_height)) {
274     ++adaption_changes_;
275     RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_
276                      << " / out " << frames_out_ << " / in " << frames_in_
277                      << " Changes: " << adaption_changes_
278                      << " Input: " << in_width << "x" << in_height
279                      << " Scale: " << scale.numerator << "/"
280                      << scale.denominator << " Output: " << *out_width << "x"
281                      << *out_height << " fps: " << max_framerate_request_ << "/"
282                      << max_fps_.value_or(-1)
283                      << " alignment: " << resolution_alignment_;
284   }
285 
286   previous_width_ = *out_width;
287   previous_height_ = *out_height;
288 
289   return true;
290 }
291 
OnOutputFormatRequest(const absl::optional<VideoFormat> & format)292 void VideoAdapter::OnOutputFormatRequest(
293     const absl::optional<VideoFormat>& format) {
294   absl::optional<std::pair<int, int>> target_aspect_ratio;
295   absl::optional<int> max_pixel_count;
296   absl::optional<int> max_fps;
297   if (format) {
298     target_aspect_ratio = std::make_pair(format->width, format->height);
299     max_pixel_count = format->width * format->height;
300     if (format->interval > 0)
301       max_fps = rtc::kNumNanosecsPerSec / format->interval;
302   }
303   OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps);
304 }
305 
OnOutputFormatRequest(const absl::optional<std::pair<int,int>> & target_aspect_ratio,const absl::optional<int> & max_pixel_count,const absl::optional<int> & max_fps)306 void VideoAdapter::OnOutputFormatRequest(
307     const absl::optional<std::pair<int, int>>& target_aspect_ratio,
308     const absl::optional<int>& max_pixel_count,
309     const absl::optional<int>& max_fps) {
310   absl::optional<std::pair<int, int>> target_landscape_aspect_ratio;
311   absl::optional<std::pair<int, int>> target_portrait_aspect_ratio;
312   if (target_aspect_ratio && target_aspect_ratio->first > 0 &&
313       target_aspect_ratio->second > 0) {
314     // Maintain input orientation.
315     const int max_side =
316         std::max(target_aspect_ratio->first, target_aspect_ratio->second);
317     const int min_side =
318         std::min(target_aspect_ratio->first, target_aspect_ratio->second);
319     target_landscape_aspect_ratio = std::make_pair(max_side, min_side);
320     target_portrait_aspect_ratio = std::make_pair(min_side, max_side);
321   }
322   OnOutputFormatRequest(target_landscape_aspect_ratio, max_pixel_count,
323                         target_portrait_aspect_ratio, max_pixel_count, max_fps);
324 }
325 
OnOutputFormatRequest(const absl::optional<std::pair<int,int>> & target_landscape_aspect_ratio,const absl::optional<int> & max_landscape_pixel_count,const absl::optional<std::pair<int,int>> & target_portrait_aspect_ratio,const absl::optional<int> & max_portrait_pixel_count,const absl::optional<int> & max_fps)326 void VideoAdapter::OnOutputFormatRequest(
327     const absl::optional<std::pair<int, int>>& target_landscape_aspect_ratio,
328     const absl::optional<int>& max_landscape_pixel_count,
329     const absl::optional<std::pair<int, int>>& target_portrait_aspect_ratio,
330     const absl::optional<int>& max_portrait_pixel_count,
331     const absl::optional<int>& max_fps) {
332   webrtc::MutexLock lock(&mutex_);
333   target_landscape_aspect_ratio_ = target_landscape_aspect_ratio;
334   max_landscape_pixel_count_ = max_landscape_pixel_count;
335   target_portrait_aspect_ratio_ = target_portrait_aspect_ratio;
336   max_portrait_pixel_count_ = max_portrait_pixel_count;
337   max_fps_ = max_fps;
338   next_frame_timestamp_ns_ = absl::nullopt;
339 }
340 
OnSinkWants(const rtc::VideoSinkWants & sink_wants)341 void VideoAdapter::OnSinkWants(const rtc::VideoSinkWants& sink_wants) {
342   webrtc::MutexLock lock(&mutex_);
343   resolution_request_max_pixel_count_ = sink_wants.max_pixel_count;
344   resolution_request_target_pixel_count_ =
345       sink_wants.target_pixel_count.value_or(
346           resolution_request_max_pixel_count_);
347   max_framerate_request_ = sink_wants.max_framerate_fps;
348   resolution_alignment_ = cricket::LeastCommonMultiple(
349       source_resolution_alignment_, sink_wants.resolution_alignment);
350 }
351 
352 }  // namespace cricket
353