1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h"
12 
13 #include <algorithm>
14 #include <cmath>
15 #include <iterator>
16 #include <limits>
17 #include <numeric>
18 
19 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/numerics/running_statistics.h"
22 #include "rtc_base/strings/string_builder.h"
23 
24 namespace webrtc {
25 namespace test {
26 
27 using FrameStatistics = VideoCodecTestStats::FrameStatistics;
28 using VideoStatistics = VideoCodecTestStats::VideoStatistics;
29 
namespace {
// Largest allowed deviation, in percent of the target bitrate, between the
// measured bitrate and the target bitrate for the target to be considered
// reached.
constexpr int kMaxBitrateMismatchPercent = 20;
}  // namespace
33 
// Construction and destruction need no custom logic; the compiler-generated
// versions are used.
VideoCodecTestStatsImpl::VideoCodecTestStatsImpl() = default;
VideoCodecTestStatsImpl::~VideoCodecTestStatsImpl() = default;
36 
AddFrame(const FrameStatistics & frame_stat)37 void VideoCodecTestStatsImpl::AddFrame(const FrameStatistics& frame_stat) {
38   const size_t timestamp = frame_stat.rtp_timestamp;
39   const size_t layer_idx = frame_stat.spatial_idx;
40   RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) ==
41              rtp_timestamp_to_frame_num_[layer_idx].end());
42   rtp_timestamp_to_frame_num_[layer_idx][timestamp] = frame_stat.frame_number;
43   layer_stats_[layer_idx].push_back(frame_stat);
44 }
45 
GetFrame(size_t frame_num,size_t layer_idx)46 FrameStatistics* VideoCodecTestStatsImpl::GetFrame(size_t frame_num,
47                                                    size_t layer_idx) {
48   RTC_CHECK_LT(frame_num, layer_stats_[layer_idx].size());
49   return &layer_stats_[layer_idx][frame_num];
50 }
51 
GetFrameWithTimestamp(size_t timestamp,size_t layer_idx)52 FrameStatistics* VideoCodecTestStatsImpl::GetFrameWithTimestamp(
53     size_t timestamp,
54     size_t layer_idx) {
55   RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) !=
56              rtp_timestamp_to_frame_num_[layer_idx].end());
57 
58   return GetFrame(rtp_timestamp_to_frame_num_[layer_idx][timestamp], layer_idx);
59 }
60 
GetFrameStatistics()61 std::vector<FrameStatistics> VideoCodecTestStatsImpl::GetFrameStatistics() {
62   size_t capacity = 0;
63   for (const auto& layer_stat : layer_stats_) {
64     capacity += layer_stat.second.size();
65   }
66 
67   std::vector<FrameStatistics> frame_statistics;
68   frame_statistics.reserve(capacity);
69   for (const auto& layer_stat : layer_stats_) {
70     std::copy(layer_stat.second.cbegin(), layer_stat.second.cend(),
71               std::back_inserter(frame_statistics));
72   }
73 
74   return frame_statistics;
75 }
76 
77 std::vector<VideoStatistics>
SliceAndCalcLayerVideoStatistic(size_t first_frame_num,size_t last_frame_num)78 VideoCodecTestStatsImpl::SliceAndCalcLayerVideoStatistic(
79     size_t first_frame_num,
80     size_t last_frame_num) {
81   std::vector<VideoStatistics> layer_stats;
82 
83   size_t num_spatial_layers = 0;
84   size_t num_temporal_layers = 0;
85   GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers,
86                            &num_temporal_layers);
87   RTC_CHECK_GT(num_spatial_layers, 0);
88   RTC_CHECK_GT(num_temporal_layers, 0);
89 
90   for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers;
91        ++spatial_idx) {
92     for (size_t temporal_idx = 0; temporal_idx < num_temporal_layers;
93          ++temporal_idx) {
94       VideoStatistics layer_stat = SliceAndCalcVideoStatistic(
95           first_frame_num, last_frame_num, spatial_idx, temporal_idx, false);
96       layer_stats.push_back(layer_stat);
97     }
98   }
99 
100   return layer_stats;
101 }
102 
SliceAndCalcAggregatedVideoStatistic(size_t first_frame_num,size_t last_frame_num)103 VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcAggregatedVideoStatistic(
104     size_t first_frame_num,
105     size_t last_frame_num) {
106   size_t num_spatial_layers = 0;
107   size_t num_temporal_layers = 0;
108   GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers,
109                            &num_temporal_layers);
110   RTC_CHECK_GT(num_spatial_layers, 0);
111   RTC_CHECK_GT(num_temporal_layers, 0);
112 
113   return SliceAndCalcVideoStatistic(first_frame_num, last_frame_num,
114                                     num_spatial_layers - 1,
115                                     num_temporal_layers - 1, true);
116 }
117 
Size(size_t spatial_idx)118 size_t VideoCodecTestStatsImpl::Size(size_t spatial_idx) {
119   return layer_stats_[spatial_idx].size();
120 }
121 
Clear()122 void VideoCodecTestStatsImpl::Clear() {
123   layer_stats_.clear();
124   rtp_timestamp_to_frame_num_.clear();
125 }
126 
AggregateFrameStatistic(size_t frame_num,size_t spatial_idx,bool aggregate_independent_layers)127 FrameStatistics VideoCodecTestStatsImpl::AggregateFrameStatistic(
128     size_t frame_num,
129     size_t spatial_idx,
130     bool aggregate_independent_layers) {
131   FrameStatistics frame_stat = *GetFrame(frame_num, spatial_idx);
132   bool inter_layer_predicted = frame_stat.inter_layer_predicted;
133   while (spatial_idx-- > 0) {
134     if (aggregate_independent_layers || inter_layer_predicted) {
135       FrameStatistics* base_frame_stat = GetFrame(frame_num, spatial_idx);
136       frame_stat.length_bytes += base_frame_stat->length_bytes;
137       frame_stat.target_bitrate_kbps += base_frame_stat->target_bitrate_kbps;
138 
139       inter_layer_predicted = base_frame_stat->inter_layer_predicted;
140     }
141   }
142 
143   return frame_stat;
144 }
145 
CalcLayerTargetBitrateKbps(size_t first_frame_num,size_t last_frame_num,size_t spatial_idx,size_t temporal_idx,bool aggregate_independent_layers)146 size_t VideoCodecTestStatsImpl::CalcLayerTargetBitrateKbps(
147     size_t first_frame_num,
148     size_t last_frame_num,
149     size_t spatial_idx,
150     size_t temporal_idx,
151     bool aggregate_independent_layers) {
152   size_t target_bitrate_kbps = 0;
153 
154   // We don't know if superframe includes all required spatial layers because
155   // of possible frame drops. Run through all frames in specified range, find
156   // and return maximum target bitrate. Assume that target bitrate in frame
157   // statistic is specified per temporal layer.
158   for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
159        ++frame_num) {
160     FrameStatistics superframe = AggregateFrameStatistic(
161         frame_num, spatial_idx, aggregate_independent_layers);
162 
163     if (superframe.temporal_idx <= temporal_idx) {
164       target_bitrate_kbps =
165           std::max(target_bitrate_kbps, superframe.target_bitrate_kbps);
166     }
167   }
168 
169   RTC_DCHECK_GT(target_bitrate_kbps, 0);
170   return target_bitrate_kbps;
171 }
172 
SliceAndCalcVideoStatistic(size_t first_frame_num,size_t last_frame_num,size_t spatial_idx,size_t temporal_idx,bool aggregate_independent_layers)173 VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcVideoStatistic(
174     size_t first_frame_num,
175     size_t last_frame_num,
176     size_t spatial_idx,
177     size_t temporal_idx,
178     bool aggregate_independent_layers) {
179   VideoStatistics video_stat;
180 
181   float buffer_level_bits = 0.0f;
182   RunningStatistics<float> buffer_level_sec;
183 
184   RunningStatistics<size_t> key_frame_size_bytes;
185   RunningStatistics<size_t> delta_frame_size_bytes;
186 
187   RunningStatistics<size_t> frame_encoding_time_us;
188   RunningStatistics<size_t> frame_decoding_time_us;
189 
190   RunningStatistics<float> psnr_y;
191   RunningStatistics<float> psnr_u;
192   RunningStatistics<float> psnr_v;
193   RunningStatistics<float> psnr;
194   RunningStatistics<float> ssim;
195   RunningStatistics<int> qp;
196 
197   size_t rtp_timestamp_first_frame = 0;
198   size_t rtp_timestamp_prev_frame = 0;
199 
200   FrameStatistics last_successfully_decoded_frame(0, 0, 0);
201 
202   const size_t target_bitrate_kbps =
203       CalcLayerTargetBitrateKbps(first_frame_num, last_frame_num, spatial_idx,
204                                  temporal_idx, aggregate_independent_layers);
205   RTC_CHECK_GT(target_bitrate_kbps, 0);  // We divide by |target_bitrate_kbps|.
206 
207   for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
208        ++frame_num) {
209     FrameStatistics frame_stat = AggregateFrameStatistic(
210         frame_num, spatial_idx, aggregate_independent_layers);
211 
212     float time_since_first_frame_sec =
213         1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_first_frame) /
214         kVideoPayloadTypeFrequency;
215     float time_since_prev_frame_sec =
216         1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_prev_frame) /
217         kVideoPayloadTypeFrequency;
218 
219     if (frame_stat.temporal_idx > temporal_idx) {
220       continue;
221     }
222 
223     buffer_level_bits -= time_since_prev_frame_sec * 1000 * target_bitrate_kbps;
224     buffer_level_bits = std::max(0.0f, buffer_level_bits);
225     buffer_level_bits += 8.0 * frame_stat.length_bytes;
226     buffer_level_sec.AddSample(buffer_level_bits /
227                                (1000 * target_bitrate_kbps));
228 
229     video_stat.length_bytes += frame_stat.length_bytes;
230 
231     if (frame_stat.encoding_successful) {
232       ++video_stat.num_encoded_frames;
233 
234       if (frame_stat.frame_type == VideoFrameType::kVideoFrameKey) {
235         key_frame_size_bytes.AddSample(frame_stat.length_bytes);
236         ++video_stat.num_key_frames;
237       } else {
238         delta_frame_size_bytes.AddSample(frame_stat.length_bytes);
239       }
240 
241       frame_encoding_time_us.AddSample(frame_stat.encode_time_us);
242       qp.AddSample(frame_stat.qp);
243 
244       video_stat.max_nalu_size_bytes = std::max(video_stat.max_nalu_size_bytes,
245                                                 frame_stat.max_nalu_size_bytes);
246     }
247 
248     if (frame_stat.decoding_successful) {
249       ++video_stat.num_decoded_frames;
250 
251       video_stat.width = std::max(video_stat.width, frame_stat.decoded_width);
252       video_stat.height =
253           std::max(video_stat.height, frame_stat.decoded_height);
254 
255       psnr_y.AddSample(frame_stat.psnr_y);
256       psnr_u.AddSample(frame_stat.psnr_u);
257       psnr_v.AddSample(frame_stat.psnr_v);
258       psnr.AddSample(frame_stat.psnr);
259       ssim.AddSample(frame_stat.ssim);
260 
261       if (video_stat.num_decoded_frames > 1) {
262         if (last_successfully_decoded_frame.decoded_width !=
263                 frame_stat.decoded_width ||
264             last_successfully_decoded_frame.decoded_height !=
265                 frame_stat.decoded_height) {
266           ++video_stat.num_spatial_resizes;
267         }
268       }
269 
270       frame_decoding_time_us.AddSample(frame_stat.decode_time_us);
271       last_successfully_decoded_frame = frame_stat;
272     }
273 
274     if (video_stat.num_input_frames > 0) {
275       if (video_stat.time_to_reach_target_bitrate_sec == 0.0f) {
276         RTC_CHECK_GT(time_since_first_frame_sec, 0);
277         const float curr_kbps =
278             8.0 * video_stat.length_bytes / 1000 / time_since_first_frame_sec;
279         const float bitrate_mismatch_percent =
280             100 * std::fabs(curr_kbps - target_bitrate_kbps) /
281             target_bitrate_kbps;
282         if (bitrate_mismatch_percent < kMaxBitrateMismatchPercent) {
283           video_stat.time_to_reach_target_bitrate_sec =
284               time_since_first_frame_sec;
285         }
286       }
287     }
288 
289     rtp_timestamp_prev_frame = frame_stat.rtp_timestamp;
290     if (video_stat.num_input_frames == 0) {
291       rtp_timestamp_first_frame = frame_stat.rtp_timestamp;
292     }
293 
294     ++video_stat.num_input_frames;
295   }
296 
297   const size_t num_frames = last_frame_num - first_frame_num + 1;
298   const size_t timestamp_delta =
299       GetFrame(first_frame_num + 1, spatial_idx)->rtp_timestamp -
300       GetFrame(first_frame_num, spatial_idx)->rtp_timestamp;
301   RTC_CHECK_GT(timestamp_delta, 0);
302   const float input_framerate_fps =
303       1.0 * kVideoPayloadTypeFrequency / timestamp_delta;
304   RTC_CHECK_GT(input_framerate_fps, 0);
305   const float duration_sec = num_frames / input_framerate_fps;
306 
307   video_stat.target_bitrate_kbps = target_bitrate_kbps;
308   video_stat.input_framerate_fps = input_framerate_fps;
309 
310   video_stat.spatial_idx = spatial_idx;
311   video_stat.temporal_idx = temporal_idx;
312 
313   RTC_CHECK_GT(duration_sec, 0);
314   video_stat.bitrate_kbps =
315       static_cast<size_t>(8 * video_stat.length_bytes / 1000 / duration_sec);
316   video_stat.framerate_fps = video_stat.num_encoded_frames / duration_sec;
317 
318   // http://bugs.webrtc.org/10400: On Windows, we only get millisecond
319   // granularity in the frame encode/decode timing measurements.
320   // So we need to softly avoid a div-by-zero here.
321   const float mean_encode_time_us =
322       frame_encoding_time_us.GetMean().value_or(0);
323   video_stat.enc_speed_fps = mean_encode_time_us > 0.0f
324                                  ? 1000000.0f / mean_encode_time_us
325                                  : std::numeric_limits<float>::max();
326   const float mean_decode_time_us =
327       frame_decoding_time_us.GetMean().value_or(0);
328   video_stat.dec_speed_fps = mean_decode_time_us > 0.0f
329                                  ? 1000000.0f / mean_decode_time_us
330                                  : std::numeric_limits<float>::max();
331 
332   auto MaxDelaySec =
333       [target_bitrate_kbps](const RunningStatistics<size_t>& stats) {
334         return 8 * stats.GetMax().value_or(0) / 1000 / target_bitrate_kbps;
335       };
336 
337   video_stat.avg_delay_sec = buffer_level_sec.GetMean().value_or(0);
338   video_stat.max_key_frame_delay_sec = MaxDelaySec(key_frame_size_bytes);
339   video_stat.max_delta_frame_delay_sec = MaxDelaySec(key_frame_size_bytes);
340 
341   video_stat.avg_key_frame_size_bytes =
342       key_frame_size_bytes.GetMean().value_or(0);
343   video_stat.avg_delta_frame_size_bytes =
344       delta_frame_size_bytes.GetMean().value_or(0);
345   video_stat.avg_qp = qp.GetMean().value_or(0);
346 
347   video_stat.avg_psnr_y = psnr_y.GetMean().value_or(0);
348   video_stat.avg_psnr_u = psnr_u.GetMean().value_or(0);
349   video_stat.avg_psnr_v = psnr_v.GetMean().value_or(0);
350   video_stat.avg_psnr = psnr.GetMean().value_or(0);
351   video_stat.min_psnr =
352       psnr.GetMin().value_or(std::numeric_limits<float>::max());
353   video_stat.avg_ssim = ssim.GetMean().value_or(0);
354   video_stat.min_ssim =
355       ssim.GetMin().value_or(std::numeric_limits<float>::max());
356 
357   return video_stat;
358 }
359 
GetNumberOfEncodedLayers(size_t first_frame_num,size_t last_frame_num,size_t * num_encoded_spatial_layers,size_t * num_encoded_temporal_layers)360 void VideoCodecTestStatsImpl::GetNumberOfEncodedLayers(
361     size_t first_frame_num,
362     size_t last_frame_num,
363     size_t* num_encoded_spatial_layers,
364     size_t* num_encoded_temporal_layers) {
365   *num_encoded_spatial_layers = 0;
366   *num_encoded_temporal_layers = 0;
367 
368   const size_t num_spatial_layers = layer_stats_.size();
369 
370   for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
371        ++frame_num) {
372     for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers;
373          ++spatial_idx) {
374       FrameStatistics* frame_stat = GetFrame(frame_num, spatial_idx);
375       if (frame_stat->encoding_successful) {
376         *num_encoded_spatial_layers =
377             std::max(*num_encoded_spatial_layers, frame_stat->spatial_idx + 1);
378         *num_encoded_temporal_layers = std::max(*num_encoded_temporal_layers,
379                                                 frame_stat->temporal_idx + 1);
380       }
381     }
382   }
383 }
384 
385 }  // namespace test
386 }  // namespace webrtc
387