1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "webrtc/modules/video_processing/content_analysis.h"
11 
12 #include <math.h>
13 #include <stdlib.h>
14 
15 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
16 #include "webrtc/system_wrappers/include/tick_util.h"
17 
18 namespace webrtc {
19 
VPMContentAnalysis(bool runtime_cpu_detection)20 VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
21     : orig_frame_(NULL),
22       prev_frame_(NULL),
23       width_(0),
24       height_(0),
25       skip_num_(1),
26       border_(8),
27       motion_magnitude_(0.0f),
28       spatial_pred_err_(0.0f),
29       spatial_pred_err_h_(0.0f),
30       spatial_pred_err_v_(0.0f),
31       first_frame_(true),
32       ca_Init_(false),
33       content_metrics_(NULL) {
34   ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
35   TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;
36 
37   if (runtime_cpu_detection) {
38 #if defined(WEBRTC_ARCH_X86_FAMILY)
39     if (WebRtc_GetCPUInfo(kSSE2)) {
40       ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
41       TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
42     }
43 #endif
44   }
45   Release();
46 }
47 
~VPMContentAnalysis()48 VPMContentAnalysis::~VPMContentAnalysis() {
49   Release();
50 }
51 
ComputeContentMetrics(const VideoFrame & inputFrame)52 VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
53     const VideoFrame& inputFrame) {
54   if (inputFrame.IsZeroSize())
55     return NULL;
56 
57   // Init if needed (native dimension change).
58   if (width_ != inputFrame.width() || height_ != inputFrame.height()) {
59     if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height()))
60       return NULL;
61   }
62   // Only interested in the Y plane.
63   orig_frame_ = inputFrame.buffer(kYPlane);
64 
65   // Compute spatial metrics: 3 spatial prediction errors.
66   (this->*ComputeSpatialMetrics)();
67 
68   // Compute motion metrics
69   if (first_frame_ == false)
70     ComputeMotionMetrics();
71 
72   // Saving current frame as previous one: Y only.
73   memcpy(prev_frame_, orig_frame_, width_ * height_);
74 
75   first_frame_ = false;
76   ca_Init_ = true;
77 
78   return ContentMetrics();
79 }
80 
Release()81 int32_t VPMContentAnalysis::Release() {
82   if (content_metrics_ != NULL) {
83     delete content_metrics_;
84     content_metrics_ = NULL;
85   }
86 
87   if (prev_frame_ != NULL) {
88     delete[] prev_frame_;
89     prev_frame_ = NULL;
90   }
91 
92   width_ = 0;
93   height_ = 0;
94   first_frame_ = true;
95 
96   return VPM_OK;
97 }
98 
Initialize(int width,int height)99 int32_t VPMContentAnalysis::Initialize(int width, int height) {
100   width_ = width;
101   height_ = height;
102   first_frame_ = true;
103 
104   // skip parameter: # of skipped rows: for complexity reduction
105   //  temporal also currently uses it for column reduction.
106   skip_num_ = 1;
107 
108   // use skipNum = 2 for 4CIF, WHD
109   if ((height_ >= 576) && (width_ >= 704)) {
110     skip_num_ = 2;
111   }
112   // use skipNum = 4 for FULLL_HD images
113   if ((height_ >= 1080) && (width_ >= 1920)) {
114     skip_num_ = 4;
115   }
116 
117   if (content_metrics_ != NULL) {
118     delete content_metrics_;
119   }
120 
121   if (prev_frame_ != NULL) {
122     delete[] prev_frame_;
123   }
124 
125   // Spatial Metrics don't work on a border of 8. Minimum processing
126   // block size is 16 pixels.  So make sure the width and height support this.
127   if (width_ <= 32 || height_ <= 32) {
128     ca_Init_ = false;
129     return VPM_PARAMETER_ERROR;
130   }
131 
132   content_metrics_ = new VideoContentMetrics();
133   if (content_metrics_ == NULL) {
134     return VPM_MEMORY;
135   }
136 
137   prev_frame_ = new uint8_t[width_ * height_];  // Y only.
138   if (prev_frame_ == NULL)
139     return VPM_MEMORY;
140 
141   return VPM_OK;
142 }
143 
144 // Compute motion metrics: magnitude over non-zero motion vectors,
145 //  and size of zero cluster
ComputeMotionMetrics()146 int32_t VPMContentAnalysis::ComputeMotionMetrics() {
147   // Motion metrics: only one is derived from normalized
148   //  (MAD) temporal difference
149   (this->*TemporalDiffMetric)();
150   return VPM_OK;
151 }
152 
153 // Normalized temporal difference (MAD): used as a motion level metric
154 // Normalize MAD by spatial contrast: images with more contrast
155 //  (pixel variance) likely have larger temporal difference
156 // To reduce complexity, we compute the metric for a reduced set of points.
TemporalDiffMetric_C()157 int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
158   // size of original frame
159   int sizei = height_;
160   int sizej = width_;
161   uint32_t tempDiffSum = 0;
162   uint32_t pixelSum = 0;
163   uint64_t pixelSqSum = 0;
164 
165   uint32_t num_pixels = 0;  // Counter for # of pixels.
166   const int width_end = ((width_ - 2 * border_) & -16) + border_;
167 
168   for (int i = border_; i < sizei - border_; i += skip_num_) {
169     for (int j = border_; j < width_end; j++) {
170       num_pixels += 1;
171       int ssn = i * sizej + j;
172 
173       uint8_t currPixel = orig_frame_[ssn];
174       uint8_t prevPixel = prev_frame_[ssn];
175 
176       tempDiffSum +=
177           static_cast<uint32_t>(abs((int16_t)(currPixel - prevPixel)));
178       pixelSum += static_cast<uint32_t>(currPixel);
179       pixelSqSum += static_cast<uint64_t>(currPixel * currPixel);
180     }
181   }
182 
183   // Default.
184   motion_magnitude_ = 0.0f;
185 
186   if (tempDiffSum == 0)
187     return VPM_OK;
188 
189   // Normalize over all pixels.
190   float const tempDiffAvg =
191       static_cast<float>(tempDiffSum) / static_cast<float>(num_pixels);
192   float const pixelSumAvg =
193       static_cast<float>(pixelSum) / static_cast<float>(num_pixels);
194   float const pixelSqSumAvg =
195       static_cast<float>(pixelSqSum) / static_cast<float>(num_pixels);
196   float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg);
197 
198   if (contrast > 0.0) {
199     contrast = sqrt(contrast);
200     motion_magnitude_ = tempDiffAvg / contrast;
201   }
202   return VPM_OK;
203 }
204 
205 // Compute spatial metrics:
206 // To reduce complexity, we compute the metric for a reduced set of points.
207 // The spatial metrics are rough estimates of the prediction error cost for
208 //  each QM spatial mode: 2x2,1x2,2x1
209 // The metrics are a simple estimate of the up-sampling prediction error,
210 // estimated assuming sub-sampling for decimation (no filtering),
211 // and up-sampling back up with simple bilinear interpolation.
ComputeSpatialMetrics_C()212 int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
213   const int sizei = height_;
214   const int sizej = width_;
215 
216   // Pixel mean square average: used to normalize the spatial metrics.
217   uint32_t pixelMSA = 0;
218 
219   uint32_t spatialErrSum = 0;
220   uint32_t spatialErrVSum = 0;
221   uint32_t spatialErrHSum = 0;
222 
223   // make sure work section is a multiple of 16
224   const int width_end = ((sizej - 2 * border_) & -16) + border_;
225 
226   for (int i = border_; i < sizei - border_; i += skip_num_) {
227     for (int j = border_; j < width_end; j++) {
228       int ssn1 = i * sizej + j;
229       int ssn2 = (i + 1) * sizej + j;  // bottom
230       int ssn3 = (i - 1) * sizej + j;  // top
231       int ssn4 = i * sizej + j + 1;    // right
232       int ssn5 = i * sizej + j - 1;    // left
233 
234       uint16_t refPixel1 = orig_frame_[ssn1] << 1;
235       uint16_t refPixel2 = orig_frame_[ssn1] << 2;
236 
237       uint8_t bottPixel = orig_frame_[ssn2];
238       uint8_t topPixel = orig_frame_[ssn3];
239       uint8_t rightPixel = orig_frame_[ssn4];
240       uint8_t leftPixel = orig_frame_[ssn5];
241 
242       spatialErrSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
243           refPixel2 - static_cast<uint16_t>(bottPixel + topPixel + leftPixel +
244                                             rightPixel))));
245       spatialErrVSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
246           refPixel1 - static_cast<uint16_t>(bottPixel + topPixel))));
247       spatialErrHSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
248           refPixel1 - static_cast<uint16_t>(leftPixel + rightPixel))));
249       pixelMSA += orig_frame_[ssn1];
250     }
251   }
252 
253   // Normalize over all pixels.
254   const float spatialErr = static_cast<float>(spatialErrSum >> 2);
255   const float spatialErrH = static_cast<float>(spatialErrHSum >> 1);
256   const float spatialErrV = static_cast<float>(spatialErrVSum >> 1);
257   const float norm = static_cast<float>(pixelMSA);
258 
259   // 2X2:
260   spatial_pred_err_ = spatialErr / norm;
261   // 1X2:
262   spatial_pred_err_h_ = spatialErrH / norm;
263   // 2X1:
264   spatial_pred_err_v_ = spatialErrV / norm;
265   return VPM_OK;
266 }
267 
ContentMetrics()268 VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
269   if (ca_Init_ == false)
270     return NULL;
271 
272   content_metrics_->spatial_pred_err = spatial_pred_err_;
273   content_metrics_->spatial_pred_err_h = spatial_pred_err_h_;
274   content_metrics_->spatial_pred_err_v = spatial_pred_err_v_;
275   // Motion metric: normalized temporal difference (MAD).
276   content_metrics_->motion_magnitude = motion_magnitude_;
277 
278   return content_metrics_;
279 }
280 
281 }  // namespace webrtc
282