1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <cmath>
#include <limits>
#include <type_traits>
#include <vector>

#include "EffectDownmix.h"

#include <audio_utils/channels.h>
#include <audio_utils/primitives.h>
#include <audio_utils/Statistics.h>
#include <gtest/gtest.h>
#include <log/log.h>
26
27 extern audio_effect_library_t AUDIO_EFFECT_LIBRARY_INFO_SYM;
28 static constexpr audio_channel_mask_t kChannelPositionMasks[] = {
29 AUDIO_CHANNEL_OUT_FRONT_LEFT, // Legacy: the downmix effect treats MONO as FRONT_LEFT only.
30 // The AudioMixer interprets MONO as a special case requiring
31 // channel replication, bypassing the downmix effect.
32 AUDIO_CHANNEL_OUT_FRONT_CENTER,
33 AUDIO_CHANNEL_OUT_STEREO,
34 AUDIO_CHANNEL_OUT_2POINT1,
35 AUDIO_CHANNEL_OUT_2POINT0POINT2,
36 AUDIO_CHANNEL_OUT_QUAD, // AUDIO_CHANNEL_OUT_QUAD_BACK
37 AUDIO_CHANNEL_OUT_QUAD_SIDE,
38 AUDIO_CHANNEL_OUT_SURROUND,
39 AUDIO_CHANNEL_OUT_2POINT1POINT2,
40 AUDIO_CHANNEL_OUT_3POINT0POINT2,
41 AUDIO_CHANNEL_OUT_PENTA,
42 AUDIO_CHANNEL_OUT_3POINT1POINT2,
43 AUDIO_CHANNEL_OUT_5POINT1, // AUDIO_CHANNEL_OUT_5POINT1_BACK
44 AUDIO_CHANNEL_OUT_5POINT1_SIDE,
45 AUDIO_CHANNEL_OUT_6POINT1,
46 AUDIO_CHANNEL_OUT_5POINT1POINT2,
47 AUDIO_CHANNEL_OUT_7POINT1,
48 AUDIO_CHANNEL_OUT_5POINT1POINT4,
49 AUDIO_CHANNEL_OUT_7POINT1POINT2,
50 AUDIO_CHANNEL_OUT_7POINT1POINT4,
51 AUDIO_CHANNEL_OUT_13POINT_360RA,
52 AUDIO_CHANNEL_OUT_22POINT2,
53 audio_channel_mask_t(AUDIO_CHANNEL_OUT_22POINT2
54 | AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT | AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT),
55 };
56
// Gains referenced by the expected-scale tables for the
// left/right-of-center, top-center, top-back-center and top-side positions.
constexpr float COEF_25{0.2508909536f};
constexpr float COEF_35{0.3543928915f};
constexpr float COEF_36{0.3552343859f};
constexpr float COEF_61{0.6057043428f};
61
62 constexpr inline float kScaleFromChannelIdxLeft[] = {
63 1.f, // AUDIO_CHANNEL_OUT_FRONT_LEFT = 0x1u,
64 0.f, // AUDIO_CHANNEL_OUT_FRONT_RIGHT = 0x2u,
65 M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_CENTER = 0x4u,
66 0.5f, // AUDIO_CHANNEL_OUT_LOW_FREQUENCY = 0x8u,
67 M_SQRT1_2, // AUDIO_CHANNEL_OUT_BACK_LEFT = 0x10u,
68 0.f, // AUDIO_CHANNEL_OUT_BACK_RIGHT = 0x20u,
69 COEF_61, // AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER = 0x40u,
70 COEF_25, // AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER = 0x80u,
71 0.5f, // AUDIO_CHANNEL_OUT_BACK_CENTER = 0x100u,
72 M_SQRT1_2, // AUDIO_CHANNEL_OUT_SIDE_LEFT = 0x200u,
73 0.f, // AUDIO_CHANNEL_OUT_SIDE_RIGHT = 0x400u,
74 COEF_36, // AUDIO_CHANNEL_OUT_TOP_CENTER = 0x800u,
75 1.f, // AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT = 0x1000u,
76 M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER = 0x2000u,
77 0.f, // AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT = 0x4000u,
78 M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_BACK_LEFT = 0x8000u,
79 COEF_35, // AUDIO_CHANNEL_OUT_TOP_BACK_CENTER = 0x10000u,
80 0.f, // AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT = 0x20000u,
81 COEF_61, // AUDIO_CHANNEL_OUT_TOP_SIDE_LEFT = 0x40000u,
82 0.f, // AUDIO_CHANNEL_OUT_TOP_SIDE_RIGHT = 0x80000u,
83 1.f, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_LEFT = 0x100000u,
84 M_SQRT1_2, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_CENTER = 0x200000u,
85 0.f, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_RIGHT = 0x400000u,
86 0.f, // AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2 = 0x800000u,
87 M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT = 0x1000000u,
88 0.f, // AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT = 0x2000000u,
89 };
90
91 constexpr inline float kScaleFromChannelIdxRight[] = {
92 0.f, // AUDIO_CHANNEL_OUT_FRONT_LEFT = 0x1u,
93 1.f, // AUDIO_CHANNEL_OUT_FRONT_RIGHT = 0x2u,
94 M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_CENTER = 0x4u,
95 0.5f, // AUDIO_CHANNEL_OUT_LOW_FREQUENCY = 0x8u,
96 0.f, // AUDIO_CHANNEL_OUT_BACK_LEFT = 0x10u,
97 M_SQRT1_2, // AUDIO_CHANNEL_OUT_BACK_RIGHT = 0x20u,
98 COEF_25, // AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER = 0x40u,
99 COEF_61, // AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER = 0x80u,
100 0.5f, // AUDIO_CHANNEL_OUT_BACK_CENTER = 0x100u,
101 0.f, // AUDIO_CHANNEL_OUT_SIDE_LEFT = 0x200u,
102 M_SQRT1_2, // AUDIO_CHANNEL_OUT_SIDE_RIGHT = 0x400u,
103 COEF_36, // AUDIO_CHANNEL_OUT_TOP_CENTER = 0x800u,
104 0.f, // AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT = 0x1000u,
105 M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER = 0x2000u,
106 1.f, // AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT = 0x4000u,
107 0.f, // AUDIO_CHANNEL_OUT_TOP_BACK_LEFT = 0x8000u,
108 COEF_35, // AUDIO_CHANNEL_OUT_TOP_BACK_CENTER = 0x10000u,
109 M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT = 0x20000u,
110 0.f, // AUDIO_CHANNEL_OUT_TOP_SIDE_LEFT = 0x40000u,
111 COEF_61, // AUDIO_CHANNEL_OUT_TOP_SIDE_RIGHT = 0x80000u,
112 0.f, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_LEFT = 0x100000u,
113 M_SQRT1_2, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_CENTER = 0x200000u,
114 1.f, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_RIGHT = 0x400000u,
115 M_SQRT1_2, // AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2 = 0x800000u,
116 0.f, // AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT = 0x1000000u,
117 M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT = 0x2000000u,
118 };
119
// The downmix result does not depend on the sample rate, so one rate suffices.
static constexpr size_t kSampleRates[] = {48000};
124
// Near-equality check whose tolerance scales with the expected value `e`:
// |e| * 8 * epsilon, i.e. 16x the first bit that no longer fits the mantissa.
// This works so long as we add values close in exponent with each other,
// realizing that errors accumulate as the sqrt of N (random walk, lln, etc).
//
// std::abs is spelled out so the floating-point overload is always selected;
// an unqualified abs() may resolve to the integer ::abs and truncate the
// tolerance to zero.
#define EXPECT_NEAR_EPSILON(e, v) EXPECT_NEAR((e), (v), \
        std::abs((e) * std::numeric_limits<std::decay_t<decltype(e)>>::epsilon() * 8))
130
131 template<typename T>
channelStatistics(const std::vector<T> & input,size_t channels)132 static auto channelStatistics(const std::vector<T>& input, size_t channels) {
133 std::vector<android::audio_utils::Statistics<T>> result(channels);
134 const size_t frames = input.size() / channels;
135 if (frames > 0) {
136 const float *fptr = input.data();
137 for (size_t i = 0; i < frames; ++i) {
138 for (size_t j = 0; j < channels; ++j) {
139 result[j].add(*fptr++);
140 }
141 }
142 }
143 return result;
144 }
145
146 using DownmixParam = std::tuple<int /* sample rate */, int /* channel mask */>;
147 class DownmixTest : public ::testing::TestWithParam<DownmixParam> {
148 public:
149 static constexpr effect_uuid_t downmix_uuid_ = {
150 0x93f04452, 0xe4fe, 0x41cc, 0x91f9, {0xe4, 0x75, 0xb6, 0xd1, 0xd6, 0x9f}};
151 static constexpr size_t FRAME_LENGTH = 256;
152
testBalance(int sampleRate,audio_channel_mask_t channelMask)153 void testBalance(int sampleRate, audio_channel_mask_t channelMask) {
154 using namespace ::android::audio_utils::channels;
155
156 size_t frames = 100; // set to an even number (2, 4, 6 ... ) stream alternates +1, -1.
157 constexpr unsigned outChannels = 2;
158 unsigned inChannels = audio_channel_count_from_out_mask(channelMask);
159 std::vector<float> input(frames * inChannels);
160 std::vector<float> output(frames * outChannels);
161
162 double savedPower[32][2]{};
163 for (unsigned i = 0, channel = channelMask; channel != 0; ++i) {
164 const int index = __builtin_ctz(channel);
165 ASSERT_LT(index, FCC_26);
166 const int pairIndex = pairIdxFromChannelIdx(index);
167 const AUDIO_GEOMETRY_SIDE side = sideFromChannelIdx(index);
168 const int channelBit = 1 << index;
169 channel &= ~channelBit;
170
171 // Generate a +1, -1 alternating stream in one channel, which has variance 1.
172 auto indata = input.data();
173 for (unsigned j = 0; j < frames; ++j) {
174 for (unsigned k = 0; k < inChannels; ++k) {
175 *indata++ = (k == i) ? (j & 1 ? -1 : 1) : 0;
176 }
177 }
178 run(sampleRate, channelMask, input, output, frames);
179
180 auto stats = channelStatistics(output, 2 /* channels */);
181 // printf("power: %s %s\n", stats[0].toString().c_str(), stats[1].toString().c_str());
182 double power[2] = { stats[0].getPopVariance(), stats[1].getPopVariance() };
183
184 // Check symmetric power for pair channels on exchange of left/right position.
185 // to do this, we save previous power measurements.
186 if (pairIndex >= 0 && pairIndex < index) {
187 EXPECT_NEAR_EPSILON(power[0], savedPower[pairIndex][1]);
188 EXPECT_NEAR_EPSILON(power[1], savedPower[pairIndex][0]);
189 }
190 savedPower[index][0] = power[0];
191 savedPower[index][1] = power[1];
192
193 constexpr float POWER_TOLERANCE = 0.001;
194 const float expectedPower =
195 kScaleFromChannelIdxLeft[index] * kScaleFromChannelIdxLeft[index]
196 + kScaleFromChannelIdxRight[index] * kScaleFromChannelIdxRight[index];
197 EXPECT_NEAR(expectedPower, power[0] + power[1], POWER_TOLERANCE);
198 switch (side) {
199 case AUDIO_GEOMETRY_SIDE_LEFT:
200 if (channelBit == AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER) {
201 break;
202 }
203 EXPECT_EQ(0.f, power[1]);
204 break;
205 case AUDIO_GEOMETRY_SIDE_RIGHT:
206 if (channelBit == AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER) {
207 break;
208 }
209 EXPECT_EQ(0.f, power[0]);
210 break;
211 case AUDIO_GEOMETRY_SIDE_CENTER:
212 if (channelBit == AUDIO_CHANNEL_OUT_LOW_FREQUENCY) {
213 if (channelMask & AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2) {
214 EXPECT_EQ(0.f, power[1]);
215 break;
216 } else {
217 EXPECT_NEAR_EPSILON(power[0], power[1]); // always true
218 EXPECT_NEAR(expectedPower, power[0] + power[1], POWER_TOLERANCE);
219 break;
220 }
221 } else if (channelBit == AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2) {
222 EXPECT_EQ(0.f, power[0]);
223 EXPECT_NEAR(expectedPower, power[1], POWER_TOLERANCE);
224 break;
225 }
226 EXPECT_NEAR_EPSILON(power[0], power[1]);
227 break;
228 }
229 }
230 }
231
run(int sampleRate,audio_channel_mask_t channelMask,std::vector<float> & input,std::vector<float> & output,size_t frames)232 void run(int sampleRate, audio_channel_mask_t channelMask,
233 std::vector<float>& input, std::vector<float>& output, size_t frames) {
234 reconfig(sampleRate, channelMask);
235
236 ASSERT_EQ(frames * inputChannelCount_, input.size());
237 ASSERT_EQ(frames * outputChannelCount_, output.size());
238
239 const int32_t sessionId = 0;
240 const int32_t ioId = 0;
241 int32_t err = AUDIO_EFFECT_LIBRARY_INFO_SYM.create_effect(
242 &downmix_uuid_, sessionId, ioId, &handle_);
243 ASSERT_EQ(0, err);
244
245 const struct effect_interface_s * const downmixApi = *handle_;
246 int32_t reply = 0;
247 uint32_t replySize = (uint32_t)sizeof(reply);
248 err = (downmixApi->command)(
249 handle_, EFFECT_CMD_SET_CONFIG,
250 sizeof(effect_config_t), &config_, &replySize, &reply);
251 ASSERT_EQ(0, err);
252 ASSERT_EQ(0, reply);
253 err = (downmixApi->command)(
254 handle_, EFFECT_CMD_ENABLE,
255 0, nullptr, &replySize, &reply);
256 ASSERT_EQ(0, err);
257
258 process(input, output, frames);
259 err = AUDIO_EFFECT_LIBRARY_INFO_SYM.release_effect(handle_);
260 ASSERT_EQ(0, err);
261 }
262
263 // This test assumes the channel mask is invalid.
testInvalidChannelMask(audio_channel_mask_t invalidChannelMask)264 void testInvalidChannelMask(audio_channel_mask_t invalidChannelMask) {
265 reconfig(48000 /* sampleRate */, invalidChannelMask);
266 const int32_t sessionId = 0;
267 const int32_t ioId = 0;
268 int32_t err = AUDIO_EFFECT_LIBRARY_INFO_SYM.create_effect(
269 &downmix_uuid_, sessionId, ioId, &handle_);
270 ASSERT_EQ(0, err);
271
272 const struct effect_interface_s * const downmixApi = *handle_;
273 int32_t reply = 0;
274 uint32_t replySize = (uint32_t)sizeof(reply);
275 err = (downmixApi->command)(
276 handle_, EFFECT_CMD_SET_CONFIG,
277 sizeof(effect_config_t), &config_, &replySize, &reply);
278 ASSERT_EQ(0, err);
279 ASSERT_NE(0, reply); // error has occurred.
280 err = AUDIO_EFFECT_LIBRARY_INFO_SYM.release_effect(handle_);
281 ASSERT_EQ(0, err);
282 }
283
284 private:
reconfig(int sampleRate,audio_channel_mask_t channelMask)285 void reconfig(int sampleRate, audio_channel_mask_t channelMask) {
286 config_.inputCfg.accessMode = EFFECT_BUFFER_ACCESS_READ;
287 config_.inputCfg.format = AUDIO_FORMAT_PCM_FLOAT;
288 config_.inputCfg.bufferProvider.getBuffer = nullptr;
289 config_.inputCfg.bufferProvider.releaseBuffer = nullptr;
290 config_.inputCfg.bufferProvider.cookie = nullptr;
291 config_.inputCfg.mask = EFFECT_CONFIG_ALL;
292
293 config_.outputCfg.accessMode = EFFECT_BUFFER_ACCESS_WRITE;
294 config_.outputCfg.format = AUDIO_FORMAT_PCM_FLOAT;
295 config_.outputCfg.bufferProvider.getBuffer = nullptr;
296 config_.outputCfg.bufferProvider.releaseBuffer = nullptr;
297 config_.outputCfg.bufferProvider.cookie = nullptr;
298 config_.outputCfg.mask = EFFECT_CONFIG_ALL;
299
300 config_.inputCfg.samplingRate = sampleRate;
301 config_.inputCfg.channels = channelMask;
302 inputChannelCount_ = audio_channel_count_from_out_mask(config_.inputCfg.channels);
303
304 config_.outputCfg.samplingRate = sampleRate;
305 config_.outputCfg.channels = AUDIO_CHANNEL_OUT_STEREO; // output always stereo
306 outputChannelCount_ = audio_channel_count_from_out_mask(config_.outputCfg.channels);
307 }
308
process(std::vector<float> & input,std::vector<float> & output,size_t frames) const309 void process(std::vector<float> &input, std::vector<float> &output, size_t frames) const {
310 const struct effect_interface_s * const downmixApi = *handle_;
311
312 for (size_t pos = 0; pos < frames;) {
313 const size_t transfer = std::min(frames - pos, FRAME_LENGTH);
314 audio_buffer_t inbuffer{.frameCount = transfer,
315 .f32 = input.data() + pos * inputChannelCount_};
316 audio_buffer_t outbuffer{.frameCount = transfer,
317 .f32 = output.data() + pos * outputChannelCount_};
318 const int32_t err = (downmixApi->process)(handle_, &inbuffer, &outbuffer);
319 ASSERT_EQ(0, err);
320 pos += transfer;
321 }
322 }
323
324 effect_handle_t handle_{};
325 effect_config_t config_{};
326 int outputChannelCount_{};
327 int inputChannelCount_{};
328 };
329
TEST(DownmixTestSimple, invalidChannelMask) {
    // Provide an empty TestBody() so the DownmixTest fixture can be instantiated
    // directly, outside of a parameterized test.
    class DownmixTestComplete : public DownmixTest {
        void TestBody() override {}
    } downmixtest;

    // Only the top bit set. The shift is done on an unsigned operand so it never
    // moves a bit into the sign position of a signed int.
    constexpr auto INVALID_CHANNEL_MASK = audio_channel_mask_t(1u << 31);
    downmixtest.testInvalidChannelMask(INVALID_CHANNEL_MASK);
}
339
TEST_P(DownmixTest, basic) {
    // The parameter holds indices into the sample-rate and channel-mask tables.
    const auto [rateIndex, maskIndex] = GetParam();
    testBalance(kSampleRates[rateIndex], kChannelPositionMasks[maskIndex]);
}
344
345 INSTANTIATE_TEST_SUITE_P(
346 DownmixTestAll, DownmixTest,
347 ::testing::Combine(
348 ::testing::Range(0, (int)std::size(kSampleRates)),
349 ::testing::Range(0, (int)std::size(kChannelPositionMasks))
350 ),
__anon47b21dca0102(const testing::TestParamInfo<DownmixTest::ParamType>& info) 351 [](const testing::TestParamInfo<DownmixTest::ParamType>& info) {
352 const int index = std::get<1>(info.param);
353 const audio_channel_mask_t channelMask = kChannelPositionMasks[index];
354 const std::string name = std::string(audio_channel_out_mask_to_string(channelMask))
355 + "_" + std::to_string(std::get<0>(info.param)) + "_" + std::to_string(index);
356 return name;
357 });
358