1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <algorithm>
#include <cmath>
#include <limits>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>

#include "EffectDownmix.h"

#include <audio_utils/channels.h>
#include <audio_utils/primitives.h>
#include <audio_utils/Statistics.h>
#include <gtest/gtest.h>
#include <log/log.h>
26 
27 extern audio_effect_library_t AUDIO_EFFECT_LIBRARY_INFO_SYM;
28 static constexpr audio_channel_mask_t kChannelPositionMasks[] = {
29     AUDIO_CHANNEL_OUT_FRONT_LEFT, // Legacy: the downmix effect treats MONO as FRONT_LEFT only.
30                                   // The AudioMixer interprets MONO as a special case requiring
31                                   // channel replication, bypassing the downmix effect.
32     AUDIO_CHANNEL_OUT_FRONT_CENTER,
33     AUDIO_CHANNEL_OUT_STEREO,
34     AUDIO_CHANNEL_OUT_2POINT1,
35     AUDIO_CHANNEL_OUT_2POINT0POINT2,
36     AUDIO_CHANNEL_OUT_QUAD, // AUDIO_CHANNEL_OUT_QUAD_BACK
37     AUDIO_CHANNEL_OUT_QUAD_SIDE,
38     AUDIO_CHANNEL_OUT_SURROUND,
39     AUDIO_CHANNEL_OUT_2POINT1POINT2,
40     AUDIO_CHANNEL_OUT_3POINT0POINT2,
41     AUDIO_CHANNEL_OUT_PENTA,
42     AUDIO_CHANNEL_OUT_3POINT1POINT2,
43     AUDIO_CHANNEL_OUT_5POINT1, // AUDIO_CHANNEL_OUT_5POINT1_BACK
44     AUDIO_CHANNEL_OUT_5POINT1_SIDE,
45     AUDIO_CHANNEL_OUT_6POINT1,
46     AUDIO_CHANNEL_OUT_5POINT1POINT2,
47     AUDIO_CHANNEL_OUT_7POINT1,
48     AUDIO_CHANNEL_OUT_5POINT1POINT4,
49     AUDIO_CHANNEL_OUT_7POINT1POINT2,
50     AUDIO_CHANNEL_OUT_7POINT1POINT4,
51     AUDIO_CHANNEL_OUT_13POINT_360RA,
52     AUDIO_CHANNEL_OUT_22POINT2,
53     audio_channel_mask_t(AUDIO_CHANNEL_OUT_22POINT2
54             | AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT | AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT),
55 };
56 
// Named scale factors shared by the left/right expectation tables below.
constexpr float COEF_25 = 0.2508909536f;
constexpr float COEF_35 = 0.3543928915f;
constexpr float COEF_36 = 0.3552343859f;
constexpr float COEF_61 = 0.6057043428f;

// Expected scale applied to each input channel when it is mixed into the LEFT output.
// Entries are indexed by channel position bit index (bit 0 = FRONT_LEFT ... bit 25 =
// FRONT_WIDE_RIGHT); the test squares these to obtain the expected output power.
constexpr inline float kScaleFromChannelIdxLeft[] = {
    1.f,       // AUDIO_CHANNEL_OUT_FRONT_LEFT            = 0x1u,
    0.f,       // AUDIO_CHANNEL_OUT_FRONT_RIGHT           = 0x2u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_CENTER          = 0x4u,
    0.5f,      // AUDIO_CHANNEL_OUT_LOW_FREQUENCY         = 0x8u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_BACK_LEFT             = 0x10u,
    0.f,       // AUDIO_CHANNEL_OUT_BACK_RIGHT            = 0x20u,
    COEF_61,   // AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER  = 0x40u,
    COEF_25,   // AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER = 0x80u,
    0.5f,      // AUDIO_CHANNEL_OUT_BACK_CENTER           = 0x100u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_SIDE_LEFT             = 0x200u,
    0.f,       // AUDIO_CHANNEL_OUT_SIDE_RIGHT            = 0x400u,
    COEF_36,   // AUDIO_CHANNEL_OUT_TOP_CENTER            = 0x800u,
    1.f,       // AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT        = 0x1000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER      = 0x2000u,
    0.f,       // AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT       = 0x4000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_BACK_LEFT         = 0x8000u,
    COEF_35,   // AUDIO_CHANNEL_OUT_TOP_BACK_CENTER       = 0x10000u,
    0.f,       // AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT        = 0x20000u,
    COEF_61,   // AUDIO_CHANNEL_OUT_TOP_SIDE_LEFT         = 0x40000u,
    0.f,       // AUDIO_CHANNEL_OUT_TOP_SIDE_RIGHT        = 0x80000u,
    1.f,       // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_LEFT     = 0x100000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_CENTER   = 0x200000u,
    0.f,       // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_RIGHT    = 0x400000u,
    0.f,       // AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2       = 0x800000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT       = 0x1000000u,
    0.f,       // AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT      = 0x2000000u,
};

// Expected scale applied to each input channel when it is mixed into the RIGHT output.
// Same indexing as kScaleFromChannelIdxLeft.
constexpr inline float kScaleFromChannelIdxRight[] = {
    0.f,       // AUDIO_CHANNEL_OUT_FRONT_LEFT            = 0x1u,
    1.f,       // AUDIO_CHANNEL_OUT_FRONT_RIGHT           = 0x2u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_CENTER          = 0x4u,
    0.5f,      // AUDIO_CHANNEL_OUT_LOW_FREQUENCY         = 0x8u,
    0.f,       // AUDIO_CHANNEL_OUT_BACK_LEFT             = 0x10u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_BACK_RIGHT            = 0x20u,
    COEF_25,   // AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER  = 0x40u,
    COEF_61,   // AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER = 0x80u,
    0.5f,      // AUDIO_CHANNEL_OUT_BACK_CENTER           = 0x100u,
    0.f,       // AUDIO_CHANNEL_OUT_SIDE_LEFT             = 0x200u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_SIDE_RIGHT            = 0x400u,
    COEF_36,   // AUDIO_CHANNEL_OUT_TOP_CENTER            = 0x800u,
    0.f,       // AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT        = 0x1000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER      = 0x2000u,
    1.f,       // AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT       = 0x4000u,
    0.f,       // AUDIO_CHANNEL_OUT_TOP_BACK_LEFT         = 0x8000u,
    COEF_35,   // AUDIO_CHANNEL_OUT_TOP_BACK_CENTER       = 0x10000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT        = 0x20000u,
    0.f,       // AUDIO_CHANNEL_OUT_TOP_SIDE_LEFT         = 0x40000u,
    COEF_61,   // AUDIO_CHANNEL_OUT_TOP_SIDE_RIGHT        = 0x80000u,
    0.f,       // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_LEFT     = 0x100000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_CENTER   = 0x200000u,
    1.f,       // AUDIO_CHANNEL_OUT_BOTTOM_FRONT_RIGHT    = 0x400000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2       = 0x800000u,
    0.f,       // AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT       = 0x1000000u,
    M_SQRT1_2, // AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT      = 0x2000000u,
};

// Both tables are read with the same channel index, so a length mismatch (e.g. a channel
// added to only one table) would become an out-of-bounds read; catch it at compile time.
static_assert(std::size(kScaleFromChannelIdxLeft) == std::size(kScaleFromChannelIdxRight),
        "left/right scale tables must cover the same channel positions");
static_assert(std::size(kScaleFromChannelIdxLeft) == 26,
        "scale tables must have one entry per channel position bit 0..25");
119 
// The downmix result does not depend on the sample rate, so a single rate is enough.
static constexpr size_t kSampleRates[] = { 48000 };
124 
// Expects v to be within a relative tolerance of e: 8 * epsilon of e's type, i.e.
// 16x the bit that doesn't fit the mantissa.
// This works so long as we add values close in exponent with each other,
// realizing that errors accumulate as the sqrt of N (random walk, lln, etc).
// std::abs is spelled out so the floating-point overload is always selected: an unqualified
// abs() may resolve to the C int abs(int), which would truncate the tolerance to 0.
#define EXPECT_NEAR_EPSILON(e, v) EXPECT_NEAR((e), (v), \
        std::abs((e) * std::numeric_limits<std::decay_t<decltype(e)>>::epsilon() * 8))
130 
131 template<typename T>
channelStatistics(const std::vector<T> & input,size_t channels)132 static auto channelStatistics(const std::vector<T>& input, size_t channels) {
133     std::vector<android::audio_utils::Statistics<T>> result(channels);
134     const size_t frames = input.size() / channels;
135     if (frames > 0) {
136         const float *fptr = input.data();
137         for (size_t i = 0; i < frames; ++i) {
138             for (size_t j = 0; j < channels; ++j) {
139                 result[j].add(*fptr++);
140             }
141         }
142     }
143     return result;
144 }
145 
146 using DownmixParam = std::tuple<int /* sample rate */,  int /* channel mask */>;
147 class DownmixTest : public ::testing::TestWithParam<DownmixParam> {
148 public:
149     static constexpr effect_uuid_t downmix_uuid_ = {
150         0x93f04452, 0xe4fe, 0x41cc, 0x91f9, {0xe4, 0x75, 0xb6, 0xd1, 0xd6, 0x9f}};
151     static constexpr size_t FRAME_LENGTH = 256;
152 
testBalance(int sampleRate,audio_channel_mask_t channelMask)153     void testBalance(int sampleRate, audio_channel_mask_t channelMask) {
154         using namespace ::android::audio_utils::channels;
155 
156         size_t frames = 100; // set to an even number (2, 4, 6 ... ) stream alternates +1, -1.
157         constexpr unsigned outChannels = 2;
158         unsigned inChannels = audio_channel_count_from_out_mask(channelMask);
159         std::vector<float> input(frames * inChannels);
160         std::vector<float> output(frames * outChannels);
161 
162         double savedPower[32][2]{};
163         for (unsigned i = 0, channel = channelMask; channel != 0; ++i) {
164             const int index = __builtin_ctz(channel);
165             ASSERT_LT(index, FCC_26);
166             const int pairIndex = pairIdxFromChannelIdx(index);
167             const AUDIO_GEOMETRY_SIDE side = sideFromChannelIdx(index);
168             const int channelBit = 1 << index;
169             channel &= ~channelBit;
170 
171             // Generate a +1, -1 alternating stream in one channel, which has variance 1.
172             auto indata = input.data();
173             for (unsigned j = 0; j < frames; ++j) {
174                 for (unsigned k = 0; k < inChannels; ++k) {
175                     *indata++ = (k == i) ? (j & 1 ? -1 : 1) : 0;
176                 }
177             }
178             run(sampleRate, channelMask, input, output, frames);
179 
180             auto stats = channelStatistics(output, 2 /* channels */);
181             // printf("power: %s %s\n", stats[0].toString().c_str(), stats[1].toString().c_str());
182             double power[2] = { stats[0].getPopVariance(), stats[1].getPopVariance() };
183 
184             // Check symmetric power for pair channels on exchange of left/right position.
185             // to do this, we save previous power measurements.
186             if (pairIndex >= 0 && pairIndex < index) {
187                 EXPECT_NEAR_EPSILON(power[0], savedPower[pairIndex][1]);
188                 EXPECT_NEAR_EPSILON(power[1], savedPower[pairIndex][0]);
189             }
190             savedPower[index][0] = power[0];
191             savedPower[index][1] = power[1];
192 
193             constexpr float POWER_TOLERANCE = 0.001;
194             const float expectedPower =
195                     kScaleFromChannelIdxLeft[index] * kScaleFromChannelIdxLeft[index]
196                     + kScaleFromChannelIdxRight[index] * kScaleFromChannelIdxRight[index];
197             EXPECT_NEAR(expectedPower, power[0] + power[1], POWER_TOLERANCE);
198             switch (side) {
199             case AUDIO_GEOMETRY_SIDE_LEFT:
200                 if (channelBit == AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER) {
201                     break;
202                 }
203                 EXPECT_EQ(0.f, power[1]);
204                 break;
205             case AUDIO_GEOMETRY_SIDE_RIGHT:
206                 if (channelBit == AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER) {
207                     break;
208                 }
209                 EXPECT_EQ(0.f, power[0]);
210                 break;
211             case AUDIO_GEOMETRY_SIDE_CENTER:
212                 if (channelBit == AUDIO_CHANNEL_OUT_LOW_FREQUENCY) {
213                     if (channelMask & AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2) {
214                         EXPECT_EQ(0.f, power[1]);
215                         break;
216                     } else {
217                         EXPECT_NEAR_EPSILON(power[0], power[1]); // always true
218                         EXPECT_NEAR(expectedPower, power[0] + power[1], POWER_TOLERANCE);
219                         break;
220                     }
221                 } else if (channelBit == AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2) {
222                     EXPECT_EQ(0.f, power[0]);
223                     EXPECT_NEAR(expectedPower, power[1], POWER_TOLERANCE);
224                     break;
225                 }
226                 EXPECT_NEAR_EPSILON(power[0], power[1]);
227                 break;
228             }
229         }
230     }
231 
run(int sampleRate,audio_channel_mask_t channelMask,std::vector<float> & input,std::vector<float> & output,size_t frames)232     void run(int sampleRate, audio_channel_mask_t channelMask,
233             std::vector<float>& input, std::vector<float>& output, size_t frames) {
234         reconfig(sampleRate, channelMask);
235 
236         ASSERT_EQ(frames * inputChannelCount_, input.size());
237         ASSERT_EQ(frames * outputChannelCount_, output.size());
238 
239         const int32_t sessionId = 0;
240         const int32_t ioId = 0;
241         int32_t err = AUDIO_EFFECT_LIBRARY_INFO_SYM.create_effect(
242                 &downmix_uuid_, sessionId, ioId,  &handle_);
243         ASSERT_EQ(0, err);
244 
245         const struct effect_interface_s * const downmixApi = *handle_;
246         int32_t reply = 0;
247         uint32_t replySize = (uint32_t)sizeof(reply);
248         err = (downmixApi->command)(
249                 handle_, EFFECT_CMD_SET_CONFIG,
250                 sizeof(effect_config_t), &config_, &replySize, &reply);
251         ASSERT_EQ(0, err);
252         ASSERT_EQ(0, reply);
253         err = (downmixApi->command)(
254                 handle_, EFFECT_CMD_ENABLE,
255                 0, nullptr, &replySize, &reply);
256         ASSERT_EQ(0, err);
257 
258         process(input, output, frames);
259         err = AUDIO_EFFECT_LIBRARY_INFO_SYM.release_effect(handle_);
260         ASSERT_EQ(0, err);
261     }
262 
263     // This test assumes the channel mask is invalid.
testInvalidChannelMask(audio_channel_mask_t invalidChannelMask)264     void testInvalidChannelMask(audio_channel_mask_t invalidChannelMask) {
265         reconfig(48000 /* sampleRate */, invalidChannelMask);
266         const int32_t sessionId = 0;
267         const int32_t ioId = 0;
268         int32_t err = AUDIO_EFFECT_LIBRARY_INFO_SYM.create_effect(
269                 &downmix_uuid_, sessionId, ioId,  &handle_);
270         ASSERT_EQ(0, err);
271 
272         const struct effect_interface_s * const downmixApi = *handle_;
273         int32_t reply = 0;
274         uint32_t replySize = (uint32_t)sizeof(reply);
275         err = (downmixApi->command)(
276                 handle_, EFFECT_CMD_SET_CONFIG,
277                 sizeof(effect_config_t), &config_, &replySize, &reply);
278         ASSERT_EQ(0, err);
279         ASSERT_NE(0, reply);  // error has occurred.
280         err = AUDIO_EFFECT_LIBRARY_INFO_SYM.release_effect(handle_);
281         ASSERT_EQ(0, err);
282     }
283 
284 private:
reconfig(int sampleRate,audio_channel_mask_t channelMask)285     void reconfig(int sampleRate, audio_channel_mask_t channelMask) {
286         config_.inputCfg.accessMode = EFFECT_BUFFER_ACCESS_READ;
287         config_.inputCfg.format = AUDIO_FORMAT_PCM_FLOAT;
288         config_.inputCfg.bufferProvider.getBuffer = nullptr;
289         config_.inputCfg.bufferProvider.releaseBuffer = nullptr;
290         config_.inputCfg.bufferProvider.cookie = nullptr;
291         config_.inputCfg.mask = EFFECT_CONFIG_ALL;
292 
293         config_.outputCfg.accessMode = EFFECT_BUFFER_ACCESS_WRITE;
294         config_.outputCfg.format = AUDIO_FORMAT_PCM_FLOAT;
295         config_.outputCfg.bufferProvider.getBuffer = nullptr;
296         config_.outputCfg.bufferProvider.releaseBuffer = nullptr;
297         config_.outputCfg.bufferProvider.cookie = nullptr;
298         config_.outputCfg.mask = EFFECT_CONFIG_ALL;
299 
300         config_.inputCfg.samplingRate = sampleRate;
301         config_.inputCfg.channels = channelMask;
302         inputChannelCount_ = audio_channel_count_from_out_mask(config_.inputCfg.channels);
303 
304         config_.outputCfg.samplingRate = sampleRate;
305         config_.outputCfg.channels = AUDIO_CHANNEL_OUT_STEREO; // output always stereo
306         outputChannelCount_ = audio_channel_count_from_out_mask(config_.outputCfg.channels);
307     }
308 
process(std::vector<float> & input,std::vector<float> & output,size_t frames) const309     void process(std::vector<float> &input, std::vector<float> &output, size_t frames) const {
310         const struct effect_interface_s * const downmixApi = *handle_;
311 
312         for (size_t pos = 0; pos < frames;) {
313             const size_t transfer = std::min(frames - pos, FRAME_LENGTH);
314             audio_buffer_t inbuffer{.frameCount = transfer,
315                 .f32 = input.data() + pos * inputChannelCount_};
316             audio_buffer_t outbuffer{.frameCount = transfer,
317                 .f32 = output.data() + pos * outputChannelCount_};
318             const int32_t err = (downmixApi->process)(handle_, &inbuffer, &outbuffer);
319             ASSERT_EQ(0, err);
320             pos += transfer;
321         }
322     }
323 
324     effect_handle_t handle_{};
325     effect_config_t config_{};
326     int outputChannelCount_{};
327     int inputChannelCount_{};
328 };
329 
// Checks that the effect rejects a channel mask consisting only of bit 31.
TEST(DownmixTestSimple, invalidChannelMask) {
    // Fill in a dummy test method to use DownmixTest outside of a parameterized test.
    class DownmixTestComplete : public DownmixTest {
        void TestBody() override {}
    } downmixtest;

    // Shift an unsigned literal: 1 << 31 would shift into the sign bit of a signed int.
    constexpr auto INVALID_CHANNEL_MASK = audio_channel_mask_t(1u << 31);
    downmixtest.testInvalidChannelMask(INVALID_CHANNEL_MASK);
}
339 
// Runs the balance check for one (sample rate, channel mask) combination.
TEST_P(DownmixTest, basic) {
    // The parameters are indices into the sample rate and channel mask tables.
    const auto [sampleRateIdx, channelMaskIdx] = GetParam();
    testBalance(kSampleRates[sampleRateIdx], kChannelPositionMasks[channelMaskIdx]);
}
344 
345 INSTANTIATE_TEST_SUITE_P(
346         DownmixTestAll, DownmixTest,
347         ::testing::Combine(
348                 ::testing::Range(0, (int)std::size(kSampleRates)),
349                 ::testing::Range(0, (int)std::size(kChannelPositionMasks))
350                 ),
__anon47b21dca0102(const testing::TestParamInfo<DownmixTest::ParamType>& info) 351         [](const testing::TestParamInfo<DownmixTest::ParamType>& info) {
352             const int index = std::get<1>(info.param);
353             const audio_channel_mask_t channelMask = kChannelPositionMasks[index];
354             const std::string name = std::string(audio_channel_out_mask_to_string(channelMask))
355                 + "_" + std::to_string(std::get<0>(info.param)) + "_" + std::to_string(index);
356             return name;
357         });
358