1 /*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /**
18 * Tools for measuring latency and for detecting glitches.
19 * These classes are pure math and can be used with any audio system.
20 */
21
22 #ifndef ANALYZER_LATENCY_ANALYZER_H
23 #define ANALYZER_LATENCY_ANALYZER_H
24
25 #include <algorithm>
26 #include <assert.h>
27 #include <cctype>
28 #include <iomanip>
29 #include <iostream>
30 #include <math.h>
31 #include <memory>
32 #include <sstream>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 #include <vector>
37
38 #include "PeakDetector.h"
39 #include "PseudoRandom.h"
40 #include "RandomPulseGenerator.h"
41
42
/** Prefix that marks the machine-readable result lines of an analysis report. */
#define LOOPBACK_RESULT_TAG "RESULT: "

/** Sample rate, in frames per second, used until a caller sets a different one. */
constexpr int32_t kDefaultSampleRate = 48000;

/** Number of milliseconds in one second; used to convert between frames and milliseconds. */
constexpr int32_t kMillisPerSecond = 1000;

/** Longest latency, in milliseconds, that the recording is sized for. Arbitrary and generous. */
constexpr int32_t kMaxLatencyMillis = 700;

/** A measurement whose confidence is below this value is reported as an error. */
constexpr double kMinimumConfidence = 0.2;
49
/**
 * Outcome of one latency measurement.
 */
struct LatencyReport {
    /** Offset, in frames, at which the pulse was found in the recording. */
    int32_t latencyInFrames = 0;
    /** Magnitude of the correlation peak that located the pulse; 0.0 means nothing was found. */
    double confidence = 0.0;

    /** Return both fields to their initial "nothing measured" values. */
    void reset() {
        latencyInFrames = 0;
        confidence = 0.0;
    }
};
59
/**
 * Compute a normalized cross-correlation of two windows of equal length.
 *
 * The value is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2), so identical windows
 * give 1.0 and sign-inverted windows give -1.0.
 *
 * @param a first window, holding at least windowSize samples
 * @param b second window, holding at least windowSize samples
 * @param windowSize number of samples to compare
 * @return the correlation, or 0.0 when the combined energy is not at least 1.0e-9
 */
static double calculateNormalizedCorrelation(const float *a,
                                             const float *b,
                                             int windowSize) {
    double crossSum = 0.0;
    double energySum = 0.0;

    // Walk both windows in step, accumulating the cross term and the total energy.
    const float *left = a;
    const float *right = b;
    for (int remaining = windowSize; remaining > 0; --remaining) {
        const float x = *left++;
        const float y = *right++;
        crossSum += x * y;
        energySum += ((x * x) + (y * y));
    }

    // With almost no energy the ratio is meaningless, so report no correlation.
    return (energySum >= 1.0e-9) ? (2.0 * crossSum / energySum) : 0.0;
}
82
/**
 * Compute the root mean square of a run of samples.
 *
 * @param data samples to measure; must hold at least numSamples values
 * @param numSamples number of samples to include
 * @return the RMS value, or 0.0 when numSamples is not positive
 */
static double calculateRootMeanSquare(float *data, int32_t numSamples) {
    // An empty window would otherwise divide zero by zero and return NaN.
    if (numSamples <= 0) {
        return 0.0;
    }
    double sum = 0.0;
    for (int32_t i = 0; i < numSamples; i++) {
        float sample = data[i];
        sum += sample * sample;
    }
    return sqrt(sum / numSamples);
}
91
92 /**
93 * Monophonic recording with processing.
94 */
95 class AudioRecording
96 {
97 public:
98
allocate(int maxFrames)99 void allocate(int maxFrames) {
100 mData = std::make_unique<float[]>(maxFrames);
101 mMaxFrames = maxFrames;
102 }
103
104 // Write SHORT data from the first channel.
write(int16_t * inputData,int32_t inputChannelCount,int32_t numFrames)105 int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
106 // stop at end of buffer
107 if ((mFrameCounter + numFrames) > mMaxFrames) {
108 numFrames = mMaxFrames - mFrameCounter;
109 }
110 for (int i = 0; i < numFrames; i++) {
111 mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
112 }
113 return numFrames;
114 }
115
116 // Write FLOAT data from the first channel.
write(float * inputData,int32_t inputChannelCount,int32_t numFrames)117 int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
118 // stop at end of buffer
119 if ((mFrameCounter + numFrames) > mMaxFrames) {
120 numFrames = mMaxFrames - mFrameCounter;
121 }
122 for (int i = 0; i < numFrames; i++) {
123 mData[mFrameCounter++] = inputData[i * inputChannelCount];
124 }
125 return numFrames;
126 }
127
128 // Write FLOAT data from the first channel.
write(float sample)129 int32_t write(float sample) {
130 // stop at end of buffer
131 if (mFrameCounter < mMaxFrames) {
132 mData[mFrameCounter++] = sample;
133 return 1;
134 }
135 return 0;
136 }
137
clear()138 void clear() {
139 mFrameCounter = 0;
140 }
size()141 int32_t size() const {
142 return mFrameCounter;
143 }
144
isFull()145 bool isFull() const {
146 return mFrameCounter >= mMaxFrames;
147 }
148
getData()149 float *getData() const {
150 return mData.get();
151 }
152
setSampleRate(int32_t sampleRate)153 void setSampleRate(int32_t sampleRate) {
154 mSampleRate = sampleRate;
155 }
156
getSampleRate()157 int32_t getSampleRate() const {
158 return mSampleRate;
159 }
160
161 /**
162 * Square the samples so they are all positive and so the peaks are emphasized.
163 */
square()164 void square() {
165 float *x = mData.get();
166 for (int i = 0; i < mFrameCounter; i++) {
167 x[i] *= x[i];
168 }
169 }
170
171 /**
172 * Amplify a signal so that the peak matches the specified target.
173 *
174 * @param target final max value
175 * @return gain applied to signal
176 */
normalize(float target)177 float normalize(float target) {
178 float maxValue = 1.0e-9f;
179 for (int i = 0; i < mFrameCounter; i++) {
180 maxValue = std::max(maxValue, abs(mData[i]));
181 }
182 float gain = target / maxValue;
183 for (int i = 0; i < mFrameCounter; i++) {
184 mData[i] *= gain;
185 }
186 return gain;
187 }
188
189 private:
190 std::unique_ptr<float[]> mData;
191 int32_t mFrameCounter = 0;
192 int32_t mMaxFrames = 0;
193 int32_t mSampleRate = kDefaultSampleRate; // common default
194 };
195
measureLatencyFromPulse(AudioRecording & recorded,AudioRecording & pulse,LatencyReport * report)196 static int measureLatencyFromPulse(AudioRecording &recorded,
197 AudioRecording &pulse,
198 LatencyReport *report) {
199
200 report->latencyInFrames = 0;
201 report->confidence = 0.0;
202
203 int numCorrelations = recorded.size() - pulse.size();
204 if (numCorrelations < 10) {
205 ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
206 return -1;
207 }
208 std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations);
209
210 // Correlate pulse against the recorded data.
211 for (int i = 0; i < numCorrelations; i++) {
212 float correlation = (float) calculateNormalizedCorrelation(&recorded.getData()[i],
213 &pulse.getData()[0],
214 pulse.size());
215 correlations[i] = correlation;
216 }
217
218 // Find highest peak in correlation array.
219 float peakCorrelation = 0.0;
220 int peakIndex = -1;
221 for (int i = 0; i < numCorrelations; i++) {
222 float value = abs(correlations[i]);
223 if (value > peakCorrelation) {
224 peakCorrelation = value;
225 peakIndex = i;
226 }
227 }
228 if (peakIndex < 0) {
229 ALOGE("%s() no signal for correlation\n", __func__);
230 return -2;
231 }
232
233 report->latencyInFrames = peakIndex;
234 report->confidence = peakCorrelation;
235
236 return 0;
237 }
238
239 // ====================================================================================
240 class LoopbackProcessor {
241 public:
242 virtual ~LoopbackProcessor() = default;
243
244 enum result_code {
245 RESULT_OK = 0,
246 ERROR_NOISY = -99,
247 ERROR_VOLUME_TOO_LOW,
248 ERROR_VOLUME_TOO_HIGH,
249 ERROR_CONFIDENCE,
250 ERROR_INVALID_STATE,
251 ERROR_GLITCHES,
252 ERROR_NO_LOCK
253 };
254
prepareToTest()255 virtual void prepareToTest() {
256 reset();
257 }
258
reset()259 virtual void reset() {
260 mResult = 0;
261 mResetCount++;
262 }
263
264 virtual result_code processInputFrame(float *frameData, int channelCount) = 0;
265 virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;
266
process(float * inputData,int inputChannelCount,int numInputFrames,float * outputData,int outputChannelCount,int numOutputFrames)267 void process(float *inputData, int inputChannelCount, int numInputFrames,
268 float *outputData, int outputChannelCount, int numOutputFrames) {
269 int numBoth = std::min(numInputFrames, numOutputFrames);
270 // Process one frame at a time.
271 for (int i = 0; i < numBoth; i++) {
272 processInputFrame(inputData, inputChannelCount);
273 inputData += inputChannelCount;
274 processOutputFrame(outputData, outputChannelCount);
275 outputData += outputChannelCount;
276 }
277 // If there is more input than output.
278 for (int i = numBoth; i < numInputFrames; i++) {
279 processInputFrame(inputData, inputChannelCount);
280 inputData += inputChannelCount;
281 }
282 // If there is more output than input.
283 for (int i = numBoth; i < numOutputFrames; i++) {
284 processOutputFrame(outputData, outputChannelCount);
285 outputData += outputChannelCount;
286 }
287 }
288
289 virtual std::string analyze() = 0;
290
printStatus()291 virtual void printStatus() {};
292
getResult()293 int32_t getResult() {
294 return mResult;
295 }
296
setResult(int32_t result)297 void setResult(int32_t result) {
298 mResult = result;
299 }
300
isDone()301 virtual bool isDone() {
302 return false;
303 }
304
save(const char * fileName)305 virtual int save(const char *fileName) {
306 (void) fileName;
307 return -1;
308 }
309
load(const char * fileName)310 virtual int load(const char *fileName) {
311 (void) fileName;
312 return -1;
313 }
314
setSampleRate(int32_t sampleRate)315 virtual void setSampleRate(int32_t sampleRate) {
316 mSampleRate = sampleRate;
317 }
318
getSampleRate()319 int32_t getSampleRate() const {
320 return mSampleRate;
321 }
322
getResetCount()323 int32_t getResetCount() const {
324 return mResetCount;
325 }
326
327 /** Called when not enough input frames could be read after synchronization.
328 */
onInsufficientRead()329 virtual void onInsufficientRead() {
330 reset();
331 }
332
333 protected:
334 int32_t mResetCount = 0;
335
336 private:
337 int32_t mSampleRate = kDefaultSampleRate;
338 int32_t mResult = 0;
339 };
340
341 class LatencyAnalyzer : public LoopbackProcessor {
342 public:
343
LatencyAnalyzer()344 LatencyAnalyzer() : LoopbackProcessor() {}
345 virtual ~LatencyAnalyzer() = default;
346
347 virtual int32_t getProgress() const = 0;
348
349 virtual int getState() = 0;
350
351 // @return latency in frames
352 virtual int32_t getMeasuredLatency() = 0;
353
354 virtual double getMeasuredConfidence() = 0;
355
356 virtual double getBackgroundRMS() = 0;
357
358 virtual double getSignalRMS() = 0;
359
360 };
361
362 // ====================================================================================
363 /**
364 * Measure latency given a loopback stream data.
365 * Use an encoded bit train as the sound source because it
366 * has an unambiguous correlation value.
367 * Uses a state machine to cycle through various stages.
368 *
369 */
370 class PulseLatencyAnalyzer : public LatencyAnalyzer {
371 public:
372
PulseLatencyAnalyzer()373 PulseLatencyAnalyzer() : LatencyAnalyzer() {
374 int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
375 int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
376 / (kFramesPerEncodedBit * kMillisPerSecond);
377 int32_t pulseLength = numPulseBits * kFramesPerEncodedBit;
378 mFramesToRecord = pulseLength + maxLatencyFrames;
379 mAudioRecording.allocate(mFramesToRecord);
380 mAudioRecording.setSampleRate(getSampleRate());
381 generateRandomPulse(pulseLength);
382 }
383
generateRandomPulse(int32_t pulseLength)384 void generateRandomPulse(int32_t pulseLength) {
385 mPulse.allocate(pulseLength);
386 RandomPulseGenerator pulser(kFramesPerEncodedBit);
387 for (int i = 0; i < pulseLength; i++) {
388 mPulse.write(pulser.nextFloat());
389 }
390 }
391
getState()392 int getState() override {
393 return mState;
394 }
395
setSampleRate(int32_t sampleRate)396 void setSampleRate(int32_t sampleRate) override {
397 LoopbackProcessor::setSampleRate(sampleRate);
398 mAudioRecording.setSampleRate(sampleRate);
399 }
400
reset()401 void reset() override {
402 LoopbackProcessor::reset();
403 mDownCounter = getSampleRate() / 2;
404 mLoopCounter = 0;
405
406 mPulseCursor = 0;
407 mBackgroundSumSquare = 0.0f;
408 mBackgroundSumCount = 0;
409 mBackgroundRMS = 0.0f;
410 mSignalRMS = 0.0f;
411
412 mState = STATE_MEASURE_BACKGROUND;
413 mAudioRecording.clear();
414 mLatencyReport.reset();
415 }
416
hasEnoughData()417 bool hasEnoughData() {
418 return mAudioRecording.isFull();
419 }
420
isDone()421 bool isDone() override {
422 return mState == STATE_DONE;
423 }
424
getProgress()425 int32_t getProgress() const override {
426 return mAudioRecording.size();
427 }
428
analyze()429 std::string analyze() override {
430 std::stringstream report;
431 report << "PulseLatencyAnalyzer ---------------\n";
432 report << LOOPBACK_RESULT_TAG "test.state = "
433 << std::setw(8) << mState << "\n";
434 report << LOOPBACK_RESULT_TAG "test.state.name = "
435 << convertStateToText(mState) << "\n";
436 report << LOOPBACK_RESULT_TAG "background.rms = "
437 << std::setw(8) << mBackgroundRMS << "\n";
438
439 int32_t newResult = RESULT_OK;
440 if (mState != STATE_GOT_DATA) {
441 report << "WARNING - Bad state. Check volume on device.\n";
442 // setResult(ERROR_INVALID_STATE);
443 } else {
444 float gain = mAudioRecording.normalize(1.0f);
445 measureLatencyFromPulse(mAudioRecording,
446 mPulse,
447 &mLatencyReport);
448
449 if (mLatencyReport.confidence < kMinimumConfidence) {
450 report << " ERROR - confidence too low!";
451 newResult = ERROR_CONFIDENCE;
452 } else {
453 mSignalRMS = calculateRootMeanSquare(
454 &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
455 / gain;
456 }
457 double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
458 / getSampleRate();
459 report << LOOPBACK_RESULT_TAG "latency.frames = " << std::setw(8)
460 << mLatencyReport.latencyInFrames << "\n";
461 report << LOOPBACK_RESULT_TAG "latency.msec = " << std::setw(8)
462 << latencyMillis << "\n";
463 report << LOOPBACK_RESULT_TAG "latency.confidence = " << std::setw(8)
464 << mLatencyReport.confidence << "\n";
465 }
466 mState = STATE_DONE;
467 if (getResult() == RESULT_OK) {
468 setResult(newResult);
469 }
470
471 return report.str();
472 }
473
getMeasuredLatency()474 int32_t getMeasuredLatency() override {
475 return mLatencyReport.latencyInFrames;
476 }
477
getMeasuredConfidence()478 double getMeasuredConfidence() override {
479 return mLatencyReport.confidence;
480 }
481
getBackgroundRMS()482 double getBackgroundRMS() override {
483 return mBackgroundRMS;
484 }
485
getSignalRMS()486 double getSignalRMS() override {
487 return mSignalRMS;
488 }
489
isRecordingComplete()490 bool isRecordingComplete() {
491 return mState == STATE_GOT_DATA;
492 }
493
printStatus()494 void printStatus() override {
495 ALOGD("latency: st = %d = %s", mState, convertStateToText(mState));
496 }
497
processInputFrame(float * frameData,int channelCount)498 result_code processInputFrame(float *frameData, int channelCount) override {
499 echo_state nextState = mState;
500 mLoopCounter++;
501
502 switch (mState) {
503 case STATE_MEASURE_BACKGROUND:
504 // Measure background RMS on channel 0
505 mBackgroundSumSquare += frameData[0] * frameData[0];
506 mBackgroundSumCount++;
507 mDownCounter--;
508 if (mDownCounter <= 0) {
509 mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount);
510 nextState = STATE_IN_PULSE;
511 mPulseCursor = 0;
512 }
513 break;
514
515 case STATE_IN_PULSE:
516 // Record input until the mAudioRecording is full.
517 mAudioRecording.write(frameData, channelCount, 1);
518 if (hasEnoughData()) {
519 nextState = STATE_GOT_DATA;
520 }
521 break;
522
523 case STATE_GOT_DATA:
524 case STATE_DONE:
525 default:
526 break;
527 }
528
529 mState = nextState;
530 return RESULT_OK;
531 }
532
processOutputFrame(float * frameData,int channelCount)533 result_code processOutputFrame(float *frameData, int channelCount) override {
534 switch (mState) {
535 case STATE_IN_PULSE:
536 if (mPulseCursor < mPulse.size()) {
537 float pulseSample = mPulse.getData()[mPulseCursor++];
538 for (int i = 0; i < channelCount; i++) {
539 frameData[i] = pulseSample;
540 }
541 } else {
542 for (int i = 0; i < channelCount; i++) {
543 frameData[i] = 0;
544 }
545 }
546 break;
547
548 case STATE_MEASURE_BACKGROUND:
549 case STATE_GOT_DATA:
550 case STATE_DONE:
551 default:
552 for (int i = 0; i < channelCount; i++) {
553 frameData[i] = 0.0f; // silence
554 }
555 break;
556 }
557
558 return RESULT_OK;
559 }
560
561 private:
562
563 enum echo_state {
564 STATE_MEASURE_BACKGROUND,
565 STATE_IN_PULSE,
566 STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
567 STATE_DONE,
568 };
569
convertStateToText(echo_state state)570 const char *convertStateToText(echo_state state) {
571 switch (state) {
572 case STATE_MEASURE_BACKGROUND:
573 return "INIT";
574 case STATE_IN_PULSE:
575 return "PULSE";
576 case STATE_GOT_DATA:
577 return "GOT_DATA";
578 case STATE_DONE:
579 return "DONE";
580 }
581 return "UNKNOWN";
582 }
583
584 int32_t mDownCounter = 500;
585 int32_t mLoopCounter = 0;
586 echo_state mState = STATE_MEASURE_BACKGROUND;
587
588 static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
589 static constexpr int32_t kPulseLengthMillis = 500;
590
591 AudioRecording mPulse;
592 int32_t mPulseCursor = 0;
593
594 double mBackgroundSumSquare = 0.0;
595 int32_t mBackgroundSumCount = 0;
596 double mBackgroundRMS = 0.0;
597 double mSignalRMS = 0.0;
598 int32_t mFramesToRecord = 0;
599
600 AudioRecording mAudioRecording; // contains only the input after starting the pulse
601 LatencyReport mLatencyReport;
602 };
603
604 #endif // ANALYZER_LATENCY_ANALYZER_H
605