1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * Tools for measuring latency and for detecting glitches.
19  * These classes are pure math and can be used with any audio system.
20  */
21 
22 #ifndef ANALYZER_LATENCY_ANALYZER_H
23 #define ANALYZER_LATENCY_ANALYZER_H
24 
25 #include <algorithm>
26 #include <assert.h>
27 #include <cctype>
28 #include <iomanip>
29 #include <iostream>
30 #include <math.h>
31 #include <memory>
32 #include <sstream>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 #include <vector>
37 
38 #include "PeakDetector.h"
39 #include "PseudoRandom.h"
40 #include "RandomPulseGenerator.h"
41 
42 // This is used when the code is in Oboe.
43 #ifndef ALOGD
44 #define ALOGD printf
45 #define ALOGE printf
46 #define ALOGW printf
47 #endif
48 
49 #define LOOPBACK_RESULT_TAG  "RESULT: "
50 
/** Sample rate, in frames per second, used until a caller sets a different one. */
static constexpr int32_t kDefaultSampleRate{48000};
/** Number of milliseconds in one second. */
static constexpr int32_t kMillisPerSecond{1000};
/** Longest latency the recording buffer leaves room for; arbitrary and generous. */
static constexpr int32_t kMaxLatencyMillis{700};
/** A measurement whose confidence is below this value is reported as an error. */
static constexpr double  kMinimumConfidence{0.2};
55 
/**
 * Result of a single latency measurement.
 */
struct LatencyReport {
    // Offset, in frames, at which the pulse was found in the recording.
    int32_t latencyInFrames = 0;
    // Magnitude of the correlation at that offset.
    double confidence = 0.0;

    /** Set both fields back to zero. */
    void reset() {
        latencyInFrames = 0;
        confidence = 0.0;
    }
};
65 
/**
 * Calculate a normalized cross correlation of two windows of samples.
 *
 * The value is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2).
 *
 * @param a first window
 * @param b second window
 * @param windowSize number of samples read from each window
 * @return the correlation, or 0.0 if the summed squares are not at least 1.0e-9
 */
static double calculateNormalizedCorrelation(const float *a,
                                             const float *b,
                                             int windowSize) {
    double productTotal = 0.0;
    double energyTotal = 0.0;

    // Walk both windows in step, accumulating in double.
    const float *left = a;
    const float *right = b;
    for (int remaining = windowSize; remaining > 0; remaining--) {
        const float x = *left++;
        const float y = *right++;
        productTotal += x * y;
        energyTotal += ((x * x) + (y * y));
    }

    // Without enough energy there is nothing meaningful to normalize against.
    return (energyTotal >= 1.0e-9) ? (2.0 * productTotal / energyTotal) : 0.0;
}
88 
/**
 * Calculate the root mean square of a run of samples.
 *
 * @param data samples to measure; not read when numSamples is not positive
 * @param numSamples number of samples to read from data
 * @return the RMS of the samples, or 0.0 when numSamples is not positive
 */
static double calculateRootMeanSquare(const float *data, int32_t numSamples) {
    // Guard the division below: 0.0 / 0 would produce NaN.
    if (numSamples <= 0) {
        return 0.0;
    }
    double sum = 0.0;
    for (int32_t i = 0; i < numSamples; i++) {
        const float sample = data[i];
        sum += sample * sample;
    }
    return sqrt(sum / numSamples);
}
97 
98 /**
99  * Monophonic recording with processing.
100  */
101 class AudioRecording
102 {
103 public:
104 
allocate(int maxFrames)105     void allocate(int maxFrames) {
106         mData = std::make_unique<float[]>(maxFrames);
107         mMaxFrames = maxFrames;
108     }
109 
110     // Write SHORT data from the first channel.
write(int16_t * inputData,int32_t inputChannelCount,int32_t numFrames)111     int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
112         // stop at end of buffer
113         if ((mFrameCounter + numFrames) > mMaxFrames) {
114             numFrames = mMaxFrames - mFrameCounter;
115         }
116         for (int i = 0; i < numFrames; i++) {
117             mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
118         }
119         return numFrames;
120     }
121 
122     // Write FLOAT data from the first channel.
write(float * inputData,int32_t inputChannelCount,int32_t numFrames)123     int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
124         // stop at end of buffer
125         if ((mFrameCounter + numFrames) > mMaxFrames) {
126             numFrames = mMaxFrames - mFrameCounter;
127         }
128         for (int i = 0; i < numFrames; i++) {
129             mData[mFrameCounter++] = inputData[i * inputChannelCount];
130         }
131         return numFrames;
132     }
133 
134     // Write FLOAT data from the first channel.
write(float sample)135     int32_t write(float sample) {
136         // stop at end of buffer
137         if (mFrameCounter < mMaxFrames) {
138             mData[mFrameCounter++] = sample;
139             return 1;
140         }
141         return 0;
142     }
143 
clear()144     void clear() {
145         mFrameCounter = 0;
146     }
size()147     int32_t size() const {
148         return mFrameCounter;
149     }
150 
isFull()151     bool isFull() const {
152         return mFrameCounter >= mMaxFrames;
153     }
154 
getData()155     float *getData() const {
156         return mData.get();
157     }
158 
setSampleRate(int32_t sampleRate)159     void setSampleRate(int32_t sampleRate) {
160         mSampleRate = sampleRate;
161     }
162 
getSampleRate()163     int32_t getSampleRate() const {
164         return mSampleRate;
165     }
166 
167     /**
168      * Square the samples so they are all positive and so the peaks are emphasized.
169      */
square()170     void square() {
171         float *x = mData.get();
172         for (int i = 0; i < mFrameCounter; i++) {
173             x[i] *= x[i];
174         }
175     }
176 
177     /**
178      * Amplify a signal so that the peak matches the specified target.
179      *
180      * @param target final max value
181      * @return gain applied to signal
182      */
normalize(float target)183     float normalize(float target) {
184         float maxValue = 1.0e-9f;
185         for (int i = 0; i < mFrameCounter; i++) {
186             maxValue = std::max(maxValue, abs(mData[i]));
187         }
188         float gain = target / maxValue;
189         for (int i = 0; i < mFrameCounter; i++) {
190             mData[i] *= gain;
191         }
192         return gain;
193     }
194 
195 private:
196     std::unique_ptr<float[]> mData;
197     int32_t       mFrameCounter = 0;
198     int32_t       mMaxFrames = 0;
199     int32_t       mSampleRate = kDefaultSampleRate; // common default
200 };
201 
measureLatencyFromPulse(AudioRecording & recorded,AudioRecording & pulse,LatencyReport * report)202 static int measureLatencyFromPulse(AudioRecording &recorded,
203                                    AudioRecording &pulse,
204                                    LatencyReport *report) {
205 
206     report->latencyInFrames = 0;
207     report->confidence = 0.0;
208 
209     int numCorrelations = recorded.size() - pulse.size();
210     if (numCorrelations < 10) {
211         ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
212         return -1;
213     }
214     std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations);
215 
216     // Correlate pulse against the recorded data.
217     for (int i = 0; i < numCorrelations; i++) {
218         float correlation = (float) calculateNormalizedCorrelation(&recorded.getData()[i],
219                                                                    &pulse.getData()[0],
220                                                                    pulse.size());
221         correlations[i] = correlation;
222     }
223 
224     // Find highest peak in correlation array.
225     float peakCorrelation = 0.0;
226     int peakIndex = -1;
227     for (int i = 0; i < numCorrelations; i++) {
228         float value = abs(correlations[i]);
229         if (value > peakCorrelation) {
230             peakCorrelation = value;
231             peakIndex = i;
232         }
233     }
234     if (peakIndex < 0) {
235         ALOGE("%s() no signal for correlation\n", __func__);
236         return -2;
237     }
238 
239     report->latencyInFrames = peakIndex;
240     report->confidence = peakCorrelation;
241 
242     return 0;
243 }
244 
245 // ====================================================================================
246 class LoopbackProcessor {
247 public:
248     virtual ~LoopbackProcessor() = default;
249 
250     enum result_code {
251         RESULT_OK = 0,
252         ERROR_NOISY = -99,
253         ERROR_VOLUME_TOO_LOW,
254         ERROR_VOLUME_TOO_HIGH,
255         ERROR_CONFIDENCE,
256         ERROR_INVALID_STATE,
257         ERROR_GLITCHES,
258         ERROR_NO_LOCK
259     };
260 
prepareToTest()261     virtual void prepareToTest() {
262         reset();
263     }
264 
reset()265     virtual void reset() {
266         mResult = 0;
267         mResetCount++;
268     }
269 
270     virtual result_code processInputFrame(float *frameData, int channelCount) = 0;
271     virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;
272 
process(float * inputData,int inputChannelCount,int numInputFrames,float * outputData,int outputChannelCount,int numOutputFrames)273     void process(float *inputData, int inputChannelCount, int numInputFrames,
274                  float *outputData, int outputChannelCount, int numOutputFrames) {
275         int numBoth = std::min(numInputFrames, numOutputFrames);
276         // Process one frame at a time.
277         for (int i = 0; i < numBoth; i++) {
278             processInputFrame(inputData, inputChannelCount);
279             inputData += inputChannelCount;
280             processOutputFrame(outputData, outputChannelCount);
281             outputData += outputChannelCount;
282         }
283         // If there is more input than output.
284         for (int i = numBoth; i < numInputFrames; i++) {
285             processInputFrame(inputData, inputChannelCount);
286             inputData += inputChannelCount;
287         }
288         // If there is more output than input.
289         for (int i = numBoth; i < numOutputFrames; i++) {
290             processOutputFrame(outputData, outputChannelCount);
291             outputData += outputChannelCount;
292         }
293     }
294 
295     virtual std::string analyze() = 0;
296 
printStatus()297     virtual void printStatus() {};
298 
getResult()299     int32_t getResult() {
300         return mResult;
301     }
302 
setResult(int32_t result)303     void setResult(int32_t result) {
304         mResult = result;
305     }
306 
isDone()307     virtual bool isDone() {
308         return false;
309     }
310 
save(const char * fileName)311     virtual int save(const char *fileName) {
312         (void) fileName;
313         return -1;
314     }
315 
load(const char * fileName)316     virtual int load(const char *fileName) {
317         (void) fileName;
318         return -1;
319     }
320 
setSampleRate(int32_t sampleRate)321     virtual void setSampleRate(int32_t sampleRate) {
322         mSampleRate = sampleRate;
323     }
324 
getSampleRate()325     int32_t getSampleRate() const {
326         return mSampleRate;
327     }
328 
getResetCount()329     int32_t getResetCount() const {
330         return mResetCount;
331     }
332 
333     /** Called when not enough input frames could be read after synchronization.
334      */
onInsufficientRead()335     virtual void onInsufficientRead() {
336         reset();
337     }
338 
339 protected:
340     int32_t   mResetCount = 0;
341 
342 private:
343     int32_t mSampleRate = kDefaultSampleRate;
344     int32_t mResult = 0;
345 };
346 
347 class LatencyAnalyzer : public LoopbackProcessor {
348 public:
349 
LatencyAnalyzer()350     LatencyAnalyzer() : LoopbackProcessor() {}
351     virtual ~LatencyAnalyzer() = default;
352 
353     virtual int32_t getProgress() const = 0;
354 
355     virtual int getState() = 0;
356 
357     // @return latency in frames
358     virtual int32_t getMeasuredLatency() = 0;
359 
360     virtual double getMeasuredConfidence() = 0;
361 
362     virtual double getBackgroundRMS() = 0;
363 
364     virtual double getSignalRMS() = 0;
365 
366 };
367 
368 // ====================================================================================
369 /**
370  * Measure latency given a loopback stream data.
371  * Use an encoded bit train as the sound source because it
372  * has an unambiguous correlation value.
373  * Uses a state machine to cycle through various stages.
374  *
375  */
376 class PulseLatencyAnalyzer : public LatencyAnalyzer {
377 public:
378 
PulseLatencyAnalyzer()379     PulseLatencyAnalyzer() : LatencyAnalyzer() {
380         int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
381         int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
382                 / (kFramesPerEncodedBit * kMillisPerSecond);
383         int32_t  pulseLength = numPulseBits * kFramesPerEncodedBit;
384         mFramesToRecord = pulseLength + maxLatencyFrames;
385         mAudioRecording.allocate(mFramesToRecord);
386         mAudioRecording.setSampleRate(getSampleRate());
387         generateRandomPulse(pulseLength);
388     }
389 
generateRandomPulse(int32_t pulseLength)390     void generateRandomPulse(int32_t pulseLength) {
391         mPulse.allocate(pulseLength);
392         RandomPulseGenerator pulser(kFramesPerEncodedBit);
393         for (int i = 0; i < pulseLength; i++) {
394             mPulse.write(pulser.nextFloat());
395         }
396     }
397 
getState()398     int getState() override {
399         return mState;
400     }
401 
setSampleRate(int32_t sampleRate)402     void setSampleRate(int32_t sampleRate) override {
403         LoopbackProcessor::setSampleRate(sampleRate);
404         mAudioRecording.setSampleRate(sampleRate);
405     }
406 
reset()407     void reset() override {
408         LoopbackProcessor::reset();
409         mDownCounter = getSampleRate() / 2;
410         mLoopCounter = 0;
411 
412         mPulseCursor = 0;
413         mBackgroundSumSquare = 0.0f;
414         mBackgroundSumCount = 0;
415         mBackgroundRMS = 0.0f;
416         mSignalRMS = 0.0f;
417 
418         mState = STATE_MEASURE_BACKGROUND;
419         mAudioRecording.clear();
420         mLatencyReport.reset();
421     }
422 
hasEnoughData()423     bool hasEnoughData() {
424         return mAudioRecording.isFull();
425     }
426 
isDone()427     bool isDone() override {
428         return mState == STATE_DONE;
429     }
430 
getProgress()431     int32_t getProgress() const override {
432         return mAudioRecording.size();
433     }
434 
analyze()435     std::string analyze() override {
436         std::stringstream report;
437         report << "PulseLatencyAnalyzer ---------------\n";
438         report << LOOPBACK_RESULT_TAG "test.state             = "
439                 << std::setw(8) << mState << "\n";
440         report << LOOPBACK_RESULT_TAG "test.state.name        = "
441                 << convertStateToText(mState) << "\n";
442         report << LOOPBACK_RESULT_TAG "background.rms         = "
443                 << std::setw(8) << mBackgroundRMS << "\n";
444 
445         int32_t newResult = RESULT_OK;
446         if (mState != STATE_GOT_DATA) {
447             report << "WARNING - Bad state. Check volume on device.\n";
448             // setResult(ERROR_INVALID_STATE);
449         } else {
450             float gain = mAudioRecording.normalize(1.0f);
451             measureLatencyFromPulse(mAudioRecording,
452                                     mPulse,
453                                     &mLatencyReport);
454 
455             if (mLatencyReport.confidence < kMinimumConfidence) {
456                 report << "   ERROR - confidence too low!";
457                 newResult = ERROR_CONFIDENCE;
458             } else {
459                 mSignalRMS = calculateRootMeanSquare(
460                         &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
461                                 / gain;
462             }
463             double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
464                                    / getSampleRate();
465             report << LOOPBACK_RESULT_TAG "latency.frames         = " << std::setw(8)
466                    << mLatencyReport.latencyInFrames << "\n";
467             report << LOOPBACK_RESULT_TAG "latency.msec           = " << std::setw(8)
468                    << latencyMillis << "\n";
469             report << LOOPBACK_RESULT_TAG "latency.confidence     = " << std::setw(8)
470                    << mLatencyReport.confidence << "\n";
471         }
472         mState = STATE_DONE;
473         if (getResult() == RESULT_OK) {
474             setResult(newResult);
475         }
476 
477         return report.str();
478     }
479 
getMeasuredLatency()480     int32_t getMeasuredLatency() override {
481         return mLatencyReport.latencyInFrames;
482     }
483 
getMeasuredConfidence()484     double getMeasuredConfidence() override {
485         return mLatencyReport.confidence;
486     }
487 
getBackgroundRMS()488     double getBackgroundRMS() override {
489         return mBackgroundRMS;
490     }
491 
getSignalRMS()492     double getSignalRMS() override {
493         return mSignalRMS;
494     }
495 
printStatus()496     void printStatus() override {
497         ALOGD("st = %d", mState);
498     }
499 
processInputFrame(float * frameData,int channelCount)500     result_code processInputFrame(float *frameData, int channelCount) override {
501         echo_state nextState = mState;
502         mLoopCounter++;
503 
504         switch (mState) {
505             case STATE_MEASURE_BACKGROUND:
506                 // Measure background RMS on channel 0
507                 mBackgroundSumSquare += frameData[0] * frameData[0];
508                 mBackgroundSumCount++;
509                 mDownCounter--;
510                 if (mDownCounter <= 0) {
511                     mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount);
512                     nextState = STATE_IN_PULSE;
513                     mPulseCursor = 0;
514                 }
515                 break;
516 
517             case STATE_IN_PULSE:
518                 // Record input until the mAudioRecording is full.
519                 mAudioRecording.write(frameData, channelCount, 1);
520                 if (hasEnoughData()) {
521                     nextState = STATE_GOT_DATA;
522                 }
523                 break;
524 
525             case STATE_GOT_DATA:
526             case STATE_DONE:
527             default:
528                 break;
529         }
530 
531         mState = nextState;
532         return RESULT_OK;
533     }
534 
processOutputFrame(float * frameData,int channelCount)535     result_code processOutputFrame(float *frameData, int channelCount) override {
536         switch (mState) {
537             case STATE_IN_PULSE:
538                 if (mPulseCursor < mPulse.size()) {
539                     float pulseSample = mPulse.getData()[mPulseCursor++];
540                     for (int i = 0; i < channelCount; i++) {
541                         frameData[i] = pulseSample;
542                     }
543                 } else {
544                     for (int i = 0; i < channelCount; i++) {
545                         frameData[i] = 0;
546                     }
547                 }
548                 break;
549 
550             case STATE_MEASURE_BACKGROUND:
551             case STATE_GOT_DATA:
552             case STATE_DONE:
553             default:
554                 for (int i = 0; i < channelCount; i++) {
555                     frameData[i] = 0.0f; // silence
556                 }
557                 break;
558         }
559 
560         return RESULT_OK;
561     }
562 
563 private:
564 
565     enum echo_state {
566         STATE_MEASURE_BACKGROUND,
567         STATE_IN_PULSE,
568         STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
569         STATE_DONE,
570     };
571 
convertStateToText(echo_state state)572     const char *convertStateToText(echo_state state) {
573         switch (state) {
574             case STATE_MEASURE_BACKGROUND:
575                 return "INIT";
576             case STATE_IN_PULSE:
577                 return "PULSE";
578             case STATE_GOT_DATA:
579                 return "GOT_DATA";
580             case STATE_DONE:
581                 return "DONE";
582         }
583         return "UNKNOWN";
584     }
585 
586     int32_t         mDownCounter = 500;
587     int32_t         mLoopCounter = 0;
588     echo_state      mState = STATE_MEASURE_BACKGROUND;
589 
590     static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
591     static constexpr int32_t kPulseLengthMillis = 500;
592 
593     AudioRecording     mPulse;
594     int32_t            mPulseCursor = 0;
595 
596     double             mBackgroundSumSquare = 0.0;
597     int32_t            mBackgroundSumCount = 0;
598     double             mBackgroundRMS = 0.0;
599     double             mSignalRMS = 0.0;
600     int32_t            mFramesToRecord = 0;
601 
602     AudioRecording     mAudioRecording; // contains only the input after starting the pulse
603     LatencyReport      mLatencyReport;
604 };
605 
606 #endif // ANALYZER_LATENCY_ANALYZER_H
607