1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * Tools for measuring latency and for detecting glitches.
19  * These classes are pure math and can be used with any audio system.
20  */
21 
22 #ifndef ANALYZER_LATENCY_ANALYZER_H
23 #define ANALYZER_LATENCY_ANALYZER_H
24 
25 #include <algorithm>
26 #include <assert.h>
27 #include <cctype>
28 #include <iomanip>
29 #include <iostream>
30 #include <math.h>
31 #include <memory>
32 #include <sstream>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 #include <vector>
37 
38 #include "PeakDetector.h"
39 #include "PseudoRandom.h"
40 #include "RandomPulseGenerator.h"
41 
42 // This is used when the code is in Oboe.
43 #ifndef ALOGD
44 #define ALOGD LOGD
45 #define ALOGE LOGE
46 #define ALOGW LOGW
47 #endif
48 
49 #define LOOPBACK_RESULT_TAG  "RESULT: "
50 
// Sample rate, in Hz, used until a caller sets a different one.
static constexpr int32_t kDefaultSampleRate{48000};
// Number of milliseconds in one second; used for frame/millisecond conversions.
static constexpr int32_t kMillisPerSecond{1000};
// Longest latency, in milliseconds, that is budgeted for. Arbitrary and generous.
static constexpr int32_t kMaxLatencyMillis{1000};
// Measurements whose confidence is below this value are reported as errors.
static constexpr double  kMinimumConfidence{0.2};
55 
/**
 * Result of one latency measurement.
 */
struct LatencyReport {
    // Measured latency, in frames. Zero until a measurement succeeds.
    int32_t latencyInFrames = 0;
    // Strength of the measurement. Zero until a measurement succeeds.
    double confidence = 0.0;

    /** Put both fields back to their initial "no measurement" values. */
    void reset() {
        latencyInFrames = 0;
        confidence = 0.0;
    }
};
65 
/**
 * Compute a normalized cross-correlation of two equally sized windows.
 *
 * The result is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2), which is 1.0 for
 * identical windows and -1.0 for a window and its negation.
 *
 * @param a first window, at least windowSize samples
 * @param b second window, at least windowSize samples
 * @param windowSize number of samples to correlate
 * @return the normalized correlation, or 0.0 when the combined energy of the
 *         two windows is below 1.0e-9
 */
static double calculateNormalizedCorrelation(const float *a,
                                             const float *b,
                                             int windowSize) {
    double crossEnergy = 0.0;
    double totalEnergy = 0.0;

    const float *left = a;
    const float *right = b;
    for (int remaining = windowSize; remaining > 0; --remaining) {
        const float x = *left++;
        const float y = *right++;
        crossEnergy += x * y;
        totalEnergy += (x * x) + (y * y);
    }

    // Near-silent windows have no meaningful correlation; report zero instead of dividing.
    return (totalEnergy >= 1.0e-9) ? (2.0 * crossEnergy / totalEnergy) : 0.0;
}
88 
/**
 * Compute the root-mean-square level of a block of samples.
 *
 * @param data samples to measure; not modified
 * @param numSamples number of samples in data
 * @return sqrt(mean(sample^2)), or 0.0 when numSamples is zero or negative
 */
static double calculateRootMeanSquare(const float *data, int32_t numSamples) {
    // An empty block would otherwise evaluate 0.0 / 0 and return NaN.
    if (numSamples <= 0) {
        return 0.0;
    }
    double sum = 0.0;
    for (int32_t i = 0; i < numSamples; i++) {
        const float sample = data[i];
        sum += sample * sample;
    }
    return sqrt(sum / numSamples);
}
97 
98 /**
99  * Monophonic recording with processing.
100  */
101 class AudioRecording
102 {
103 public:
104 
allocate(int maxFrames)105     void allocate(int maxFrames) {
106         mData = std::make_unique<float[]>(maxFrames);
107         mMaxFrames = maxFrames;
108     }
109 
110     // Write SHORT data from the first channel.
write(int16_t * inputData,int32_t inputChannelCount,int32_t numFrames)111     int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
112         // stop at end of buffer
113         if ((mFrameCounter + numFrames) > mMaxFrames) {
114             numFrames = mMaxFrames - mFrameCounter;
115         }
116         for (int i = 0; i < numFrames; i++) {
117             mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
118         }
119         return numFrames;
120     }
121 
122     // Write FLOAT data from the first channel.
write(float * inputData,int32_t inputChannelCount,int32_t numFrames)123     int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
124         // stop at end of buffer
125         if ((mFrameCounter + numFrames) > mMaxFrames) {
126             numFrames = mMaxFrames - mFrameCounter;
127         }
128         for (int i = 0; i < numFrames; i++) {
129             mData[mFrameCounter++] = inputData[i * inputChannelCount];
130         }
131         return numFrames;
132     }
133 
134     // Write FLOAT data from the first channel.
write(float sample)135     int32_t write(float sample) {
136         // stop at end of buffer
137         if (mFrameCounter < mMaxFrames) {
138             mData[mFrameCounter++] = sample;
139             return 1;
140         }
141         return 0;
142     }
143 
clear()144     void clear() {
145         mFrameCounter = 0;
146     }
size()147     int32_t size() const {
148         return mFrameCounter;
149     }
150 
isFull()151     bool isFull() const {
152         return mFrameCounter >= mMaxFrames;
153     }
154 
getData()155     float *getData() const {
156         return mData.get();
157     }
158 
setSampleRate(int32_t sampleRate)159     void setSampleRate(int32_t sampleRate) {
160         mSampleRate = sampleRate;
161     }
162 
getSampleRate()163     int32_t getSampleRate() const {
164         return mSampleRate;
165     }
166 
167     /**
168      * Square the samples so they are all positive and so the peaks are emphasized.
169      */
square()170     void square() {
171         float *x = mData.get();
172         for (int i = 0; i < mFrameCounter; i++) {
173             x[i] *= x[i];
174         }
175     }
176 
177     /**
178      * Amplify a signal so that the peak matches the specified target.
179      *
180      * @param target final max value
181      * @return gain applied to signal
182      */
normalize(float target)183     float normalize(float target) {
184         float maxValue = 1.0e-9f;
185         for (int i = 0; i < mFrameCounter; i++) {
186             maxValue = std::max(maxValue, abs(mData[i]));
187         }
188         float gain = target / maxValue;
189         for (int i = 0; i < mFrameCounter; i++) {
190             mData[i] *= gain;
191         }
192         return gain;
193     }
194 
195 private:
196     std::unique_ptr<float[]> mData;
197     int32_t       mFrameCounter = 0;
198     int32_t       mMaxFrames = 0;
199     int32_t       mSampleRate = kDefaultSampleRate; // common default
200 };
201 
measureLatencyFromPulse(AudioRecording & recorded,AudioRecording & pulse,LatencyReport * report)202 static int measureLatencyFromPulse(AudioRecording &recorded,
203                                    AudioRecording &pulse,
204                                    LatencyReport *report) {
205 
206     report->latencyInFrames = 0;
207     report->confidence = 0.0;
208 
209     int numCorrelations = recorded.size() - pulse.size();
210     if (numCorrelations < 10) {
211         ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
212         return -1;
213     }
214     std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations);
215 
216     // Correlate pulse against the recorded data.
217     for (int i = 0; i < numCorrelations; i++) {
218         float correlation = (float) calculateNormalizedCorrelation(&recorded.getData()[i],
219                                                                    &pulse.getData()[0],
220                                                                    pulse.size());
221         correlations[i] = correlation;
222     }
223 
224     // Find highest peak in correlation array.
225     float peakCorrelation = 0.0;
226     int peakIndex = -1;
227     for (int i = 0; i < numCorrelations; i++) {
228         float value = abs(correlations[i]);
229         if (value > peakCorrelation) {
230             peakCorrelation = value;
231             peakIndex = i;
232         }
233     }
234     if (peakIndex < 0) {
235         ALOGE("%s() no signal for correlation\n", __func__);
236         return -2;
237     }
238 #if 0
239     // Dump correlation data for charting.
240     else {
241         const int margin = 50;
242         int startIndex = std::max(0, peakIndex - margin);
243         int endIndex = std::min(numCorrelations - 1, peakIndex + margin);
244         for (int index = startIndex; index < endIndex; index++) {
245             ALOGD("Correlation, %d, %f", index, correlations[index]);
246         }
247     }
248 #endif
249 
250     report->latencyInFrames = peakIndex;
251     report->confidence = peakCorrelation;
252 
253     return 0;
254 }
255 
256 // ====================================================================================
257 class LoopbackProcessor {
258 public:
259     virtual ~LoopbackProcessor() = default;
260 
261     enum result_code {
262         RESULT_OK = 0,
263         ERROR_NOISY = -99,
264         ERROR_VOLUME_TOO_LOW,
265         ERROR_VOLUME_TOO_HIGH,
266         ERROR_CONFIDENCE,
267         ERROR_INVALID_STATE,
268         ERROR_GLITCHES,
269         ERROR_NO_LOCK
270     };
271 
prepareToTest()272     virtual void prepareToTest() {
273         reset();
274     }
275 
reset()276     virtual void reset() {
277         mResult = 0;
278         mResetCount++;
279     }
280 
281     virtual result_code processInputFrame(float *frameData, int channelCount) = 0;
282     virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;
283 
process(float * inputData,int inputChannelCount,int numInputFrames,float * outputData,int outputChannelCount,int numOutputFrames)284     void process(float *inputData, int inputChannelCount, int numInputFrames,
285                  float *outputData, int outputChannelCount, int numOutputFrames) {
286         int numBoth = std::min(numInputFrames, numOutputFrames);
287         // Process one frame at a time.
288         for (int i = 0; i < numBoth; i++) {
289             processInputFrame(inputData, inputChannelCount);
290             inputData += inputChannelCount;
291             processOutputFrame(outputData, outputChannelCount);
292             outputData += outputChannelCount;
293         }
294         // If there is more input than output.
295         for (int i = numBoth; i < numInputFrames; i++) {
296             processInputFrame(inputData, inputChannelCount);
297             inputData += inputChannelCount;
298         }
299         // If there is more output than input.
300         for (int i = numBoth; i < numOutputFrames; i++) {
301             processOutputFrame(outputData, outputChannelCount);
302             outputData += outputChannelCount;
303         }
304     }
305 
306     virtual std::string analyze() = 0;
307 
printStatus()308     virtual void printStatus() {};
309 
getResult()310     int32_t getResult() {
311         return mResult;
312     }
313 
setResult(int32_t result)314     void setResult(int32_t result) {
315         mResult = result;
316     }
317 
isDone()318     virtual bool isDone() {
319         return false;
320     }
321 
save(const char * fileName)322     virtual int save(const char *fileName) {
323         (void) fileName;
324         return -1;
325     }
326 
load(const char * fileName)327     virtual int load(const char *fileName) {
328         (void) fileName;
329         return -1;
330     }
331 
setSampleRate(int32_t sampleRate)332     virtual void setSampleRate(int32_t sampleRate) {
333         mSampleRate = sampleRate;
334     }
335 
getSampleRate()336     int32_t getSampleRate() const {
337         return mSampleRate;
338     }
339 
getResetCount()340     int32_t getResetCount() const {
341         return mResetCount;
342     }
343 
344     /** Called when not enough input frames could be read after synchronization.
345      */
onInsufficientRead()346     virtual void onInsufficientRead() {
347         reset();
348     }
349 
350 protected:
351     int32_t   mResetCount = 0;
352 
353 private:
354     int32_t mSampleRate = kDefaultSampleRate;
355     int32_t mResult = 0;
356 };
357 
358 class LatencyAnalyzer : public LoopbackProcessor {
359 public:
360 
LatencyAnalyzer()361     LatencyAnalyzer() : LoopbackProcessor() {}
362     virtual ~LatencyAnalyzer() = default;
363 
364     virtual int32_t getProgress() const = 0;
365 
366     virtual int getState() = 0;
367 
368     // @return latency in frames
369     virtual int32_t getMeasuredLatency() = 0;
370 
371     virtual double getMeasuredConfidence() = 0;
372 
373     virtual double getBackgroundRMS() = 0;
374 
375     virtual double getSignalRMS() = 0;
376 
377 };
378 
379 // ====================================================================================
380 /**
381  * Measure latency given a loopback stream data.
382  * Use an encoded bit train as the sound source because it
383  * has an unambiguous correlation value.
384  * Uses a state machine to cycle through various stages.
385  *
386  */
387 class PulseLatencyAnalyzer : public LatencyAnalyzer {
388 public:
389 
PulseLatencyAnalyzer()390     PulseLatencyAnalyzer() : LatencyAnalyzer() {
391         int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
392         int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
393                 / (kFramesPerEncodedBit * kMillisPerSecond);
394         int32_t  pulseLength = numPulseBits * kFramesPerEncodedBit;
395         mFramesToRecord = pulseLength + maxLatencyFrames;
396         mAudioRecording.allocate(mFramesToRecord);
397         mAudioRecording.setSampleRate(getSampleRate());
398         generateRandomPulse(pulseLength);
399     }
400 
generateRandomPulse(int32_t pulseLength)401     void generateRandomPulse(int32_t pulseLength) {
402         mPulse.allocate(pulseLength);
403         RandomPulseGenerator pulser(kFramesPerEncodedBit);
404         for (int i = 0; i < pulseLength; i++) {
405             mPulse.write(pulser.nextFloat());
406         }
407     }
408 
getState()409     int getState() override {
410         return mState;
411     }
412 
setSampleRate(int32_t sampleRate)413     void setSampleRate(int32_t sampleRate) override {
414         LoopbackProcessor::setSampleRate(sampleRate);
415         mAudioRecording.setSampleRate(sampleRate);
416     }
417 
reset()418     void reset() override {
419         LoopbackProcessor::reset();
420         mState = STATE_MEASURE_BACKGROUND;
421         mDownCounter = (int32_t) (getSampleRate() * kBackgroundMeasurementLengthSeconds);
422         mLoopCounter = 0;
423 
424         mPulseCursor = 0;
425         mBackgroundSumSquare = 0.0f;
426         mBackgroundSumCount = 0;
427         mBackgroundRMS = 0.0f;
428         mSignalRMS = 0.0f;
429 
430         mAudioRecording.clear();
431         mLatencyReport.reset();
432     }
433 
hasEnoughData()434     bool hasEnoughData() {
435         return mAudioRecording.isFull();
436     }
437 
isDone()438     bool isDone() override {
439         return mState == STATE_DONE;
440     }
441 
getProgress()442     int32_t getProgress() const override {
443         return mAudioRecording.size();
444     }
445 
analyze()446     std::string analyze() override {
447         std::stringstream report;
448         report << "PulseLatencyAnalyzer ---------------\n";
449         report << LOOPBACK_RESULT_TAG "test.state             = "
450                 << std::setw(8) << mState << "\n";
451         report << LOOPBACK_RESULT_TAG "test.state.name        = "
452                 << convertStateToText(mState) << "\n";
453         report << LOOPBACK_RESULT_TAG "background.rms         = "
454                 << std::setw(8) << mBackgroundRMS << "\n";
455 
456         int32_t newResult = RESULT_OK;
457         if (mState != STATE_GOT_DATA) {
458             report << "WARNING - Bad state. Check volume on device.\n";
459             // setResult(ERROR_INVALID_STATE);
460         } else {
461             float gain = mAudioRecording.normalize(1.0f);
462             measureLatencyFromPulse(mAudioRecording,
463                                     mPulse,
464                                     &mLatencyReport);
465 
466             if (mLatencyReport.confidence < kMinimumConfidence) {
467                 report << "   ERROR - confidence too low!";
468                 newResult = ERROR_CONFIDENCE;
469             } else {
470                 mSignalRMS = calculateRootMeanSquare(
471                         &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
472                                 / gain;
473             }
474             double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
475                                    / getSampleRate();
476             report << LOOPBACK_RESULT_TAG "latency.frames         = " << std::setw(8)
477                    << mLatencyReport.latencyInFrames << "\n";
478             report << LOOPBACK_RESULT_TAG "latency.msec           = " << std::setw(8)
479                    << latencyMillis << "\n";
480             report << LOOPBACK_RESULT_TAG "latency.confidence     = " << std::setw(8)
481                    << mLatencyReport.confidence << "\n";
482         }
483         mState = STATE_DONE;
484         if (getResult() == RESULT_OK) {
485             setResult(newResult);
486         }
487 
488         return report.str();
489     }
490 
getMeasuredLatency()491     int32_t getMeasuredLatency() override {
492         return mLatencyReport.latencyInFrames;
493     }
494 
getMeasuredConfidence()495     double getMeasuredConfidence() override {
496         return mLatencyReport.confidence;
497     }
498 
getBackgroundRMS()499     double getBackgroundRMS() override {
500         return mBackgroundRMS;
501     }
502 
getSignalRMS()503     double getSignalRMS() override {
504         return mSignalRMS;
505     }
506 
isRecordingComplete()507     bool isRecordingComplete() {
508         return mState == STATE_GOT_DATA;
509     }
510 
printStatus()511     void printStatus() override {
512         ALOGD("latency: st = %d = %s", mState, convertStateToText(mState));
513     }
514 
processInputFrame(float * frameData,int channelCount)515     result_code processInputFrame(float *frameData, int channelCount) override {
516         echo_state nextState = mState;
517         mLoopCounter++;
518 
519         switch (mState) {
520             case STATE_MEASURE_BACKGROUND:
521                 // Measure background RMS on channel 0
522                 mBackgroundSumSquare += frameData[0] * frameData[0];
523                 mBackgroundSumCount++;
524                 mDownCounter--;
525                 if (mDownCounter <= 0) {
526                     mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount);
527                     nextState = STATE_IN_PULSE;
528                     mPulseCursor = 0;
529                 }
530                 break;
531 
532             case STATE_IN_PULSE:
533                 // Record input until the mAudioRecording is full.
534                 mAudioRecording.write(frameData, channelCount, 1);
535                 if (hasEnoughData()) {
536                     nextState = STATE_GOT_DATA;
537                 }
538                 break;
539 
540             case STATE_GOT_DATA:
541             case STATE_DONE:
542             default:
543                 break;
544         }
545 
546         mState = nextState;
547         return RESULT_OK;
548     }
549 
processOutputFrame(float * frameData,int channelCount)550     result_code processOutputFrame(float *frameData, int channelCount) override {
551         switch (mState) {
552             case STATE_IN_PULSE:
553                 if (mPulseCursor < mPulse.size()) {
554                     float pulseSample = mPulse.getData()[mPulseCursor++];
555                     for (int i = 0; i < channelCount; i++) {
556                         frameData[i] = pulseSample;
557                     }
558                 } else {
559                     for (int i = 0; i < channelCount; i++) {
560                         frameData[i] = 0;
561                     }
562                 }
563                 break;
564 
565             case STATE_MEASURE_BACKGROUND:
566             case STATE_GOT_DATA:
567             case STATE_DONE:
568             default:
569                 for (int i = 0; i < channelCount; i++) {
570                     frameData[i] = 0.0f; // silence
571                 }
572                 break;
573         }
574 
575         return RESULT_OK;
576     }
577 
578 private:
579 
580     enum echo_state {
581         STATE_MEASURE_BACKGROUND,
582         STATE_IN_PULSE,
583         STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
584         STATE_DONE,
585     };
586 
convertStateToText(echo_state state)587     const char *convertStateToText(echo_state state) {
588         switch (state) {
589             case STATE_MEASURE_BACKGROUND:
590                 return "INIT";
591             case STATE_IN_PULSE:
592                 return "PULSE";
593             case STATE_GOT_DATA:
594                 return "GOT_DATA";
595             case STATE_DONE:
596                 return "DONE";
597         }
598         return "UNKNOWN";
599     }
600 
601     int32_t         mDownCounter = 500;
602     int32_t         mLoopCounter = 0;
603     echo_state      mState = STATE_MEASURE_BACKGROUND;
604 
605     static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
606     static constexpr int32_t kPulseLengthMillis = 500;
607     static constexpr double  kBackgroundMeasurementLengthSeconds = 0.5;
608 
609     AudioRecording     mPulse;
610     int32_t            mPulseCursor = 0;
611 
612     double             mBackgroundSumSquare = 0.0;
613     int32_t            mBackgroundSumCount = 0;
614     double             mBackgroundRMS = 0.0;
615     double             mSignalRMS = 0.0;
616     int32_t            mFramesToRecord = 0;
617 
618     AudioRecording     mAudioRecording; // contains only the input after starting the pulse
619     LatencyReport      mLatencyReport;
620 };
621 
622 #endif // ANALYZER_LATENCY_ANALYZER_H
623