1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * Tools for measuring latency and for detecting glitches.
19  * These classes are pure math and can be used with any audio system.
20  */
21 
22 #ifndef ANALYZER_LATENCY_ANALYZER_H
23 #define ANALYZER_LATENCY_ANALYZER_H
24 
25 #include <algorithm>
26 #include <assert.h>
27 #include <cctype>
28 #include <iomanip>
29 #include <iostream>
30 #include <math.h>
31 #include <memory>
32 #include <sstream>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 #include <vector>
37 
38 #include "PeakDetector.h"
39 #include "PseudoRandom.h"
40 #include "RandomPulseGenerator.h"
41 
42 
// Prefix written in front of each "name = value" line of an analysis report.
// Kept as a macro because it is pasted onto adjacent string literals.
#define LOOPBACK_RESULT_TAG  "RESULT: "

// Sample rate, in frames per second, used until a caller sets another one.
static constexpr int32_t kDefaultSampleRate{48000};
// Number of milliseconds in one second.
static constexpr int32_t kMillisPerSecond{1000};
// Longest latency the recording buffer leaves room for; arbitrary and generous.
static constexpr int32_t kMaxLatencyMillis{700};
// A measurement whose confidence is below this value is reported as an error.
static constexpr double  kMinimumConfidence{0.2};
49 
/**
 * Outcome of one latency measurement.
 */
struct LatencyReport {
    // Measured latency, in frames. Initialized with an integer literal so that
    // no floating point to integer conversion is involved.
    int32_t latencyInFrames = 0;
    // Magnitude of the correlation peak that produced the latency.
    double confidence = 0.0;

    /** Return both fields to their initial zero values. */
    void reset() {
        latencyInFrames = 0;
        confidence = 0.0;
    }
};
59 
/**
 * Calculate a normalized cross correlation between two equally sized windows.
 *
 * The result is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2).
 *
 * @param a first window of samples
 * @param b second window of samples
 * @param windowSize number of samples read from each window
 * @return the normalized correlation, or 0.0 when the combined energy of the
 *         two windows is not at least 1.0e-9
 */
static double calculateNormalizedCorrelation(const float *a,
                                             const float *b,
                                             int windowSize) {
    double crossEnergy = 0.0;  // running sum of a[i] * b[i]
    double totalEnergy = 0.0;  // running sum of a[i]^2 + b[i]^2

    for (int frame = 0; frame < windowSize; ++frame) {
        const float left = a[frame];
        const float right = b[frame];
        crossEnergy += left * right;
        totalEnergy += ((left * left) + (right * right));
    }

    // Written as ">=" so that a NaN energy also falls through to 0.0.
    return (totalEnergy >= 1.0e-9) ? (2.0 * crossEnergy / totalEnergy) : 0.0;
}
82 
/**
 * Calculate the root mean square of a block of samples.
 *
 * @param data samples to measure; not read when numSamples is not positive
 * @param numSamples number of samples in data
 * @return the RMS of the samples, or 0.0 when numSamples is not positive
 */
static double calculateRootMeanSquare(const float *data, int32_t numSamples) {
    // Without this guard an empty block evaluates sqrt(0.0 / 0), which is NaN.
    if (numSamples <= 0) {
        return 0.0;
    }
    double sum = 0.0;
    for (int32_t i = 0; i < numSamples; i++) {
        // Widen before squaring so the square is not rounded to float first.
        const double sample = data[i];
        sum += sample * sample;
    }
    return sqrt(sum / numSamples);
}
91 
92 /**
93  * Monophonic recording with processing.
94  */
95 class AudioRecording
96 {
97 public:
98 
allocate(int maxFrames)99     void allocate(int maxFrames) {
100         mData = std::make_unique<float[]>(maxFrames);
101         mMaxFrames = maxFrames;
102     }
103 
104     // Write SHORT data from the first channel.
write(int16_t * inputData,int32_t inputChannelCount,int32_t numFrames)105     int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
106         // stop at end of buffer
107         if ((mFrameCounter + numFrames) > mMaxFrames) {
108             numFrames = mMaxFrames - mFrameCounter;
109         }
110         for (int i = 0; i < numFrames; i++) {
111             mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
112         }
113         return numFrames;
114     }
115 
116     // Write FLOAT data from the first channel.
write(float * inputData,int32_t inputChannelCount,int32_t numFrames)117     int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
118         // stop at end of buffer
119         if ((mFrameCounter + numFrames) > mMaxFrames) {
120             numFrames = mMaxFrames - mFrameCounter;
121         }
122         for (int i = 0; i < numFrames; i++) {
123             mData[mFrameCounter++] = inputData[i * inputChannelCount];
124         }
125         return numFrames;
126     }
127 
128     // Write FLOAT data from the first channel.
write(float sample)129     int32_t write(float sample) {
130         // stop at end of buffer
131         if (mFrameCounter < mMaxFrames) {
132             mData[mFrameCounter++] = sample;
133             return 1;
134         }
135         return 0;
136     }
137 
clear()138     void clear() {
139         mFrameCounter = 0;
140     }
size()141     int32_t size() const {
142         return mFrameCounter;
143     }
144 
isFull()145     bool isFull() const {
146         return mFrameCounter >= mMaxFrames;
147     }
148 
getData()149     float *getData() const {
150         return mData.get();
151     }
152 
setSampleRate(int32_t sampleRate)153     void setSampleRate(int32_t sampleRate) {
154         mSampleRate = sampleRate;
155     }
156 
getSampleRate()157     int32_t getSampleRate() const {
158         return mSampleRate;
159     }
160 
161     /**
162      * Square the samples so they are all positive and so the peaks are emphasized.
163      */
square()164     void square() {
165         float *x = mData.get();
166         for (int i = 0; i < mFrameCounter; i++) {
167             x[i] *= x[i];
168         }
169     }
170 
171     /**
172      * Amplify a signal so that the peak matches the specified target.
173      *
174      * @param target final max value
175      * @return gain applied to signal
176      */
normalize(float target)177     float normalize(float target) {
178         float maxValue = 1.0e-9f;
179         for (int i = 0; i < mFrameCounter; i++) {
180             maxValue = std::max(maxValue, abs(mData[i]));
181         }
182         float gain = target / maxValue;
183         for (int i = 0; i < mFrameCounter; i++) {
184             mData[i] *= gain;
185         }
186         return gain;
187     }
188 
189 private:
190     std::unique_ptr<float[]> mData;
191     int32_t       mFrameCounter = 0;
192     int32_t       mMaxFrames = 0;
193     int32_t       mSampleRate = kDefaultSampleRate; // common default
194 };
195 
measureLatencyFromPulse(AudioRecording & recorded,AudioRecording & pulse,LatencyReport * report)196 static int measureLatencyFromPulse(AudioRecording &recorded,
197                                    AudioRecording &pulse,
198                                    LatencyReport *report) {
199 
200     report->latencyInFrames = 0;
201     report->confidence = 0.0;
202 
203     int numCorrelations = recorded.size() - pulse.size();
204     if (numCorrelations < 10) {
205         ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
206         return -1;
207     }
208     std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations);
209 
210     // Correlate pulse against the recorded data.
211     for (int i = 0; i < numCorrelations; i++) {
212         float correlation = (float) calculateNormalizedCorrelation(&recorded.getData()[i],
213                                                                    &pulse.getData()[0],
214                                                                    pulse.size());
215         correlations[i] = correlation;
216     }
217 
218     // Find highest peak in correlation array.
219     float peakCorrelation = 0.0;
220     int peakIndex = -1;
221     for (int i = 0; i < numCorrelations; i++) {
222         float value = abs(correlations[i]);
223         if (value > peakCorrelation) {
224             peakCorrelation = value;
225             peakIndex = i;
226         }
227     }
228     if (peakIndex < 0) {
229         ALOGE("%s() no signal for correlation\n", __func__);
230         return -2;
231     }
232 
233     report->latencyInFrames = peakIndex;
234     report->confidence = peakCorrelation;
235 
236     return 0;
237 }
238 
239 // ====================================================================================
240 class LoopbackProcessor {
241 public:
242     virtual ~LoopbackProcessor() = default;
243 
244     enum result_code {
245         RESULT_OK = 0,
246         ERROR_NOISY = -99,
247         ERROR_VOLUME_TOO_LOW,
248         ERROR_VOLUME_TOO_HIGH,
249         ERROR_CONFIDENCE,
250         ERROR_INVALID_STATE,
251         ERROR_GLITCHES,
252         ERROR_NO_LOCK
253     };
254 
prepareToTest()255     virtual void prepareToTest() {
256         reset();
257     }
258 
reset()259     virtual void reset() {
260         mResult = 0;
261         mResetCount++;
262     }
263 
264     virtual result_code processInputFrame(float *frameData, int channelCount) = 0;
265     virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;
266 
process(float * inputData,int inputChannelCount,int numInputFrames,float * outputData,int outputChannelCount,int numOutputFrames)267     void process(float *inputData, int inputChannelCount, int numInputFrames,
268                  float *outputData, int outputChannelCount, int numOutputFrames) {
269         int numBoth = std::min(numInputFrames, numOutputFrames);
270         // Process one frame at a time.
271         for (int i = 0; i < numBoth; i++) {
272             processInputFrame(inputData, inputChannelCount);
273             inputData += inputChannelCount;
274             processOutputFrame(outputData, outputChannelCount);
275             outputData += outputChannelCount;
276         }
277         // If there is more input than output.
278         for (int i = numBoth; i < numInputFrames; i++) {
279             processInputFrame(inputData, inputChannelCount);
280             inputData += inputChannelCount;
281         }
282         // If there is more output than input.
283         for (int i = numBoth; i < numOutputFrames; i++) {
284             processOutputFrame(outputData, outputChannelCount);
285             outputData += outputChannelCount;
286         }
287     }
288 
289     virtual std::string analyze() = 0;
290 
printStatus()291     virtual void printStatus() {};
292 
getResult()293     int32_t getResult() {
294         return mResult;
295     }
296 
setResult(int32_t result)297     void setResult(int32_t result) {
298         mResult = result;
299     }
300 
isDone()301     virtual bool isDone() {
302         return false;
303     }
304 
save(const char * fileName)305     virtual int save(const char *fileName) {
306         (void) fileName;
307         return -1;
308     }
309 
load(const char * fileName)310     virtual int load(const char *fileName) {
311         (void) fileName;
312         return -1;
313     }
314 
setSampleRate(int32_t sampleRate)315     virtual void setSampleRate(int32_t sampleRate) {
316         mSampleRate = sampleRate;
317     }
318 
getSampleRate()319     int32_t getSampleRate() const {
320         return mSampleRate;
321     }
322 
getResetCount()323     int32_t getResetCount() const {
324         return mResetCount;
325     }
326 
327     /** Called when not enough input frames could be read after synchronization.
328      */
onInsufficientRead()329     virtual void onInsufficientRead() {
330         reset();
331     }
332 
333 protected:
334     int32_t   mResetCount = 0;
335 
336 private:
337     int32_t mSampleRate = kDefaultSampleRate;
338     int32_t mResult = 0;
339 };
340 
341 class LatencyAnalyzer : public LoopbackProcessor {
342 public:
343 
LatencyAnalyzer()344     LatencyAnalyzer() : LoopbackProcessor() {}
345     virtual ~LatencyAnalyzer() = default;
346 
347     virtual int32_t getProgress() const = 0;
348 
349     virtual int getState() = 0;
350 
351     // @return latency in frames
352     virtual int32_t getMeasuredLatency() = 0;
353 
354     virtual double getMeasuredConfidence() = 0;
355 
356     virtual double getBackgroundRMS() = 0;
357 
358     virtual double getSignalRMS() = 0;
359 
360 };
361 
362 // ====================================================================================
363 /**
364  * Measure latency given a loopback stream data.
365  * Use an encoded bit train as the sound source because it
366  * has an unambiguous correlation value.
367  * Uses a state machine to cycle through various stages.
368  *
369  */
370 class PulseLatencyAnalyzer : public LatencyAnalyzer {
371 public:
372 
PulseLatencyAnalyzer()373     PulseLatencyAnalyzer() : LatencyAnalyzer() {
374         int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
375         int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
376                 / (kFramesPerEncodedBit * kMillisPerSecond);
377         int32_t  pulseLength = numPulseBits * kFramesPerEncodedBit;
378         mFramesToRecord = pulseLength + maxLatencyFrames;
379         mAudioRecording.allocate(mFramesToRecord);
380         mAudioRecording.setSampleRate(getSampleRate());
381         generateRandomPulse(pulseLength);
382     }
383 
generateRandomPulse(int32_t pulseLength)384     void generateRandomPulse(int32_t pulseLength) {
385         mPulse.allocate(pulseLength);
386         RandomPulseGenerator pulser(kFramesPerEncodedBit);
387         for (int i = 0; i < pulseLength; i++) {
388             mPulse.write(pulser.nextFloat());
389         }
390     }
391 
getState()392     int getState() override {
393         return mState;
394     }
395 
setSampleRate(int32_t sampleRate)396     void setSampleRate(int32_t sampleRate) override {
397         LoopbackProcessor::setSampleRate(sampleRate);
398         mAudioRecording.setSampleRate(sampleRate);
399     }
400 
reset()401     void reset() override {
402         LoopbackProcessor::reset();
403         mDownCounter = getSampleRate() / 2;
404         mLoopCounter = 0;
405 
406         mPulseCursor = 0;
407         mBackgroundSumSquare = 0.0f;
408         mBackgroundSumCount = 0;
409         mBackgroundRMS = 0.0f;
410         mSignalRMS = 0.0f;
411 
412         mState = STATE_MEASURE_BACKGROUND;
413         mAudioRecording.clear();
414         mLatencyReport.reset();
415     }
416 
hasEnoughData()417     bool hasEnoughData() {
418         return mAudioRecording.isFull();
419     }
420 
isDone()421     bool isDone() override {
422         return mState == STATE_DONE;
423     }
424 
getProgress()425     int32_t getProgress() const override {
426         return mAudioRecording.size();
427     }
428 
analyze()429     std::string analyze() override {
430         std::stringstream report;
431         report << "PulseLatencyAnalyzer ---------------\n";
432         report << LOOPBACK_RESULT_TAG "test.state             = "
433                 << std::setw(8) << mState << "\n";
434         report << LOOPBACK_RESULT_TAG "test.state.name        = "
435                 << convertStateToText(mState) << "\n";
436         report << LOOPBACK_RESULT_TAG "background.rms         = "
437                 << std::setw(8) << mBackgroundRMS << "\n";
438 
439         int32_t newResult = RESULT_OK;
440         if (mState != STATE_GOT_DATA) {
441             report << "WARNING - Bad state. Check volume on device.\n";
442             // setResult(ERROR_INVALID_STATE);
443         } else {
444             float gain = mAudioRecording.normalize(1.0f);
445             measureLatencyFromPulse(mAudioRecording,
446                                     mPulse,
447                                     &mLatencyReport);
448 
449             if (mLatencyReport.confidence < kMinimumConfidence) {
450                 report << "   ERROR - confidence too low!";
451                 newResult = ERROR_CONFIDENCE;
452             } else {
453                 mSignalRMS = calculateRootMeanSquare(
454                         &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
455                                 / gain;
456             }
457             double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
458                                    / getSampleRate();
459             report << LOOPBACK_RESULT_TAG "latency.frames         = " << std::setw(8)
460                    << mLatencyReport.latencyInFrames << "\n";
461             report << LOOPBACK_RESULT_TAG "latency.msec           = " << std::setw(8)
462                    << latencyMillis << "\n";
463             report << LOOPBACK_RESULT_TAG "latency.confidence     = " << std::setw(8)
464                    << mLatencyReport.confidence << "\n";
465         }
466         mState = STATE_DONE;
467         if (getResult() == RESULT_OK) {
468             setResult(newResult);
469         }
470 
471         return report.str();
472     }
473 
getMeasuredLatency()474     int32_t getMeasuredLatency() override {
475         return mLatencyReport.latencyInFrames;
476     }
477 
getMeasuredConfidence()478     double getMeasuredConfidence() override {
479         return mLatencyReport.confidence;
480     }
481 
getBackgroundRMS()482     double getBackgroundRMS() override {
483         return mBackgroundRMS;
484     }
485 
getSignalRMS()486     double getSignalRMS() override {
487         return mSignalRMS;
488     }
489 
isRecordingComplete()490     bool isRecordingComplete() {
491         return mState == STATE_GOT_DATA;
492     }
493 
printStatus()494     void printStatus() override {
495         ALOGD("latency: st = %d = %s", mState, convertStateToText(mState));
496     }
497 
processInputFrame(float * frameData,int channelCount)498     result_code processInputFrame(float *frameData, int channelCount) override {
499         echo_state nextState = mState;
500         mLoopCounter++;
501 
502         switch (mState) {
503             case STATE_MEASURE_BACKGROUND:
504                 // Measure background RMS on channel 0
505                 mBackgroundSumSquare += frameData[0] * frameData[0];
506                 mBackgroundSumCount++;
507                 mDownCounter--;
508                 if (mDownCounter <= 0) {
509                     mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount);
510                     nextState = STATE_IN_PULSE;
511                     mPulseCursor = 0;
512                 }
513                 break;
514 
515             case STATE_IN_PULSE:
516                 // Record input until the mAudioRecording is full.
517                 mAudioRecording.write(frameData, channelCount, 1);
518                 if (hasEnoughData()) {
519                     nextState = STATE_GOT_DATA;
520                 }
521                 break;
522 
523             case STATE_GOT_DATA:
524             case STATE_DONE:
525             default:
526                 break;
527         }
528 
529         mState = nextState;
530         return RESULT_OK;
531     }
532 
processOutputFrame(float * frameData,int channelCount)533     result_code processOutputFrame(float *frameData, int channelCount) override {
534         switch (mState) {
535             case STATE_IN_PULSE:
536                 if (mPulseCursor < mPulse.size()) {
537                     float pulseSample = mPulse.getData()[mPulseCursor++];
538                     for (int i = 0; i < channelCount; i++) {
539                         frameData[i] = pulseSample;
540                     }
541                 } else {
542                     for (int i = 0; i < channelCount; i++) {
543                         frameData[i] = 0;
544                     }
545                 }
546                 break;
547 
548             case STATE_MEASURE_BACKGROUND:
549             case STATE_GOT_DATA:
550             case STATE_DONE:
551             default:
552                 for (int i = 0; i < channelCount; i++) {
553                     frameData[i] = 0.0f; // silence
554                 }
555                 break;
556         }
557 
558         return RESULT_OK;
559     }
560 
561 private:
562 
563     enum echo_state {
564         STATE_MEASURE_BACKGROUND,
565         STATE_IN_PULSE,
566         STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
567         STATE_DONE,
568     };
569 
convertStateToText(echo_state state)570     const char *convertStateToText(echo_state state) {
571         switch (state) {
572             case STATE_MEASURE_BACKGROUND:
573                 return "INIT";
574             case STATE_IN_PULSE:
575                 return "PULSE";
576             case STATE_GOT_DATA:
577                 return "GOT_DATA";
578             case STATE_DONE:
579                 return "DONE";
580         }
581         return "UNKNOWN";
582     }
583 
584     int32_t         mDownCounter = 500;
585     int32_t         mLoopCounter = 0;
586     echo_state      mState = STATE_MEASURE_BACKGROUND;
587 
588     static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
589     static constexpr int32_t kPulseLengthMillis = 500;
590 
591     AudioRecording     mPulse;
592     int32_t            mPulseCursor = 0;
593 
594     double             mBackgroundSumSquare = 0.0;
595     int32_t            mBackgroundSumCount = 0;
596     double             mBackgroundRMS = 0.0;
597     double             mSignalRMS = 0.0;
598     int32_t            mFramesToRecord = 0;
599 
600     AudioRecording     mAudioRecording; // contains only the input after starting the pulse
601     LatencyReport      mLatencyReport;
602 };
603 
604 #endif // ANALYZER_LATENCY_ANALYZER_H
605