/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Tools for measuring latency and for detecting glitches. * These classes are pure math and can be used with any audio system. */ #ifndef ANALYZER_LATENCY_ANALYZER_H #define ANALYZER_LATENCY_ANALYZER_H #include #include #include #include #include #include #include #include #include #include #include #include #include "PeakDetector.h" #include "PseudoRandom.h" #include "RandomPulseGenerator.h" // This is used when the code is in not in Android. #ifndef ALOGD #define ALOGD LOGD #define ALOGE LOGE #define ALOGW LOGW #endif #define LOOPBACK_RESULT_TAG "RESULT: " static constexpr int32_t kDefaultSampleRate = 48000; static constexpr int32_t kMillisPerSecond = 1000; // by definition static constexpr int32_t kMaxLatencyMillis = 1000; // arbitrary and generous struct LatencyReport { int32_t latencyInFrames = 0.0; double correlation = 0.0; void reset() { latencyInFrames = 0; correlation = 0.0; } }; /** * Calculate a normalized cross correlation. * @return value between -1.0 and 1.0 */ static float calculateNormalizedCorrelation(const float *a, const float *b, int windowSize) { float correlation = 0.0; float sumProducts = 0.0; float sumSquares = 0.0; // Correlate a against b. for (int i = 0; i < windowSize; i++) { float s1 = a[i]; float s2 = b[i]; // Use a normalized cross-correlation. sumProducts += s1 * s2; sumSquares += ((s1 * s1) + (s2 * s2)); } if (sumSquares >= 1.0e-9) { correlation = 2.0 * sumProducts / sumSquares; } return correlation; } static double calculateRootMeanSquare(float *data, int32_t numSamples) { double sum = 0.0; for (int32_t i = 0; i < numSamples; i++) { double sample = data[i]; sum += sample * sample; } return sqrt(sum / numSamples); } /** * Monophonic recording with processing. * Samples are stored as floats internally. */ class AudioRecording { public: void allocate(int maxFrames) { mData = std::make_unique(maxFrames); mMaxFrames = maxFrames; mFrameCounter = 0; } // Write SHORT data from the first channel. int32_t write(const int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) { // stop at end of buffer if ((mFrameCounter + numFrames) > mMaxFrames) { numFrames = mMaxFrames - mFrameCounter; } for (int i = 0; i < numFrames; i++) { mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768); } return numFrames; } // Write FLOAT data from the first channel. int32_t write(const float *inputData, int32_t inputChannelCount, int32_t numFrames) { // stop at end of buffer if ((mFrameCounter + numFrames) > mMaxFrames) { numFrames = mMaxFrames - mFrameCounter; } for (int i = 0; i < numFrames; i++) { mData[mFrameCounter++] = inputData[i * inputChannelCount]; } return numFrames; } // Write single FLOAT value. int32_t write(float sample) { // stop at end of buffer if (mFrameCounter < mMaxFrames) { mData[mFrameCounter++] = sample; return 1; } return 0; } void clear() { mFrameCounter = 0; } int32_t size() const { return mFrameCounter; } bool isFull() const { return mFrameCounter >= mMaxFrames; } float *getData() const { return mData.get(); } void setSampleRate(int32_t sampleRate) { mSampleRate = sampleRate; } int32_t getSampleRate() const { return mSampleRate; } /** * Square the samples so they are all positive and so the peaks are emphasized. */ void square() { float *x = mData.get(); for (int i = 0; i < mFrameCounter; i++) { x[i] *= x[i]; } } // Envelope follower that rides over the peak values. void detectPeaks(float decay) { float level = 0.0f; float *x = mData.get(); for (int i = 0; i < mFrameCounter; i++) { level *= decay; // exponential decay float input = fabs(x[i]); // never fall below the input signal if (input > level) { level = input; } x[i] = level; // write result back into the array } } /** * Amplify a signal so that the peak matches the specified target. * * @param target final max value * @return gain applied to signal */ float normalize(float target) { float maxValue = 1.0e-9f; for (int i = 0; i < mFrameCounter; i++) { maxValue = std::max(maxValue, abs(mData[i])); } float gain = target / maxValue; for (int i = 0; i < mFrameCounter; i++) { mData[i] *= gain; } return gain; } private: std::unique_ptr mData; int32_t mFrameCounter = 0; int32_t mMaxFrames = 0; int32_t mSampleRate = kDefaultSampleRate; // common default }; static int measureLatencyFromPulse(AudioRecording &recorded, AudioRecording &pulse, LatencyReport *report) { report->reset(); int numCorrelations = recorded.size() - pulse.size(); if (numCorrelations < 10) { ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size()); return -1; } std::unique_ptr correlations= std::make_unique(numCorrelations); // Correlate pulse against the recorded data. for (int i = 0; i < numCorrelations; i++) { float correlation = calculateNormalizedCorrelation(&recorded.getData()[i], &pulse.getData()[0], pulse.size()); correlations[i] = correlation; } // Find highest peak in correlation array. float peakCorrelation = 0.0; int peakIndex = -1; for (int i = 0; i < numCorrelations; i++) { float value = abs(correlations[i]); if (value > peakCorrelation) { peakCorrelation = value; peakIndex = i; } } if (peakIndex < 0) { ALOGE("%s() no signal for correlation\n", __func__); return -2; } #if 0 // Dump correlation data for charting. else { const int margin = 50; int startIndex = std::max(0, peakIndex - margin); int endIndex = std::min(numCorrelations - 1, peakIndex + margin); for (int index = startIndex; index < endIndex; index++) { ALOGD("Correlation, %d, %f", index, correlations[index]); } } #endif report->latencyInFrames = peakIndex; report->correlation = peakCorrelation; return 0; } // ==================================================================================== class LoopbackProcessor { public: virtual ~LoopbackProcessor() = default; enum result_code { RESULT_OK = 0, ERROR_NOISY = -99, ERROR_VOLUME_TOO_LOW, ERROR_VOLUME_TOO_HIGH, ERROR_CONFIDENCE, ERROR_INVALID_STATE, ERROR_GLITCHES, ERROR_NO_LOCK }; virtual void prepareToTest() { reset(); } virtual void reset() { mResult = 0; mResetCount++; } virtual result_code processInputFrame(const float *frameData, int channelCount) = 0; virtual result_code processOutputFrame(float *frameData, int channelCount) = 0; void process(const float *inputData, int inputChannelCount, int numInputFrames, float *outputData, int outputChannelCount, int numOutputFrames) { int numBoth = std::min(numInputFrames, numOutputFrames); // Process one frame at a time. for (int i = 0; i < numBoth; i++) { processInputFrame(inputData, inputChannelCount); inputData += inputChannelCount; processOutputFrame(outputData, outputChannelCount); outputData += outputChannelCount; } // If there is more input than output. for (int i = numBoth; i < numInputFrames; i++) { processInputFrame(inputData, inputChannelCount); inputData += inputChannelCount; } // If there is more output than input. for (int i = numBoth; i < numOutputFrames; i++) { processOutputFrame(outputData, outputChannelCount); outputData += outputChannelCount; } } virtual std::string analyze() = 0; virtual void printStatus() {}; int32_t getResult() { return mResult; } void setResult(int32_t result) { mResult = result; } virtual bool isDone() { return false; } virtual int save(const char *fileName) { (void) fileName; return -1; } virtual int load(const char *fileName) { (void) fileName; return -1; } virtual void setSampleRate(int32_t sampleRate) { mSampleRate = sampleRate; } int32_t getSampleRate() const { return mSampleRate; } int32_t getResetCount() const { return mResetCount; } /** Called when not enough input frames could be read after synchronization. */ virtual void onInsufficientRead() { reset(); } protected: int32_t mResetCount = 0; private: int32_t mSampleRate = kDefaultSampleRate; int32_t mResult = 0; }; class LatencyAnalyzer : public LoopbackProcessor { public: LatencyAnalyzer() : LoopbackProcessor() {} virtual ~LatencyAnalyzer() = default; /** * Call this after the constructor because it calls other virtual methods. */ virtual void setup() = 0; virtual int32_t getProgress() const = 0; virtual int getState() const = 0; // @return latency in frames virtual int32_t getMeasuredLatency() const = 0; /** * This is an overall confidence in the latency result based on correlation, SNR, etc. * @return probability value between 0.0 and 1.0 */ double getMeasuredConfidence() const { // Limit the ratio and prevent divide-by-zero. double noiseSignalRatio = getSignalRMS() <= getBackgroundRMS() ? 1.0 : getBackgroundRMS() / getSignalRMS(); // Prevent high background noise and low signals from generating false matches. double adjustedConfidence = getMeasuredCorrelation() - noiseSignalRatio; return std::max(0.0, adjustedConfidence); } /** * Cross correlation value for the noise pulse against * the corresponding position in the normalized recording. * * @return value between -1.0 and 1.0 */ virtual double getMeasuredCorrelation() const = 0; virtual double getBackgroundRMS() const = 0; virtual double getSignalRMS() const = 0; virtual bool hasEnoughData() const = 0; }; // ==================================================================================== /** * Measure latency given a loopback stream data. * Use an encoded bit train as the sound source because it * has an unambiguous correlation value. * Uses a state machine to cycle through various stages. * */ class PulseLatencyAnalyzer : public LatencyAnalyzer { public: void setup() override { int32_t pulseLength = calculatePulseLength(); int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond; mFramesToRecord = pulseLength + maxLatencyFrames; mAudioRecording.allocate(mFramesToRecord); mAudioRecording.setSampleRate(getSampleRate()); } int getState() const override { return mState; } void setSampleRate(int32_t sampleRate) override { LoopbackProcessor::setSampleRate(sampleRate); mAudioRecording.setSampleRate(sampleRate); } void reset() override { LoopbackProcessor::reset(); mState = STATE_MEASURE_BACKGROUND; mDownCounter = (int32_t) (getSampleRate() * kBackgroundMeasurementLengthSeconds); mLoopCounter = 0; mPulseCursor = 0; mBackgroundSumSquare = 0.0f; mBackgroundSumCount = 0; mBackgroundRMS = 0.0f; mSignalRMS = 0.0f; generatePulseRecording(calculatePulseLength()); mAudioRecording.clear(); mLatencyReport.reset(); } bool hasEnoughData() const override { return mAudioRecording.isFull(); } bool isDone() override { return mState == STATE_DONE; } int32_t getProgress() const override { return mAudioRecording.size(); } std::string analyze() override { std::stringstream report; report << "PulseLatencyAnalyzer ---------------\n"; report << LOOPBACK_RESULT_TAG "test.state = " << std::setw(8) << mState << "\n"; report << LOOPBACK_RESULT_TAG "test.state.name = " << convertStateToText(mState) << "\n"; report << LOOPBACK_RESULT_TAG "background.rms = " << std::setw(8) << mBackgroundRMS << "\n"; int32_t newResult = RESULT_OK; if (mState != STATE_GOT_DATA) { report << "WARNING - Bad state. Check volume on device.\n"; // setResult(ERROR_INVALID_STATE); } else { float gain = mAudioRecording.normalize(1.0f); measureLatency(); // Calculate signalRMS even if it is bogus. // Also it may be used in the confidence calculation below. mSignalRMS = calculateRootMeanSquare( &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size()) / gain; if (getMeasuredConfidence() < getMinimumConfidence()) { report << " ERROR - confidence too low!"; newResult = ERROR_CONFIDENCE; } double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames / getSampleRate(); report << LOOPBACK_RESULT_TAG "latency.frames = " << std::setw(8) << mLatencyReport.latencyInFrames << "\n"; report << LOOPBACK_RESULT_TAG "latency.msec = " << std::setw(8) << latencyMillis << "\n"; report << LOOPBACK_RESULT_TAG "latency.confidence = " << std::setw(8) << getMeasuredConfidence() << "\n"; report << LOOPBACK_RESULT_TAG "latency.correlation = " << std::setw(8) << getMeasuredCorrelation() << "\n"; } mState = STATE_DONE; if (getResult() == RESULT_OK) { setResult(newResult); } return report.str(); } int32_t getMeasuredLatency() const override { return mLatencyReport.latencyInFrames; } double getMeasuredCorrelation() const override { return mLatencyReport.correlation; } double getBackgroundRMS() const override { return mBackgroundRMS; } double getSignalRMS() const override { return mSignalRMS; } bool isRecordingComplete() { return mState == STATE_GOT_DATA; } void printStatus() override { ALOGD("latency: st = %d = %s", mState, convertStateToText(mState)); } result_code processInputFrame(const float *frameData, int channelCount) override { echo_state nextState = mState; mLoopCounter++; float input = frameData[0]; switch (mState) { case STATE_MEASURE_BACKGROUND: // Measure background RMS on channel 0 mBackgroundSumSquare += static_cast(input) * input; mBackgroundSumCount++; mDownCounter--; if (mDownCounter <= 0) { mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount); nextState = STATE_IN_PULSE; mPulseCursor = 0; } break; case STATE_IN_PULSE: // Record input until the mAudioRecording is full. mAudioRecording.write(input); if (hasEnoughData()) { nextState = STATE_GOT_DATA; } break; case STATE_GOT_DATA: case STATE_DONE: default: break; } mState = nextState; return RESULT_OK; } result_code processOutputFrame(float *frameData, int channelCount) override { switch (mState) { case STATE_IN_PULSE: if (mPulseCursor < mPulse.size()) { float pulseSample = mPulse.getData()[mPulseCursor++]; for (int i = 0; i < channelCount; i++) { frameData[i] = pulseSample; } } else { for (int i = 0; i < channelCount; i++) { frameData[i] = 0; } } break; case STATE_MEASURE_BACKGROUND: case STATE_GOT_DATA: case STATE_DONE: default: for (int i = 0; i < channelCount; i++) { frameData[i] = 0.0f; // silence } break; } return RESULT_OK; } protected: virtual int32_t calculatePulseLength() const = 0; virtual void generatePulseRecording(int32_t pulseLength) = 0; virtual void measureLatency() = 0; virtual double getMinimumConfidence() const { return 0.5; } AudioRecording mPulse; AudioRecording mAudioRecording; // contains only the input after starting the pulse LatencyReport mLatencyReport; static constexpr int32_t kPulseLengthMillis = 500; float mPulseAmplitude = 0.5f; double mBackgroundRMS = 0.0; double mSignalRMS = 0.0; private: enum echo_state { STATE_MEASURE_BACKGROUND, STATE_IN_PULSE, STATE_GOT_DATA, // must match RoundTripLatencyActivity.java STATE_DONE, }; const char *convertStateToText(echo_state state) { switch (state) { case STATE_MEASURE_BACKGROUND: return "INIT"; case STATE_IN_PULSE: return "PULSE"; case STATE_GOT_DATA: return "GOT_DATA"; case STATE_DONE: return "DONE"; } return "UNKNOWN"; } int32_t mDownCounter = 500; int32_t mLoopCounter = 0; echo_state mState = STATE_MEASURE_BACKGROUND; static constexpr double kBackgroundMeasurementLengthSeconds = 0.5; int32_t mPulseCursor = 0; double mBackgroundSumSquare = 0.0; int32_t mBackgroundSumCount = 0; int32_t mFramesToRecord = 0; }; /** * This algorithm uses a series of random bits encoded using the * Manchester encoder. It works well for wired loopback but not very well for * through the air loopback. */ class EncodedRandomLatencyAnalyzer : public PulseLatencyAnalyzer { protected: int32_t calculatePulseLength() const override { // Calculate integer number of bits. int32_t numPulseBits = getSampleRate() * kPulseLengthMillis / (kFramesPerEncodedBit * kMillisPerSecond); return numPulseBits * kFramesPerEncodedBit; } void generatePulseRecording(int32_t pulseLength) override { mPulse.allocate(pulseLength); RandomPulseGenerator pulser(kFramesPerEncodedBit); for (int i = 0; i < pulseLength; i++) { mPulse.write(pulser.nextFloat() * mPulseAmplitude); } } double getMinimumConfidence() const override { return 0.2; } void measureLatency() override { measureLatencyFromPulse(mAudioRecording, mPulse, &mLatencyReport); } private: static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2 }; /** * This algorithm uses White Noise sent in a short burst pattern. * The original signal and the recorded signal are then run through * an envelope follower to convert the fine detail into more of * a rectangular block before the correlation phase. */ class WhiteNoiseLatencyAnalyzer : public PulseLatencyAnalyzer { protected: int32_t calculatePulseLength() const override { return getSampleRate() * kPulseLengthMillis / kMillisPerSecond; } void generatePulseRecording(int32_t pulseLength) override { mPulse.allocate(pulseLength); // Turn the noise on and off to sharpen the correlation peak. // Use more zeros than ones so that the correlation will be less than 0.5 even when there // is a strong background noise. int8_t pattern[] = {1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; PseudoRandom random; const int32_t numSections = sizeof(pattern); const int32_t framesPerSection = pulseLength / numSections; for (int section = 0; section < numSections; section++) { if (pattern[section]) { for (int i = 0; i < framesPerSection; i++) { mPulse.write((float) (random.nextRandomDouble() * mPulseAmplitude)); } } else { for (int i = 0; i < framesPerSection; i++) { mPulse.write(0.0f); } } } // Write any remaining frames. int32_t framesWritten = framesPerSection * numSections; for (int i = framesWritten; i < pulseLength; i++) { mPulse.write(0.0f); } } void measureLatency() override { // Smooth out the noise so we see rectangular blocks. // This improves immunity against phase cancellation and distortion. static constexpr float decay = 0.99f; // just under 1.0, lower numbers decay faster mAudioRecording.detectPeaks(decay); mPulse.detectPeaks(decay); measureLatencyFromPulse(mAudioRecording, mPulse, &mLatencyReport); } }; #endif // ANALYZER_LATENCY_ANALYZER_H