// src/analyzer/LatencyAnalyzer.h

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Tools for measuring latency and for detecting glitches.
 * These classes are pure math and can be used with any audio system.
 */

#ifndef ANALYZER_LATENCY_ANALYZER_H
#define ANALYZER_LATENCY_ANALYZER_H

#include <algorithm>
#include <assert.h>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <math.h>
#include <memory>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <vector>

#include "PeakDetector.h"
#include "PseudoRandom.h"
#include "RandomPulseGenerator.h"

// Fallback logging macros. When ALOGD has not been defined before this header is
// included, the debug, error and warning log macros are all mapped to printf.
// Only ALOGD is tested by the guard; ALOGE and ALOGW are defined along with it.
// Original note: this is used when the code is in Oboe.
#ifndef ALOGD
#define ALOGD printf
#define ALOGE printf
#define ALOGW printf
#endif

// Prefix placed in front of the "key = value" lines of the text report that
// PulseLatencyAnalyzer::analyze() builds.
#define LOOPBACK_RESULT_TAG  "RESULT: "

// Sample rate, in frames per second, that the classes in this file start with
// until setSampleRate() is called.
static constexpr int32_t kDefaultSampleRate = 48000;
// Conversion factor between seconds and milliseconds.
static constexpr int32_t kMillisPerSecond   = 1000;
// Longest latency that the PulseLatencyAnalyzer recording buffer is sized to capture.
static constexpr int32_t kMaxLatencyMillis  = 700;  // arbitrary and generous
// Correlation confidence below which PulseLatencyAnalyzer::analyze() reports ERROR_CONFIDENCE.
static constexpr double  kMinimumConfidence = 0.2;

/**
 * Result of a latency measurement.
 */
struct LatencyReport {
    // Offset, in frames, at which the pulse was found in the recording.
    // Initialized with an integer literal; the member is an integer, not a double.
    int32_t latencyInFrames = 0;
    // Magnitude of the best correlation found; 0.0 means nothing was measured.
    double confidence = 0.0;

    /** Return both fields to their "nothing measured" values. */
    void reset() {
        latencyInFrames = 0;
        confidence = 0.0;
    }
};

/**
 * Compute the normalized cross-correlation of two windows of equal length.
 *
 * The value is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2), so identical
 * windows give 1.0 and inverted windows give -1.0.
 *
 * @param a first window, at least windowSize samples
 * @param b second window, at least windowSize samples
 * @param windowSize number of samples to correlate
 * @return the correlation, or 0.0 when the combined energy is not at least 1.0e-9
 */
static double calculateNormalizedCorrelation(const float *a,
                                             const float *b,
                                             int windowSize) {
    double crossSum = 0.0;   // running sum of a[i] * b[i]
    double energySum = 0.0;  // running sum of a[i]^2 + b[i]^2

    const float *left = a;
    const float *right = b;
    for (int remaining = windowSize; remaining > 0; remaining--) {
        const float x = *left++;
        const float y = *right++;
        // Products are formed in float and only then accumulated in double.
        crossSum += x * y;
        energySum += ((x * x) + (y * y));
    }

    // Without enough energy the ratio is meaningless, so report no correlation.
    return (energySum >= 1.0e-9) ? (2.0 * crossSum / energySum) : 0.0;
}

/**
 * Compute the root mean square of a block of samples.
 *
 * @param data samples to measure; only read, never modified
 * @param numSamples number of samples in data
 * @return the RMS value, or 0.0 when there is nothing to measure
 *         (null data or a non-positive numSamples)
 */
static double calculateRootMeanSquare(float *data, int32_t numSamples) {
    // An empty block would otherwise evaluate sqrt(0.0 / 0), which is NaN.
    if (data == nullptr || numSamples <= 0) {
        return 0.0;
    }
    double sum = 0.0;
    for (int32_t i = 0; i < numSamples; i++) {
        const float sample = data[i];
        sum += sample * sample;
    }
    return sqrt(sum / numSamples);
}

/**
 * Monophonic recording with processing.
 *
 * Holds up to a fixed number of float samples in a buffer created by allocate().
 * Writes append at the current position and stop once the buffer is full.
 */
class AudioRecording
{
public:

    /**
     * Allocate a zero-initialized buffer and discard any previous recording.
     *
     * The write position is reset because it would otherwise still describe the
     * old buffer, letting size(), square() and normalize() run past the end of
     * a smaller replacement buffer.
     *
     * @param maxFrames capacity in frames; negative values are treated as zero
     */
    void allocate(int maxFrames) {
        const int32_t capacity = std::max<int32_t>(maxFrames, 0);
        mData = std::make_unique<float[]>(capacity);
        mMaxFrames = capacity;
        mFrameCounter = 0;
    }

    /**
     * Write SHORT data from the first channel, scaled by 1/32768.
     *
     * @param inputData interleaved input frames
     * @param inputChannelCount number of samples per input frame
     * @param numFrames number of frames available in inputData
     * @return number of frames actually stored, never negative
     */
    int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
        // stop at end of buffer
        const int32_t framesToWrite = clampToFreeSpace(numFrames);
        for (int32_t i = 0; i < framesToWrite; i++) {
            mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
        }
        return framesToWrite;
    }

    /**
     * Write FLOAT data from the first channel.
     *
     * @param inputData interleaved input frames
     * @param inputChannelCount number of samples per input frame
     * @param numFrames number of frames available in inputData
     * @return number of frames actually stored, never negative
     */
    int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
        // stop at end of buffer
        const int32_t framesToWrite = clampToFreeSpace(numFrames);
        for (int32_t i = 0; i < framesToWrite; i++) {
            mData[mFrameCounter++] = inputData[i * inputChannelCount];
        }
        return framesToWrite;
    }

    /**
     * Append a single FLOAT sample.
     *
     * @return 1 if the sample was stored, 0 if the buffer was already full
     */
    int32_t write(float sample) {
        // stop at end of buffer
        if (mFrameCounter < mMaxFrames) {
            mData[mFrameCounter++] = sample;
            return 1;
        }
        return 0;
    }

    /** Forget the recorded samples; the buffer itself is kept. */
    void clear() {
        mFrameCounter = 0;
    }

    /** @return number of frames written so far */
    int32_t size() const {
        return mFrameCounter;
    }

    /** @return true when no more frames can be written */
    bool isFull() const {
        return mFrameCounter >= mMaxFrames;
    }

    /** @return pointer to the first sample, or nullptr if allocate() was never called */
    float *getData() const {
        return mData.get();
    }

    void setSampleRate(int32_t sampleRate) {
        mSampleRate = sampleRate;
    }

    int32_t getSampleRate() const {
        return mSampleRate;
    }

    /**
     * Square the samples so they are all positive and so the peaks are emphasized.
     */
    void square() {
        float *x = mData.get();
        for (int32_t i = 0; i < mFrameCounter; i++) {
            x[i] *= x[i];
        }
    }

    /**
     * Amplify a signal so that the peak matches the specified target.
     *
     * The peak is floored at 1.0e-9 so a silent recording does not divide by zero.
     *
     * @param target final max value
     * @return gain applied to signal
     */
    float normalize(float target) {
        float maxValue = 1.0e-9f;
        for (int32_t i = 0; i < mFrameCounter; i++) {
            // fabsf is always the float version; an unqualified abs() may not be.
            maxValue = std::max(maxValue, fabsf(mData[i]));
        }
        const float gain = target / maxValue;
        for (int32_t i = 0; i < mFrameCounter; i++) {
            mData[i] *= gain;
        }
        return gain;
    }

private:
    /**
     * Limit a requested frame count to what still fits in the buffer.
     * Written as a subtraction so a huge request cannot overflow the check.
     */
    int32_t clampToFreeSpace(int32_t numFrames) const {
        const int32_t freeFrames = mMaxFrames - mFrameCounter;
        return std::max<int32_t>(0, std::min(numFrames, freeFrames));
    }

    std::unique_ptr<float[]> mData;
    int32_t       mFrameCounter = 0;
    int32_t       mMaxFrames = 0;
    int32_t       mSampleRate = kDefaultSampleRate; // common default
};

/**
 * Locate a known pulse inside a recording using normalized cross-correlation.
 *
 * The pulse is correlated against the recording at every offset where it fits,
 * and the offset with the largest correlation magnitude is reported.
 *
 * @param recorded recording to search
 * @param pulse reference pulse to look for
 * @param report zeroed first; on success receives the peak offset in frames
 *               and the peak correlation magnitude as the confidence
 * @return 0 on success,
 *         -1 if the recording is fewer than 10 frames longer than the pulse,
 *         -2 if no offset produced a non-zero correlation
 */
static int measureLatencyFromPulse(AudioRecording &recorded,
                                   AudioRecording &pulse,
                                   LatencyReport *report) {

    report->latencyInFrames = 0;
    report->confidence = 0.0;

    const int numCorrelations = recorded.size() - pulse.size();
    if (numCorrelations < 10) {
        ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
        return -1;
    }

    const float *recordedData = recorded.getData();
    const float *pulseData = pulse.getData();
    const int pulseSize = pulse.size();

    // Correlate pulse against the recorded data and track the highest peak as we go,
    // so no intermediate array of correlations is needed.
    float peakCorrelation = 0.0f;
    int peakIndex = -1;
    for (int i = 0; i < numCorrelations; i++) {
        const float correlation = static_cast<float>(
                calculateNormalizedCorrelation(&recordedData[i], pulseData, pulseSize));
        // fabsf guarantees a floating-point absolute value. An unqualified abs()
        // can resolve to the integer overload, truncating every value below 1 to 0.
        const float magnitude = fabsf(correlation);
        if (magnitude > peakCorrelation) {
            peakCorrelation = magnitude;
            peakIndex = i;
        }
    }
    if (peakIndex < 0) {
        ALOGE("%s() no signal for correlation\n", __func__);
        return -2;
    }

    report->latencyInFrames = peakIndex;
    report->confidence = peakCorrelation;

    return 0;
}

// ====================================================================================
/**
 * Base class for analyzers that are fed loopback audio one frame at a time.
 *
 * Subclasses implement the per-frame input and output callbacks and analyze().
 * This class keeps a result code, a sample rate and a count of resets.
 */
class LoopbackProcessor {
public:
    virtual ~LoopbackProcessor() = default;

    enum result_code {
        RESULT_OK = 0,
        ERROR_NOISY = -99,
        ERROR_VOLUME_TOO_LOW,
        ERROR_VOLUME_TOO_HIGH,
        ERROR_CONFIDENCE,
        ERROR_INVALID_STATE,
        ERROR_GLITCHES,
        ERROR_NO_LOCK
    };

    /** Get ready for a new test. The default simply calls reset(). */
    virtual void prepareToTest() {
        reset();
    }

    /** Clear the stored result and count the reset. */
    virtual void reset() {
        setResult(RESULT_OK);
        ++mResetCount;
    }

    /** Consume one frame of input. frameData points at channelCount samples. */
    virtual result_code processInputFrame(float *frameData, int channelCount) = 0;

    /** Produce one frame of output into frameData, which holds channelCount samples. */
    virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;

    /**
     * Feed a burst of interleaved input and output through the per-frame callbacks.
     *
     * Frames are handled in pairs (input first, then output) for as long as both
     * buffers have frames; whichever buffer is longer is then finished on its own.
     */
    void process(float *inputData, int inputChannelCount, int numInputFrames,
                 float *outputData, int outputChannelCount, int numOutputFrames) {
        const int pairedFrames = std::min(numInputFrames, numOutputFrames);
        float *inputCursor = inputData;
        float *outputCursor = outputData;

        // Alternate between the two streams, one frame each.
        int pairIndex = 0;
        while (pairIndex < pairedFrames) {
            processInputFrame(inputCursor, inputChannelCount);
            inputCursor += inputChannelCount;
            processOutputFrame(outputCursor, outputChannelCount);
            outputCursor += outputChannelCount;
            ++pairIndex;
        }

        // Drain the input frames that had no matching output frame.
        int inputIndex = pairedFrames;
        while (inputIndex < numInputFrames) {
            processInputFrame(inputCursor, inputChannelCount);
            inputCursor += inputChannelCount;
            ++inputIndex;
        }

        // Fill the output frames that had no matching input frame.
        int outputIndex = pairedFrames;
        while (outputIndex < numOutputFrames) {
            processOutputFrame(outputCursor, outputChannelCount);
            outputCursor += outputChannelCount;
            ++outputIndex;
        }
    }

    /** Evaluate what was collected and return a text report. */
    virtual std::string analyze() = 0;

    /** Optional status output. Does nothing by default. */
    virtual void printStatus() {}

    int32_t getResult() {
        return mResult;
    }

    void setResult(int32_t result) {
        mResult = result;
    }

    /** @return true once the processor has finished; never true by default */
    virtual bool isDone() {
        return false;
    }

    /** Not supported by default. @return -1 */
    virtual int save(const char *fileName) {
        static_cast<void>(fileName);
        return -1;
    }

    /** Not supported by default. @return -1 */
    virtual int load(const char *fileName) {
        static_cast<void>(fileName);
        return -1;
    }

    virtual void setSampleRate(int32_t sampleRate) {
        mSampleRate = sampleRate;
    }

    int32_t getSampleRate() const {
        return mSampleRate;
    }

    /** @return how many times reset() has run on this base class */
    int32_t getResetCount() const {
        return mResetCount;
    }

    /** Called when not enough input frames could be read after synchronization.
     */
    virtual void onInsufficientRead() {
        reset();
    }

protected:
    int32_t   mResetCount = 0;

private:
    int32_t mSampleRate = kDefaultSampleRate;
    int32_t mResult = 0;
};

class LatencyAnalyzer : public LoopbackProcessor {
public:

    LatencyAnalyzer() : LoopbackProcessor() {}
    virtual ~LatencyAnalyzer() = default;

    virtual int32_t getProgress() const = 0;

    virtual int getState() = 0;

    // @return latency in frames
    virtual int32_t getMeasuredLatency() = 0;

    virtual double getMeasuredConfidence() = 0;

    virtual double getBackgroundRMS() = 0;

    virtual double getSignalRMS() = 0;

};

// ====================================================================================
/**
 * Measure latency given a loopback stream data.
 * Use an encoded bit train as the sound source because it
 * has an unambiguous correlation value.
 * Uses a state machine to cycle through various stages:
 * measure the background level, play the pulse while recording the input,
 * then wait for analyze() to correlate the recording against the pulse.
 */
class PulseLatencyAnalyzer : public LatencyAnalyzer {
public:

    /**
     * Size the recording for one pulse plus kMaxLatencyMillis of latency and
     * generate the pulse.
     *
     * NOTE(review): the sizes are computed from the sample rate at construction
     * time. setSampleRate() does not resize the buffers or regenerate the pulse,
     * so at a different rate the durations in milliseconds change accordingly.
     */
    PulseLatencyAnalyzer() : LatencyAnalyzer() {
        int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
        // Round the pulse down to a whole number of encoded bits.
        int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
                / (kFramesPerEncodedBit * kMillisPerSecond);
        int32_t  pulseLength = numPulseBits * kFramesPerEncodedBit;
        mFramesToRecord = pulseLength + maxLatencyFrames;
        mAudioRecording.allocate(mFramesToRecord);
        mAudioRecording.setSampleRate(getSampleRate());
        generateRandomPulse(pulseLength);
    }

    /**
     * Fill mPulse with pulseLength samples from a RandomPulseGenerator.
     *
     * @param pulseLength number of frames in the pulse
     */
    void generateRandomPulse(int32_t pulseLength) {
        mPulse.allocate(pulseLength);
        RandomPulseGenerator pulser(kFramesPerEncodedBit);
        for (int i = 0; i < pulseLength; i++) {
            mPulse.write(pulser.nextFloat());
        }
    }

    int getState() override {
        return mState;
    }

    void setSampleRate(int32_t sampleRate) override {
        LoopbackProcessor::setSampleRate(sampleRate);
        mAudioRecording.setSampleRate(sampleRate);
    }

    /** Return to the background-measuring state and discard earlier measurements. */
    void reset() override {
        LoopbackProcessor::reset();
        // Number of input frames spent measuring the background: half a second.
        mDownCounter = getSampleRate() / 2;
        mLoopCounter = 0;

        mPulseCursor = 0;
        mBackgroundSumSquare = 0.0;
        mBackgroundSumCount = 0;
        mBackgroundRMS = 0.0;
        mSignalRMS = 0.0;

        mState = STATE_MEASURE_BACKGROUND;
        mAudioRecording.clear();
        mLatencyReport.reset();
    }

    /** @return true once the recording buffer is full */
    bool hasEnoughData() {
        return mAudioRecording.isFull();
    }

    bool isDone() override {
        return mState == STATE_DONE;
    }

    /** @return number of frames recorded so far */
    int32_t getProgress() const override {
        return mAudioRecording.size();
    }

    /**
     * Correlate the recording against the pulse and build a text report.
     *
     * The measurement only runs when the recording has completed
     * (STATE_GOT_DATA); otherwise a warning is written. In both cases the
     * analyzer ends up in STATE_DONE. A result already set to an error is kept.
     *
     * @return the report, with LOOPBACK_RESULT_TAG in front of each value line
     */
    std::string analyze() override {
        std::stringstream report;
        report << "PulseLatencyAnalyzer ---------------\n";
        report << LOOPBACK_RESULT_TAG "test.state             = "
                << std::setw(8) << mState << "\n";
        report << LOOPBACK_RESULT_TAG "test.state.name        = "
                << convertStateToText(mState) << "\n";
        report << LOOPBACK_RESULT_TAG "background.rms         = "
                << std::setw(8) << mBackgroundRMS << "\n";

        int32_t newResult = RESULT_OK;
        if (mState != STATE_GOT_DATA) {
            report << "WARNING - Bad state. Check volume on device.\n";
            // setResult(ERROR_INVALID_STATE);
        } else {
            const float gain = mAudioRecording.normalize(1.0f);
            // The return code is not needed: on failure the report is left with a
            // confidence of 0.0, which the check below turns into ERROR_CONFIDENCE.
            measureLatencyFromPulse(mAudioRecording,
                                    mPulse,
                                    &mLatencyReport);

            if (mLatencyReport.confidence < kMinimumConfidence) {
                // Terminate the line so the next RESULT line starts on its own line.
                report << "   ERROR - confidence too low!\n";
                newResult = ERROR_CONFIDENCE;
            } else {
                // Measure the pulse where it was found, undoing the normalization gain.
                mSignalRMS = calculateRootMeanSquare(
                        &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
                                / gain;
            }
            double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
                                   / getSampleRate();
            report << LOOPBACK_RESULT_TAG "latency.frames         = " << std::setw(8)
                   << mLatencyReport.latencyInFrames << "\n";
            report << LOOPBACK_RESULT_TAG "latency.msec           = " << std::setw(8)
                   << latencyMillis << "\n";
            report << LOOPBACK_RESULT_TAG "latency.confidence     = " << std::setw(8)
                   << mLatencyReport.confidence << "\n";
        }
        mState = STATE_DONE;
        if (getResult() == RESULT_OK) {
            setResult(newResult);
        }

        return report.str();
    }

    int32_t getMeasuredLatency() override {
        return mLatencyReport.latencyInFrames;
    }

    double getMeasuredConfidence() override {
        return mLatencyReport.confidence;
    }

    double getBackgroundRMS() override {
        return mBackgroundRMS;
    }

    double getSignalRMS() override {
        return mSignalRMS;
    }

    void printStatus() override {
        ALOGD("st = %d", mState);
    }

    /**
     * Consume one input frame according to the current state.
     *
     * @param frameData one frame of input; only channel 0 is used
     * @param channelCount number of samples in the frame
     * @return always RESULT_OK
     */
    result_code processInputFrame(float *frameData, int channelCount) override {
        echo_state nextState = mState;
        mLoopCounter++;

        switch (mState) {
            case STATE_MEASURE_BACKGROUND:
                // Measure background RMS on channel 0
                mBackgroundSumSquare += frameData[0] * frameData[0];
                mBackgroundSumCount++;
                mDownCounter--;
                if (mDownCounter <= 0) {
                    // The accumulator and the result are doubles, so use the double
                    // sqrt rather than rounding through sqrtf.
                    mBackgroundRMS = sqrt(mBackgroundSumSquare / mBackgroundSumCount);
                    nextState = STATE_IN_PULSE;
                    mPulseCursor = 0;
                }
                break;

            case STATE_IN_PULSE:
                // Record input until the mAudioRecording is full.
                mAudioRecording.write(frameData, channelCount, 1);
                if (hasEnoughData()) {
                    nextState = STATE_GOT_DATA;
                }
                break;

            case STATE_GOT_DATA:
            case STATE_DONE:
            default:
                break;
        }

        mState = nextState;
        return RESULT_OK;
    }

    /**
     * Produce one output frame: the next pulse sample on every channel while
     * the pulse is playing, silence otherwise.
     *
     * @param frameData one frame of output to fill
     * @param channelCount number of samples in the frame
     * @return always RESULT_OK
     */
    result_code processOutputFrame(float *frameData, int channelCount) override {
        float sample = 0.0f; // silence unless a pulse sample is due
        if (mState == STATE_IN_PULSE && mPulseCursor < mPulse.size()) {
            sample = mPulse.getData()[mPulseCursor++];
        }
        for (int i = 0; i < channelCount; i++) {
            frameData[i] = sample;
        }

        return RESULT_OK;
    }

private:

    enum echo_state {
        STATE_MEASURE_BACKGROUND,
        STATE_IN_PULSE,
        STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
        STATE_DONE,
    };

    /** @return a short name for the state, used in the report */
    const char *convertStateToText(echo_state state) {
        switch (state) {
            case STATE_MEASURE_BACKGROUND:
                return "INIT";
            case STATE_IN_PULSE:
                return "PULSE";
            case STATE_GOT_DATA:
                return "GOT_DATA";
            case STATE_DONE:
                return "DONE";
        }
        return "UNKNOWN";
    }

    int32_t         mDownCounter = 500;  // replaced by reset() with half a second of frames
    int32_t         mLoopCounter = 0;    // input frames seen since reset()
    echo_state      mState = STATE_MEASURE_BACKGROUND;

    static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
    static constexpr int32_t kPulseLengthMillis = 500;

    AudioRecording     mPulse;
    int32_t            mPulseCursor = 0; // next pulse sample to play

    double             mBackgroundSumSquare = 0.0;
    int32_t            mBackgroundSumCount = 0;
    double             mBackgroundRMS = 0.0;
    double             mSignalRMS = 0.0;
    int32_t            mFramesToRecord = 0;

    AudioRecording     mAudioRecording; // contains only the input after starting the pulse
    LatencyReport      mLatencyReport;
};

#endif // ANALYZER_LATENCY_ANALYZER_H