test/conversational_speech/multiend_call.h

/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_

#include <stddef.h>

#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "api/array_view.h"
#include "modules/audio_processing/test/conversational_speech/timing.h"
#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
#include "rtc_base/constructor_magic.h"

namespace webrtc {
namespace test {
namespace conversational_speech {

class MultiEndCall {
 public:
  struct SpeakingTurn {
    // Constructor required in order to use std::vector::emplace_back().
    SpeakingTurn(std::string new_speaker_name,
                 std::string new_audiotrack_file_name,
                 size_t new_begin,
                 size_t new_end,
                 int gain)
        : speaker_name(std::move(new_speaker_name)),
          audiotrack_file_name(std::move(new_audiotrack_file_name)),
          begin(new_begin),
          end(new_end),
          gain(gain) {}
    std::string speaker_name;
    std::string audiotrack_file_name;
    size_t begin;
    size_t end;
    int gain;
  };

  MultiEndCall(
      rtc::ArrayView<const Turn> timing,
      const std::string& audiotracks_path,
      std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory);
  ~MultiEndCall();

  const std::set<std::string>& speaker_names() const { return speaker_names_; }
  const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
  audiotrack_readers() const {
    return audiotrack_readers_;
  }
  bool valid() const { return valid_; }
  int sample_rate() const { return sample_rate_hz_; }
  size_t total_duration_samples() const { return total_duration_samples_; }
  const std::vector<SpeakingTurn>& speaking_turns() const {
    return speaking_turns_;
  }

 private:
  // Finds unique speaker names.
  void FindSpeakerNames();

  // Creates one WavReader instance for each unique audiotrack. It returns false
  // if the audio tracks do not have the same sample rate or if they are not
  // mono.
  bool CreateAudioTrackReaders();

  // Validates the speaking turns timing information. Accepts cross-talk, but
  // only up to 2 speakers. Rejects unordered turns and self cross-talk.
  bool CheckTiming();

  rtc::ArrayView<const Turn> timing_;
  const std::string& audiotracks_path_;
  std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory_;
  std::set<std::string> speaker_names_;
  std::map<std::string, std::unique_ptr<WavReaderInterface>>
      audiotrack_readers_;
  bool valid_;
  int sample_rate_hz_;
  size_t total_duration_samples_;
  std::vector<SpeakingTurn> speaking_turns_;

  RTC_DISALLOW_COPY_AND_ASSIGN(MultiEndCall);
};

}  // namespace conversational_speech
}  // namespace test
}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_