1 /*
2  * libjingle
3  * Copyright 2011 Google Inc.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *  2. Redistributions in binary form must reproduce the above copyright notice,
11  *     this list of conditions and the following disclaimer in the documentation
12  *     and/or other materials provided with the distribution.
13  *  3. The name of the author may not be used to endorse or promote products
14  *     derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "talk/session/media/currentspeakermonitor.h"
29 
30 #include "talk/media/base/streamparams.h"
31 #include "talk/session/media/audiomonitor.h"
32 #include "webrtc/base/logging.h"
33 
34 namespace cricket {
35 
namespace {

// Level assigned to the current speaker while bridging a short silence, so
// that a pause between words does not immediately hand the floor to someone
// else.
const int kMaxAudioLevel = 9;

// Default length of the lock-out window that follows a speaker switch; no
// further switch is allowed until it has elapsed.  Expressed in the units
// returned by rtc::Time() (presumably milliseconds -- confirm).
const int kDefaultMinTimeBetweenSwitches = 1000;

}  // namespace
42 
CurrentSpeakerMonitor(AudioSourceContext * audio_source_context)43 CurrentSpeakerMonitor::CurrentSpeakerMonitor(
44     AudioSourceContext* audio_source_context)
45     : started_(false),
46       audio_source_context_(audio_source_context),
47       current_speaker_ssrc_(0),
48       earliest_permitted_switch_time_(0),
49       min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {}
50 
~CurrentSpeakerMonitor()51 CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
52   Stop();
53 }
54 
Start()55 void CurrentSpeakerMonitor::Start() {
56   if (!started_) {
57     audio_source_context_->SignalAudioMonitor.connect(
58         this, &CurrentSpeakerMonitor::OnAudioMonitor);
59     audio_source_context_->SignalMediaStreamsUpdate.connect(
60         this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
61     audio_source_context_->SignalMediaStreamsReset.connect(
62         this, &CurrentSpeakerMonitor::OnMediaStreamsReset);
63 
64     started_ = true;
65   }
66 }
67 
Stop()68 void CurrentSpeakerMonitor::Stop() {
69   if (started_) {
70     audio_source_context_->SignalAudioMonitor.disconnect(this);
71     audio_source_context_->SignalMediaStreamsUpdate.disconnect(this);
72 
73     started_ = false;
74     ssrc_to_speaking_state_map_.clear();
75     current_speaker_ssrc_ = 0;
76     earliest_permitted_switch_time_ = 0;
77   }
78 }
79 
set_min_time_between_switches(uint32_t min_time_between_switches)80 void CurrentSpeakerMonitor::set_min_time_between_switches(
81     uint32_t min_time_between_switches) {
82   min_time_between_switches_ = min_time_between_switches;
83 }
84 
OnAudioMonitor(AudioSourceContext * audio_source_context,const AudioInfo & info)85 void CurrentSpeakerMonitor::OnAudioMonitor(
86     AudioSourceContext* audio_source_context, const AudioInfo& info) {
87   std::map<uint32_t, int> active_ssrc_to_level_map;
88   cricket::AudioInfo::StreamList::const_iterator stream_list_it;
89   for (stream_list_it = info.active_streams.begin();
90        stream_list_it != info.active_streams.end(); ++stream_list_it) {
91     uint32_t ssrc = stream_list_it->first;
92     active_ssrc_to_level_map[ssrc] = stream_list_it->second;
93 
94     // It's possible we haven't yet added this source to our map.  If so,
95     // add it now with a "not speaking" state.
96     if (ssrc_to_speaking_state_map_.find(ssrc) ==
97         ssrc_to_speaking_state_map_.end()) {
98       ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
99     }
100   }
101 
102   int max_level = 0;
103   uint32_t loudest_speaker_ssrc = 0;
104 
105   // Update the speaking states of all participants based on the new audio
106   // level information.  Also retain loudest speaker.
107   std::map<uint32_t, SpeakingState>::iterator state_it;
108   for (state_it = ssrc_to_speaking_state_map_.begin();
109        state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
110     bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;
111 
112     // This uses a state machine in order to gradually identify
113     // members as having started or stopped speaking. Matches the
114     // algorithm used by the hangouts js code.
115 
116     std::map<uint32_t, int>::const_iterator level_it =
117         active_ssrc_to_level_map.find(state_it->first);
118     // Note that the stream map only contains streams with non-zero audio
119     // levels.
120     int level = (level_it != active_ssrc_to_level_map.end()) ?
121         level_it->second : 0;
122     switch (state_it->second) {
123       case SS_NOT_SPEAKING:
124         if (level > 0) {
125           // Reset level because we don't think they're really speaking.
126           level = 0;
127           state_it->second = SS_MIGHT_BE_SPEAKING;
128         } else {
129           // State unchanged.
130         }
131         break;
132       case SS_MIGHT_BE_SPEAKING:
133         if (level > 0) {
134           state_it->second = SS_SPEAKING;
135         } else {
136           state_it->second = SS_NOT_SPEAKING;
137         }
138         break;
139       case SS_SPEAKING:
140         if (level > 0) {
141           // State unchanged.
142         } else {
143           state_it->second = SS_WAS_SPEAKING_RECENTLY1;
144           if (is_previous_speaker) {
145             // Assume this is an inter-word silence and assign him the highest
146             // volume.
147             level = kMaxAudioLevel;
148           }
149         }
150         break;
151       case SS_WAS_SPEAKING_RECENTLY1:
152         if (level > 0) {
153           state_it->second = SS_SPEAKING;
154         } else {
155           state_it->second = SS_WAS_SPEAKING_RECENTLY2;
156           if (is_previous_speaker) {
157             // Assume this is an inter-word silence and assign him the highest
158             // volume.
159             level = kMaxAudioLevel;
160           }
161         }
162         break;
163       case SS_WAS_SPEAKING_RECENTLY2:
164         if (level > 0) {
165           state_it->second = SS_SPEAKING;
166         } else {
167           state_it->second = SS_NOT_SPEAKING;
168         }
169         break;
170     }
171 
172     if (level > max_level) {
173       loudest_speaker_ssrc = state_it->first;
174       max_level = level;
175     } else if (level > 0 && level == max_level && is_previous_speaker) {
176       // Favor continuity of loudest speakers if audio levels are equal.
177       loudest_speaker_ssrc = state_it->first;
178     }
179   }
180 
181   // We avoid over-switching by disabling switching for a period of time after
182   // a switch is done.
183   uint32_t now = rtc::Time();
184   if (earliest_permitted_switch_time_ <= now &&
185       current_speaker_ssrc_ != loudest_speaker_ssrc) {
186     current_speaker_ssrc_ = loudest_speaker_ssrc;
187     LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
188     earliest_permitted_switch_time_ = now + min_time_between_switches_;
189     SignalUpdate(this, current_speaker_ssrc_);
190   }
191 }
192 
OnMediaStreamsUpdate(AudioSourceContext * audio_source_context,const MediaStreams & added,const MediaStreams & removed)193 void CurrentSpeakerMonitor::OnMediaStreamsUpdate(
194     AudioSourceContext* audio_source_context,
195     const MediaStreams& added,
196     const MediaStreams& removed) {
197   if (audio_source_context == audio_source_context_) {
198     // Update the speaking state map based on added and removed streams.
199     for (std::vector<cricket::StreamParams>::const_iterator
200            it = removed.audio().begin(); it != removed.audio().end(); ++it) {
201       ssrc_to_speaking_state_map_.erase(it->first_ssrc());
202     }
203 
204     for (std::vector<cricket::StreamParams>::const_iterator
205            it = added.audio().begin(); it != added.audio().end(); ++it) {
206       ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
207     }
208   }
209 }
210 
OnMediaStreamsReset(AudioSourceContext * audio_source_context)211 void CurrentSpeakerMonitor::OnMediaStreamsReset(
212     AudioSourceContext* audio_source_context) {
213   if (audio_source_context == audio_source_context_) {
214     ssrc_to_speaking_state_map_.clear();
215   }
216 }
217 
218 }  // namespace cricket
219