1 /*
2 ** Copyright 2011, The Android Open-Source Project
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 **     http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "echo_reference"
19 
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
24 
25 #include <log/log.h>
26 #include <system/audio.h>
27 #include <audio_utils/resampler.h>
28 #include <audio_utils/echo_reference.h>
29 
// Activity flags of an echo reference. Each non-idle value is a distinct bit so that
// both can be OR-ed together in the state word when reading and writing are active
// at the same time.
enum state {
    ECHOREF_IDLE    = 0,        // neither reading nor writing
    ECHOREF_READING = 1 << 0,   // reading is active
    ECHOREF_WRITING = 1 << 1    // writing is active
};
36 
// Private state of one echo reference instance.
// The public interface (itfe) is the first member: read(), write() and release cast the
// struct echo_reference_itfe pointer they receive straight back to this structure.
// The embedded resampler provider is mapped back to this structure with offsetof().
struct echo_reference {
    struct echo_reference_itfe itfe;
    int status;                     // init status (not referenced by the code in this file)
    uint32_t state;                 // active state: reading, writing or both (enum state bits)
    audio_format_t rd_format;       // read sample format
    uint32_t rd_channel_count;      // read number of channels
    uint32_t rd_sampling_rate;      // read sampling rate in Hz
    size_t rd_frame_size;           // read frame size in bytes (bytes per sample * channels)
    audio_format_t wr_format;       // write sample format
    uint32_t wr_channel_count;      // write number of channels
    uint32_t wr_sampling_rate;      // write sampling rate in Hz
    size_t wr_frame_size;           // write frame size in bytes (bytes per sample * channels)
    void *buffer;                   // main buffer, holds frames in read format
    size_t buf_size;                // main buffer size in frames
    size_t frames_in;               // number of frames in main buffer
    void *wr_buf;                   // buffer for input conversions (mono mix and resampler output)
    size_t wr_buf_size;             // size of conversion buffer in frames
    size_t wr_frames_in;            // frames of the current write() not yet taken by the resampler
    size_t wr_curr_frame_size;      // number of frames given to current write() function
    void *wr_src_buf;               // resampler input buf (either wr_buf or buffer used by write())
    struct timespec wr_render_time; // latest render time indicated by write()
                                    // default ALSA gettimeofday() format
    int32_t  playback_delay;        // playback buffer delay indicated by last write() (delay_ns)
    int16_t prev_delta_sign;        // sign of previous delay difference:
                                    //  1: positive, -1: negative, 0: unknown
    uint16_t delta_count;           // number of consecutive delay differences with same sign
    pthread_mutex_t lock;                      // mutex protecting read/write concurrency
    pthread_cond_t cond;                       // condition signaled when data is ready to read
    struct resampler_itfe *resampler;          // input resampler, created lazily by write()
    struct resampler_buffer_provider provider; // resampler buffer provider
};
68 
69 
echo_reference_get_next_buffer(struct resampler_buffer_provider * buffer_provider,struct resampler_buffer * buffer)70 int echo_reference_get_next_buffer(struct resampler_buffer_provider *buffer_provider,
71                                    struct resampler_buffer* buffer)
72 {
73     struct echo_reference *er;
74 
75     if (buffer_provider == NULL) {
76         return -EINVAL;
77     }
78 
79     er = (struct echo_reference *)((char *)buffer_provider -
80                                       offsetof(struct echo_reference, provider));
81 
82     if (er->wr_src_buf == NULL || er->wr_frames_in == 0) {
83         buffer->raw = NULL;
84         buffer->frame_count = 0;
85         return -ENODATA;
86     }
87 
88     buffer->frame_count = (buffer->frame_count > er->wr_frames_in) ?
89             er->wr_frames_in : buffer->frame_count;
90     // this is er->rd_channel_count here as we resample after stereo to mono conversion if any
91     buffer->i16 = (int16_t *)er->wr_src_buf + (er->wr_curr_frame_size - er->wr_frames_in) *
92             er->rd_channel_count;
93 
94     return 0;
95 }
96 
echo_reference_release_buffer(struct resampler_buffer_provider * buffer_provider,struct resampler_buffer * buffer)97 void echo_reference_release_buffer(struct resampler_buffer_provider *buffer_provider,
98                                   struct resampler_buffer* buffer)
99 {
100     struct echo_reference *er;
101 
102     if (buffer_provider == NULL) {
103         return;
104     }
105 
106     er = (struct echo_reference *)((char *)buffer_provider -
107                                       offsetof(struct echo_reference, provider));
108 
109     er->wr_frames_in -= buffer->frame_count;
110 }
111 
echo_reference_reset_l(struct echo_reference * er)112 static void echo_reference_reset_l(struct echo_reference *er)
113 {
114     ALOGV("echo_reference_reset_l()");
115     free(er->buffer);
116     er->buffer = NULL;
117     er->buf_size = 0;
118     er->frames_in = 0;
119     free(er->wr_buf);
120     er->wr_buf = NULL;
121     er->wr_buf_size = 0;
122     er->wr_render_time.tv_sec = 0;
123     er->wr_render_time.tv_nsec = 0;
124     er->delta_count = 0;
125     er->prev_delta_sign = 0;
126 }
127 
/* additional space in resampler buffer allowing for extra samples to be returned
 * by speex resampler when sample rates ratio is not an integer.
 * echo_reference_write() adds this value to its estimate of the number of frames the
 * resampler will produce before sizing the conversion buffer.
 */
#define RESAMPLER_HEADROOM_SAMPLES   10
132 
echo_reference_write(struct echo_reference_itfe * echo_reference,struct echo_reference_buffer * buffer)133 static int echo_reference_write(struct echo_reference_itfe *echo_reference,
134                          struct echo_reference_buffer *buffer)
135 {
136     struct echo_reference *er = (struct echo_reference *)echo_reference;
137     int status = 0;
138 
139     if (er == NULL) {
140         return -EINVAL;
141     }
142 
143     pthread_mutex_lock(&er->lock);
144 
145     if (buffer == NULL) {
146         ALOGV("echo_reference_write() stop write");
147         er->state &= ~ECHOREF_WRITING;
148         echo_reference_reset_l(er);
149         goto exit;
150     }
151 
152     ALOGV("echo_reference_write() START trying to write %zu frames", buffer->frame_count);
153     ALOGV("echo_reference_write() playbackTimestamp:[%d].[%d], er->playback_delay:[%" PRId32 "]",
154             (int)buffer->time_stamp.tv_sec,
155             (int)buffer->time_stamp.tv_nsec, er->playback_delay);
156 
157     //ALOGV("echo_reference_write() %d frames", buffer->frame_count);
158     // discard writes until a valid time stamp is provided.
159 
160     if ((buffer->time_stamp.tv_sec == 0) && (buffer->time_stamp.tv_nsec == 0) &&
161         (er->wr_render_time.tv_sec == 0) && (er->wr_render_time.tv_nsec == 0)) {
162         goto exit;
163     }
164 
165     if ((er->state & ECHOREF_WRITING) == 0) {
166         ALOGV("echo_reference_write() start write");
167         if (er->resampler != NULL) {
168             er->resampler->reset(er->resampler);
169         }
170         er->state |= ECHOREF_WRITING;
171     }
172 
173     if ((er->state & ECHOREF_READING) == 0) {
174         goto exit;
175     }
176 
177     er->wr_render_time.tv_sec  = buffer->time_stamp.tv_sec;
178     er->wr_render_time.tv_nsec = buffer->time_stamp.tv_nsec;
179 
180     er->playback_delay = buffer->delay_ns;
181 
182     // this will be used in the get_next_buffer, to support variable input buffer sizes
183     er->wr_curr_frame_size = buffer->frame_count;
184 
185     void *srcBuf;
186     size_t inFrames;
187     // do stereo to mono and down sampling if necessary
188     if (er->rd_channel_count != er->wr_channel_count ||
189             er->rd_sampling_rate != er->wr_sampling_rate) {
190         size_t wrBufSize = buffer->frame_count;
191 
192         inFrames = buffer->frame_count;
193 
194         if (er->rd_sampling_rate != er->wr_sampling_rate) {
195             inFrames = (buffer->frame_count * er->rd_sampling_rate) / er->wr_sampling_rate +
196                                                     RESAMPLER_HEADROOM_SAMPLES;
197             // wr_buf is not only used as resampler output but also for stereo to mono conversion
198             // output so buffer size is driven by both write and read sample rates
199             if (inFrames > wrBufSize) {
200                 wrBufSize = inFrames;
201             }
202         }
203 
204         if (er->wr_buf_size < wrBufSize) {
205             ALOGV("echo_reference_write() increasing write buffer size from %zu to %zu",
206                     er->wr_buf_size, wrBufSize);
207             er->wr_buf_size = wrBufSize;
208             er->wr_buf = realloc(er->wr_buf, er->wr_buf_size * er->rd_frame_size);
209         }
210 
211         if (er->rd_channel_count != er->wr_channel_count) {
212             // must be stereo to mono
213             int16_t *src16 = (int16_t *)buffer->raw;
214             int16_t *dst16 = (int16_t *)er->wr_buf;
215             size_t frames = buffer->frame_count;
216             while (frames--) {
217                 *dst16++ = (int16_t)(((int32_t)*src16 + (int32_t)*(src16 + 1)) >> 1);
218                 src16 += 2;
219             }
220         }
221         if (er->wr_sampling_rate != er->rd_sampling_rate) {
222             if (er->resampler == NULL) {
223                 int rc;
224                 ALOGV("echo_reference_write() new ReSampler(%d, %d)",
225                       er->wr_sampling_rate, er->rd_sampling_rate);
226                 er->provider.get_next_buffer = echo_reference_get_next_buffer;
227                 er->provider.release_buffer = echo_reference_release_buffer;
228                 rc = create_resampler(er->wr_sampling_rate,
229                                  er->rd_sampling_rate,
230                                  er->rd_channel_count,
231                                  RESAMPLER_QUALITY_DEFAULT,
232                                  &er->provider,
233                                  &er->resampler);
234                 if (rc != 0) {
235                     er->resampler = NULL;
236                     ALOGV("echo_reference_write() failure to create resampler %d", rc);
237                     status = -ENODEV;
238                     goto exit;
239                 }
240             }
241             // er->wr_src_buf and er->wr_frames_in are used by getNexBuffer() called by the
242             // resampler to get new frames
243             if (er->rd_channel_count != er->wr_channel_count) {
244                 er->wr_src_buf = er->wr_buf;
245             } else {
246                 er->wr_src_buf = buffer->raw;
247             }
248             er->wr_frames_in = buffer->frame_count;
249             // inFrames is always more than we need here to get frames remaining from previous runs
250             // inFrames is updated by resample() with the number of frames produced
251             ALOGV("echo_reference_write() ReSampling(%d, %d)",
252                   er->wr_sampling_rate, er->rd_sampling_rate);
253             er->resampler->resample_from_provider(er->resampler,
254                                                      (int16_t *)er->wr_buf, &inFrames);
255             ALOGV_IF(er->wr_frames_in != 0,
256                     "echo_reference_write() er->wr_frames_in not 0 (%d) after resampler",
257                     er->wr_frames_in);
258         }
259         srcBuf = er->wr_buf;
260     } else {
261         inFrames = buffer->frame_count;
262         srcBuf = buffer->raw;
263     }
264 
265     if (er->frames_in + inFrames > er->buf_size) {
266         ALOGV("echo_reference_write() increasing buffer size from %zu to %zu",
267                 er->buf_size, er->frames_in + inFrames);
268                 er->buf_size = er->frames_in + inFrames;
269                 er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
270     }
271     memcpy((char *)er->buffer + er->frames_in * er->rd_frame_size,
272            srcBuf,
273            inFrames * er->rd_frame_size);
274     er->frames_in += inFrames;
275 
276     ALOGV("echo_reference_write() frames written:[%zu], frames total:[%zu] buffer size:[%zu]\n"
277           "                       er->wr_render_time:[%d].[%d], er->playback_delay:[%" PRId32 "]",
278           inFrames, er->frames_in, er->buf_size,
279           (int)er->wr_render_time.tv_sec, (int)er->wr_render_time.tv_nsec, er->playback_delay);
280 
281     pthread_cond_signal(&er->cond);
282 exit:
283     pthread_mutex_unlock(&er->lock);
284     ALOGV("echo_reference_write() END");
285     return status;
286 }
287 
// delay jump threshold to update ref buffer: 6 samples at 8kHz in nsecs
// (6 / 8000 s = 750000 ns). echo_reference_read() compares the absolute deviation between
// the buffered delay and the expected delay against this value.
#define MIN_DELAY_DELTA_NS (375000*2)
// number of consecutive delta with same sign between expected and actual delay before adjusting
// the buffer (the adjustment happens once the count is strictly greater than this value)
#define MIN_DELTA_NUM 4
293 
294 
echo_reference_read(struct echo_reference_itfe * echo_reference,struct echo_reference_buffer * buffer)295 static int echo_reference_read(struct echo_reference_itfe *echo_reference,
296                          struct echo_reference_buffer *buffer)
297 {
298     struct echo_reference *er = (struct echo_reference *)echo_reference;
299 
300     if (er == NULL) {
301         return -EINVAL;
302     }
303 
304     pthread_mutex_lock(&er->lock);
305 
306     if (buffer == NULL) {
307         ALOGV("echo_reference_read() stop read");
308         er->state &= ~ECHOREF_READING;
309         goto exit;
310     }
311 
312     ALOGV("echo_reference_read() START, delayCapture:[%" PRId32 "], "
313             "er->frames_in:[%zu],buffer->frame_count:[%zu]",
314     buffer->delay_ns, er->frames_in, buffer->frame_count);
315 
316     if ((er->state & ECHOREF_READING) == 0) {
317         ALOGV("echo_reference_read() start read");
318         echo_reference_reset_l(er);
319         er->state |= ECHOREF_READING;
320     }
321 
322     if ((er->state & ECHOREF_WRITING) == 0) {
323         memset(buffer->raw, 0, er->rd_frame_size * buffer->frame_count);
324         buffer->delay_ns = 0;
325         goto exit;
326     }
327 
328 //    ALOGV("echo_reference_read() %d frames", buffer->frame_count);
329 
330     // allow some time for new frames to arrive if not enough frames are ready for read
331     if (er->frames_in < buffer->frame_count) {
332         uint32_t timeoutMs = (uint32_t)((1000 * buffer->frame_count) / er->rd_sampling_rate / 2);
333         struct timespec ts = {0, 0};
334 
335 #ifndef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
336         clock_gettime(CLOCK_REALTIME, &ts);
337 #endif
338 
339         ts.tv_sec  += timeoutMs/1000;
340         ts.tv_nsec += (timeoutMs%1000) * 1000000;
341         if (ts.tv_nsec >= 1000000000) {
342             ts.tv_nsec -= 1000000000;
343             ts.tv_sec  += 1;
344         }
345 
346 #ifdef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
347         pthread_cond_timedwait_relative_np(&er->cond, &er->lock, &ts);
348 #else
349         pthread_cond_timedwait(&er->cond, &er->lock, &ts);
350 #endif
351 
352         ALOGV_IF((er->frames_in < buffer->frame_count),
353                  "echo_reference_read() waited %d ms but still not enough frames"\
354                  " er->frames_in: %d, buffer->frame_count = %d",
355                  timeoutMs, er->frames_in, buffer->frame_count);
356     }
357 
358     int64_t timeDiff;
359     struct timespec tmp;
360 
361     if ((er->wr_render_time.tv_sec == 0 && er->wr_render_time.tv_nsec == 0) ||
362         (buffer->time_stamp.tv_sec == 0 && buffer->time_stamp.tv_nsec == 0)) {
363         ALOGV("echo_reference_read(): NEW:timestamp is zero---------setting timeDiff = 0, "\
364              "not updating delay this time");
365         timeDiff = 0;
366     } else {
367         if (buffer->time_stamp.tv_nsec < er->wr_render_time.tv_nsec) {
368             tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec - 1;
369             tmp.tv_nsec = 1000000000 + buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
370         } else {
371             tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec;
372             tmp.tv_nsec = buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
373         }
374         timeDiff = (((int64_t)tmp.tv_sec * 1000000000 + tmp.tv_nsec));
375 
376         int64_t expectedDelayNs =  er->playback_delay + buffer->delay_ns - timeDiff;
377 
378         if (er->resampler != NULL) {
379             // Resampler already compensates part of the delay
380             int32_t rsmp_delay = er->resampler->delay_ns(er->resampler);
381             expectedDelayNs -= rsmp_delay;
382         }
383 
384         ALOGV("echo_reference_read(): expectedDelayNs[%" PRId64 "] = "
385                 "er->playback_delay[%" PRId32 "] + delayCapture[%" PRId32
386                 "] - timeDiff[%" PRId64 "]",
387                 expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
388 
389         if (expectedDelayNs > 0) {
390             int64_t delayNs = ((int64_t)er->frames_in * 1000000000) / er->rd_sampling_rate;
391 
392             int64_t  deltaNs = delayNs - expectedDelayNs;
393 
394             ALOGV("echo_reference_read(): EchoPathDelayDeviation between reference and DMA [%"
395                     PRId64 "]", deltaNs);
396             if (llabs(deltaNs) >= MIN_DELAY_DELTA_NS) {
397                 // smooth the variation and update the reference buffer only
398                 // if a deviation in the same direction is observed for more than MIN_DELTA_NUM
399                 // consecutive reads.
400                 int16_t delay_sign = (deltaNs >= 0) ? 1 : -1;
401                 if (delay_sign == er->prev_delta_sign) {
402                     er->delta_count++;
403                 } else {
404                     er->delta_count = 1;
405                 }
406                 er->prev_delta_sign = delay_sign;
407 
408                 if (er->delta_count > MIN_DELTA_NUM) {
409                     size_t previousFrameIn = er->frames_in;
410                     er->frames_in = (size_t)((expectedDelayNs * er->rd_sampling_rate)/1000000000);
411                     int offset = er->frames_in - previousFrameIn;
412 
413                     ALOGV("echo_reference_read(): deltaNs ENOUGH and %s: "
414                             "er->frames_in: %zu, previousFrameIn = %zu",
415                          delay_sign ? "positive" : "negative", er->frames_in, previousFrameIn);
416 
417                     if (deltaNs < 0) {
418                         // Less data available in the reference buffer than expected
419                         if (er->frames_in > er->buf_size) {
420                             er->buf_size = er->frames_in;
421                             er->buffer  = realloc(er->buffer, er->buf_size * er->rd_frame_size);
422                             ALOGV("echo_reference_read(): increasing buffer size to %zu",
423                                   er->buf_size);
424                         }
425 
426                         if (offset > 0) {
427                             memset((char *)er->buffer + previousFrameIn * er->rd_frame_size,
428                                    0, offset * er->rd_frame_size);
429                             ALOGV("echo_reference_read(): pushing ref buffer by [%d]", offset);
430                         }
431                     } else {
432                         // More data available in the reference buffer than expected
433                         offset = -offset;
434                         if (offset > 0) {
435                             memcpy(er->buffer, (char *)er->buffer + (offset * er->rd_frame_size),
436                                    er->frames_in * er->rd_frame_size);
437                             ALOGV("echo_reference_read(): shifting ref buffer by [%zu]",
438                                   er->frames_in);
439                         }
440                     }
441                 }
442             } else {
443                 er->delta_count = 0;
444                 er->prev_delta_sign = 0;
445                 ALOGV("echo_reference_read(): Constant EchoPathDelay - difference "
446                         "between reference and DMA %" PRId64, deltaNs);
447             }
448         } else {
449             ALOGV("echo_reference_read(): NEGATIVE expectedDelayNs[%" PRId64
450                  "] = er->playback_delay[%" PRId32 "] + delayCapture[%" PRId32
451                  "] - timeDiff[%" PRId64 "]",
452                  expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
453         }
454     }
455 
456     if (er->frames_in < buffer->frame_count) {
457         if (buffer->frame_count > er->buf_size) {
458             er->buf_size = buffer->frame_count;
459             er->buffer  = realloc(er->buffer, er->buf_size * er->rd_frame_size);
460             ALOGV("echo_reference_read(): increasing buffer size to %zu", er->buf_size);
461         }
462         // filling up the reference buffer with 0s to match the expected delay.
463         memset((char *)er->buffer + er->frames_in * er->rd_frame_size,
464             0, (buffer->frame_count - er->frames_in) * er->rd_frame_size);
465         er->frames_in = buffer->frame_count;
466     }
467 
468     memcpy(buffer->raw,
469            (char *)er->buffer,
470            buffer->frame_count * er->rd_frame_size);
471 
472     er->frames_in -= buffer->frame_count;
473     memcpy(er->buffer,
474            (char *)er->buffer + buffer->frame_count * er->rd_frame_size,
475            er->frames_in * er->rd_frame_size);
476 
477     // As the reference buffer is now time aligned to the microphone signal there is a zero delay
478     buffer->delay_ns = 0;
479 
480     ALOGV("echo_reference_read() END %zu frames, total frames in %zu",
481           buffer->frame_count, er->frames_in);
482 
483     pthread_cond_signal(&er->cond);
484 
485 exit:
486     pthread_mutex_unlock(&er->lock);
487     return 0;
488 }
489 
490 
/*
 * Creates an echo reference.
 *
 * rdFormat / rdChannelCount / rdSamplingRate: format delivered to the reader.
 * wrFormat / wrChannelCount / wrSamplingRate: format supplied by the writer.
 * echo_reference: receives the interface of the new instance; set to NULL on failure.
 *
 * Only 16 bit PCM is supported on both sides; the writer must be stereo and the reader
 * mono or stereo. Sampling rates must be non zero because they are used as divisors by
 * the read and write paths.
 *
 * Returns 0 on success, -EINVAL on a NULL output pointer or unsupported configuration,
 * -ENOMEM if the instance cannot be allocated, or the negated pthread error code if the
 * lock or condition variable cannot be initialized.
 */
int create_echo_reference(audio_format_t rdFormat,
                            uint32_t rdChannelCount,
                            uint32_t rdSamplingRate,
                            audio_format_t wrFormat,
                            uint32_t wrChannelCount,
                            uint32_t wrSamplingRate,
                            struct echo_reference_itfe **echo_reference)
{
    struct echo_reference *er;
    int rc;

    ALOGV("create_echo_reference()");

    if (echo_reference == NULL) {
        return -EINVAL;
    }

    *echo_reference = NULL;

    if (rdFormat != AUDIO_FORMAT_PCM_16_BIT ||
            rdFormat != wrFormat) {
        ALOGW("create_echo_reference bad format rd %d, wr %d", rdFormat, wrFormat);
        return -EINVAL;
    }
    if ((rdChannelCount != 1 && rdChannelCount != 2) ||
            wrChannelCount != 2) {
        ALOGW("create_echo_reference bad channel count rd %d, wr %d", rdChannelCount,
                wrChannelCount);
        return -EINVAL;
    }
    if (rdSamplingRate == 0 || wrSamplingRate == 0) {
        ALOGW("create_echo_reference bad sampling rate rd %" PRIu32 ", wr %" PRIu32,
                rdSamplingRate, wrSamplingRate);
        return -EINVAL;
    }

    er = calloc(1, sizeof(*er));
    if (er == NULL) {
        return -ENOMEM;
    }

    // do not rely on zero filled memory being a valid mutex / condition variable
    rc = pthread_mutex_init(&er->lock, NULL);
    if (rc != 0) {
        free(er);
        return -rc;
    }
    rc = pthread_cond_init(&er->cond, NULL);
    if (rc != 0) {
        pthread_mutex_destroy(&er->lock);
        free(er);
        return -rc;
    }

    er->itfe.read = echo_reference_read;
    er->itfe.write = echo_reference_write;

    er->state = ECHOREF_IDLE;
    er->rd_format = rdFormat;
    er->rd_channel_count = rdChannelCount;
    er->rd_sampling_rate = rdSamplingRate;
    er->wr_format = wrFormat;
    er->wr_channel_count = wrChannelCount;
    er->wr_sampling_rate = wrSamplingRate;
    er->rd_frame_size = audio_bytes_per_sample(rdFormat) * rdChannelCount;
    er->wr_frame_size = audio_bytes_per_sample(wrFormat) * wrChannelCount;
    *echo_reference = &er->itfe;
    return 0;
}
538 
release_echo_reference(struct echo_reference_itfe * echo_reference)539 void release_echo_reference(struct echo_reference_itfe *echo_reference) {
540     struct echo_reference *er = (struct echo_reference *)echo_reference;
541 
542     if (er == NULL) {
543         return;
544     }
545 
546     ALOGV("EchoReference dstor");
547     echo_reference_reset_l(er);
548     if (er->resampler != NULL) {
549         release_resampler(er->resampler);
550     }
551     free(er);
552 }
553 
554