1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20 
21 #include "include/WAVExtractor.h"
22 
23 #include <audio_utils/primitives.h>
24 #include <media/stagefright/foundation/ADebug.h>
25 #include <media/stagefright/DataSource.h>
26 #include <media/stagefright/MediaBufferGroup.h>
27 #include <media/stagefright/MediaDefs.h>
28 #include <media/stagefright/MediaErrors.h>
29 #include <media/stagefright/MediaSource.h>
30 #include <media/stagefright/MetaData.h>
31 #include <utils/String8.h>
32 #include <cutils/bitops.h>
33 
34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35 
36 namespace android {
37 
// Format tags found in the first two bytes of a "fmt " chunk (and, for
// WAVE_FORMAT_EXTENSIBLE, again in the first two bytes of the sub-format GUID).
enum {
    WAVE_FORMAT_PCM        = 0x0001,  // integer PCM
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,  // floating point PCM
    WAVE_FORMAT_ALAW       = 0x0006,  // G.711 A-law
    WAVE_FORMAT_MULAW      = 0x0007,  // G.711 mu-law
    WAVE_FORMAT_MSGSM      = 0x0031,  // Microsoft GSM
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // extended header, real format is in the GUID
};
46 
// Trailing 14 bytes of the sub-format GUID expected in a WAVE_FORMAT_EXTENSIBLE
// header; the 2 bytes preceding them in the file carry the actual format tag.
// Contains embedded NUL bytes, so it must be compared with memcmp(), not strcmp().
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00"                   // remainder of the first GUID field
        "\x00\x00"                   // second GUID field
        "\x10\x00"                   // third GUID field
        "\x80\x00"                   // fourth GUID field
        "\x00\xAA\x00\x38\x9B\x71";  // final six GUID bytes
48 
49 
// Decodes an unsigned 32-bit little-endian value from the 4 bytes at |ptr|.
// Each byte is widened to uint32_t before shifting so that a top byte >= 0x80
// is never shifted into the sign bit of a promoted (signed) int.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return (static_cast<uint32_t>(ptr[3]) << 24)
            | (static_cast<uint32_t>(ptr[2]) << 16)
            | (static_cast<uint32_t>(ptr[1]) << 8)
            | static_cast<uint32_t>(ptr[0]);
}
53 
// Decodes an unsigned 16-bit little-endian value from the 2 bytes at |ptr|.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint16_t lowByte = ptr[0];
    const uint16_t highByte = ptr[1];
    return static_cast<uint16_t>((highByte << 8) | lowByte);
}
57 
58 struct WAVSource : public MediaSource {
59     WAVSource(
60             const sp<DataSource> &dataSource,
61             const sp<MetaData> &meta,
62             uint16_t waveFormat,
63             int32_t bitsPerSample,
64             off64_t offset, size_t size);
65 
66     virtual status_t start(MetaData *params = NULL);
67     virtual status_t stop();
68     virtual sp<MetaData> getFormat();
69 
70     virtual status_t read(
71             MediaBuffer **buffer, const ReadOptions *options = NULL);
72 
73 protected:
74     virtual ~WAVSource();
75 
76 private:
77     static const size_t kMaxFrameSize;
78 
79     sp<DataSource> mDataSource;
80     sp<MetaData> mMeta;
81     uint16_t mWaveFormat;
82     int32_t mSampleRate;
83     int32_t mNumChannels;
84     int32_t mBitsPerSample;
85     off64_t mOffset;
86     size_t mSize;
87     bool mStarted;
88     MediaBufferGroup *mGroup;
89     off64_t mCurrentPos;
90 
91     WAVSource(const WAVSource &);
92     WAVSource &operator=(const WAVSource &);
93 };
94 
WAVExtractor(const sp<DataSource> & source)95 WAVExtractor::WAVExtractor(const sp<DataSource> &source)
96     : mDataSource(source),
97       mValidFormat(false),
98       mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
99     mInitCheck = init();
100 }
101 
~WAVExtractor()102 WAVExtractor::~WAVExtractor() {
103 }
104 
getMetaData()105 sp<MetaData> WAVExtractor::getMetaData() {
106     sp<MetaData> meta = new MetaData;
107 
108     if (mInitCheck != OK) {
109         return meta;
110     }
111 
112     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
113 
114     return meta;
115 }
116 
countTracks()117 size_t WAVExtractor::countTracks() {
118     return mInitCheck == OK ? 1 : 0;
119 }
120 
getTrack(size_t index)121 sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
122     if (mInitCheck != OK || index > 0) {
123         return NULL;
124     }
125 
126     return new WAVSource(
127             mDataSource, mTrackMeta,
128             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
129 }
130 
getTrackMetaData(size_t index,uint32_t)131 sp<MetaData> WAVExtractor::getTrackMetaData(
132         size_t index, uint32_t /* flags */) {
133     if (mInitCheck != OK || index > 0) {
134         return NULL;
135     }
136 
137     return mTrackMeta;
138 }
139 
init()140 status_t WAVExtractor::init() {
141     uint8_t header[12];
142     if (mDataSource->readAt(
143                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
144         return NO_INIT;
145     }
146 
147     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
148         return NO_INIT;
149     }
150 
151     size_t totalSize = U32_LE_AT(&header[4]);
152 
153     off64_t offset = 12;
154     size_t remainingSize = totalSize;
155     while (remainingSize >= 8) {
156         uint8_t chunkHeader[8];
157         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
158             return NO_INIT;
159         }
160 
161         remainingSize -= 8;
162         offset += 8;
163 
164         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
165 
166         if (chunkSize > remainingSize) {
167             return NO_INIT;
168         }
169 
170         if (!memcmp(chunkHeader, "fmt ", 4)) {
171             if (chunkSize < 16) {
172                 return NO_INIT;
173             }
174 
175             uint8_t formatSpec[40];
176             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
177                 return NO_INIT;
178             }
179 
180             mWaveFormat = U16_LE_AT(formatSpec);
181             if (mWaveFormat != WAVE_FORMAT_PCM
182                     && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
183                     && mWaveFormat != WAVE_FORMAT_ALAW
184                     && mWaveFormat != WAVE_FORMAT_MULAW
185                     && mWaveFormat != WAVE_FORMAT_MSGSM
186                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
187                 return ERROR_UNSUPPORTED;
188             }
189 
190             uint8_t fmtSize = 16;
191             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
192                 fmtSize = 40;
193             }
194             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
195                 return NO_INIT;
196             }
197 
198             mNumChannels = U16_LE_AT(&formatSpec[2]);
199 
200             if (mNumChannels < 1 || mNumChannels > 8) {
201                 ALOGE("Unsupported number of channels (%d)", mNumChannels);
202                 return ERROR_UNSUPPORTED;
203             }
204 
205             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
206                 if (mNumChannels != 1 && mNumChannels != 2) {
207                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
208                             mNumChannels);
209                 }
210             }
211 
212             mSampleRate = U32_LE_AT(&formatSpec[4]);
213 
214             if (mSampleRate == 0) {
215                 return ERROR_MALFORMED;
216             }
217 
218             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
219 
220             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
221                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
222                 if (validBitsPerSample != mBitsPerSample) {
223                     if (validBitsPerSample != 0) {
224                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
225                                 validBitsPerSample, mBitsPerSample);
226                         return ERROR_UNSUPPORTED;
227                     } else {
228                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
229                         // writers don't correctly set the valid bits value, and leave it at 0.
230                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
231                     }
232                 }
233 
234                 mChannelMask = U32_LE_AT(&formatSpec[20]);
235                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
236                 if ((mChannelMask >> 18) != 0) {
237                     ALOGE("invalid channel mask 0x%x", mChannelMask);
238                     return ERROR_MALFORMED;
239                 }
240 
241                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
242                         && (popcount(mChannelMask) != mNumChannels)) {
243                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
244                             popcount(mChannelMask), mChannelMask);
245                     return ERROR_MALFORMED;
246                 }
247 
248                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
249                 // the sample format, using the same definitions as a regular WAV header
250                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
251                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
252                     ALOGE("unsupported GUID");
253                     return ERROR_UNSUPPORTED;
254                 }
255             }
256 
257             if (mWaveFormat == WAVE_FORMAT_PCM) {
258                 if (mBitsPerSample != 8 && mBitsPerSample != 16
259                     && mBitsPerSample != 24 && mBitsPerSample != 32) {
260                     return ERROR_UNSUPPORTED;
261                 }
262             } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
263                 if (mBitsPerSample != 32) {  // TODO we don't support double
264                     return ERROR_UNSUPPORTED;
265                 }
266             }
267             else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
268                 if (mBitsPerSample != 0) {
269                     return ERROR_UNSUPPORTED;
270                 }
271             } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
272                 if (mBitsPerSample != 8) {
273                     return ERROR_UNSUPPORTED;
274                 }
275             } else {
276                 return ERROR_UNSUPPORTED;
277             }
278 
279             mValidFormat = true;
280         } else if (!memcmp(chunkHeader, "data", 4)) {
281             if (mValidFormat) {
282                 mDataOffset = offset;
283                 mDataSize = chunkSize;
284 
285                 mTrackMeta = new MetaData;
286 
287                 switch (mWaveFormat) {
288                     case WAVE_FORMAT_PCM:
289                     case WAVE_FORMAT_IEEE_FLOAT:
290                         mTrackMeta->setCString(
291                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
292                         break;
293                     case WAVE_FORMAT_ALAW:
294                         mTrackMeta->setCString(
295                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
296                         break;
297                     case WAVE_FORMAT_MSGSM:
298                         mTrackMeta->setCString(
299                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
300                         break;
301                     default:
302                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
303                         mTrackMeta->setCString(
304                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
305                         break;
306                 }
307 
308                 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
309                 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
310                 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
311                 mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
312 
313                 int64_t durationUs = 0;
314                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
315                     // 65 bytes decode to 320 8kHz samples
316                     durationUs =
317                         1000000LL * (mDataSize / 65 * 320) / 8000;
318                 } else {
319                     size_t bytesPerSample = mBitsPerSample >> 3;
320 
321                     if (!bytesPerSample || !mNumChannels)
322                         return ERROR_MALFORMED;
323 
324                     size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
325 
326                     if (!mSampleRate)
327                         return ERROR_MALFORMED;
328 
329                     durationUs =
330                         1000000LL * num_samples / mSampleRate;
331                 }
332 
333                 mTrackMeta->setInt64(kKeyDuration, durationUs);
334 
335                 return OK;
336             }
337         }
338 
339         offset += chunkSize;
340     }
341 
342     return NO_INIT;
343 }
344 
345 const size_t WAVSource::kMaxFrameSize = 32768;
346 
WAVSource(const sp<DataSource> & dataSource,const sp<MetaData> & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)347 WAVSource::WAVSource(
348         const sp<DataSource> &dataSource,
349         const sp<MetaData> &meta,
350         uint16_t waveFormat,
351         int32_t bitsPerSample,
352         off64_t offset, size_t size)
353     : mDataSource(dataSource),
354       mMeta(meta),
355       mWaveFormat(waveFormat),
356       mSampleRate(0),
357       mNumChannels(0),
358       mBitsPerSample(bitsPerSample),
359       mOffset(offset),
360       mSize(size),
361       mStarted(false),
362       mGroup(NULL) {
363     CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
364     CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
365 
366     mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
367 }
368 
~WAVSource()369 WAVSource::~WAVSource() {
370     if (mStarted) {
371         stop();
372     }
373 }
374 
start(MetaData *)375 status_t WAVSource::start(MetaData * /* params */) {
376     ALOGV("WAVSource::start");
377 
378     CHECK(!mStarted);
379 
380     mGroup = new MediaBufferGroup;
381     mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
382 
383     if (mBitsPerSample == 8) {
384         // As a temporary buffer for 8->16 bit conversion.
385         mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
386     }
387 
388     mCurrentPos = mOffset;
389 
390     mStarted = true;
391 
392     return OK;
393 }
394 
stop()395 status_t WAVSource::stop() {
396     ALOGV("WAVSource::stop");
397 
398     CHECK(mStarted);
399 
400     delete mGroup;
401     mGroup = NULL;
402 
403     mStarted = false;
404 
405     return OK;
406 }
407 
getFormat()408 sp<MetaData> WAVSource::getFormat() {
409     ALOGV("WAVSource::getFormat");
410 
411     return mMeta;
412 }
413 
read(MediaBuffer ** out,const ReadOptions * options)414 status_t WAVSource::read(
415         MediaBuffer **out, const ReadOptions *options) {
416     *out = NULL;
417 
418     int64_t seekTimeUs;
419     ReadOptions::SeekMode mode;
420     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
421         int64_t pos = 0;
422 
423         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
424             // 65 bytes decode to 320 8kHz samples
425             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
426             int64_t framenumber = samplenumber / 320;
427             pos = framenumber * 65;
428         } else {
429             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
430         }
431         if (pos > (off64_t)mSize) {
432             pos = mSize;
433         }
434         mCurrentPos = pos + mOffset;
435     }
436 
437     MediaBuffer *buffer;
438     status_t err = mGroup->acquire_buffer(&buffer);
439     if (err != OK) {
440         return err;
441     }
442 
443     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
444     size_t maxBytesToRead =
445         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
446         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
447 
448     size_t maxBytesAvailable =
449         (mCurrentPos - mOffset >= (off64_t)mSize)
450             ? 0 : mSize - (mCurrentPos - mOffset);
451 
452     if (maxBytesToRead > maxBytesAvailable) {
453         maxBytesToRead = maxBytesAvailable;
454     }
455 
456     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
457         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
458         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
459         if (maxBytesToRead > 1024) {
460             maxBytesToRead = 1024;
461         }
462         maxBytesToRead = (maxBytesToRead / 65) * 65;
463     } else {
464         // read only integral amounts of audio unit frames.
465         const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
466         maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
467     }
468 
469     ssize_t n = mDataSource->readAt(
470             mCurrentPos, buffer->data(),
471             maxBytesToRead);
472 
473     if (n <= 0) {
474         buffer->release();
475         buffer = NULL;
476 
477         return ERROR_END_OF_STREAM;
478     }
479 
480     buffer->set_range(0, n);
481 
482     // TODO: add capability to return data as float PCM instead of 16 bit PCM.
483     if (mWaveFormat == WAVE_FORMAT_PCM) {
484         if (mBitsPerSample == 8) {
485             // Convert 8-bit unsigned samples to 16-bit signed.
486 
487             // Create new buffer with 2 byte wide samples
488             MediaBuffer *tmp;
489             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
490             tmp->set_range(0, 2 * n);
491 
492             memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
493             buffer->release();
494             buffer = tmp;
495         } else if (mBitsPerSample == 24) {
496             // Convert 24-bit signed samples to 16-bit signed in place
497             const size_t numSamples = n / 3;
498 
499             memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
500             buffer->set_range(0, 2 * numSamples);
501         }  else if (mBitsPerSample == 32) {
502             // Convert 32-bit signed samples to 16-bit signed in place
503             const size_t numSamples = n / 4;
504 
505             memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
506             buffer->set_range(0, 2 * numSamples);
507         }
508     } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
509         if (mBitsPerSample == 32) {
510             // Convert 32-bit float samples to 16-bit signed in place
511             const size_t numSamples = n / 4;
512 
513             memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
514             buffer->set_range(0, 2 * numSamples);
515         }
516     }
517 
518     int64_t timeStampUs = 0;
519 
520     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
521         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
522     } else {
523         size_t bytesPerSample = mBitsPerSample >> 3;
524         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
525                 / (mNumChannels * bytesPerSample) / mSampleRate;
526     }
527 
528     buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
529 
530     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
531     mCurrentPos += n;
532 
533     *out = buffer;
534 
535     return OK;
536 }
537 
538 ////////////////////////////////////////////////////////////////////////////////
539 
SniffWAV(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> *)540 bool SniffWAV(
541         const sp<DataSource> &source, String8 *mimeType, float *confidence,
542         sp<AMessage> *) {
543     char header[12];
544     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
545         return false;
546     }
547 
548     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
549         return false;
550     }
551 
552     sp<MediaExtractor> extractor = new WAVExtractor(source);
553     if (extractor->countTracks() == 0) {
554         return false;
555     }
556 
557     *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
558     *confidence = 0.3f;
559 
560     return true;
561 }
562 
563 }  // namespace android
564