1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20 
21 #include "WAVExtractor.h"
22 
23 #include <audio_utils/primitives.h>
24 #include <media/DataSourceBase.h>
25 #include <media/MediaTrack.h>
26 #include <media/stagefright/foundation/ADebug.h>
27 #include <media/stagefright/MediaBufferGroup.h>
28 #include <media/stagefright/MediaDefs.h>
29 #include <media/stagefright/MediaErrors.h>
30 #include <media/stagefright/MetaData.h>
31 #include <utils/String8.h>
32 #include <cutils/bitops.h>
33 
34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35 
36 namespace android {
37 
// Format tags (the first 16-bit field of a "fmt " chunk) that this extractor
// accepts.  Any other tag is rejected with ERROR_UNSUPPORTED by init().
enum {
    // Integer PCM samples.
    WAVE_FORMAT_PCM = 0x0001,
    // Floating point PCM samples.
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,
    // G.711 A-law.
    WAVE_FORMAT_ALAW = 0x0006,
    // G.711 mu-law.
    WAVE_FORMAT_MULAW = 0x0007,
    // Microsoft GSM.
    WAVE_FORMAT_MSGSM = 0x0031,
    // Extensible header; the effective format tag is carried in the
    // sub-format GUID of the extended "fmt " chunk.
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};
46 
// Trailing 14 bytes of the sub-format GUIDs accepted in a WAVE_FORMAT_EXTENSIBLE
// header.  The first two bytes of the GUID hold the format tag and are parsed
// separately, so only the remainder is compared (with memcmp over 14 bytes).
// The literals are split at the GUID field boundaries for readability; adjacent
// string literals are concatenated by the compiler.
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00"                            // upper half of Data1
        "\x00\x00"                            // Data2
        "\x10\x00"                            // Data3
        "\x80\x00\x00\xAA\x00\x38\x9B\x71";   // Data4
static const char* AMBISONIC_SUBFORMAT =
        "\x00\x00"                            // upper half of Data1
        "\x21\x07"                            // Data2
        "\xD3\x11"                            // Data3
        "\x86\x44\xC8\xC1\xCA\x00\x00\x00";   // Data4
49 
// Decodes the unsigned 32-bit little-endian integer stored in the four bytes
// starting at |ptr|.
//
// Every byte is widened to uint32_t before it is shifted: a plain uint8_t
// operand is promoted to (signed) int, and shifting a byte >= 0x80 left by 24
// would then move a bit into the sign position.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return (static_cast<uint32_t>(ptr[3]) << 24)
            | (static_cast<uint32_t>(ptr[2]) << 16)
            | (static_cast<uint32_t>(ptr[1]) << 8)
            | static_cast<uint32_t>(ptr[0]);
}
53 
// Decodes the unsigned 16-bit little-endian integer stored in the two bytes
// starting at |ptr|.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint8_t lowByte = ptr[0];
    const uint8_t highByte = ptr[1];
    return (highByte << 8) | lowByte;
}
57 
58 struct WAVSource : public MediaTrack {
59     WAVSource(
60             DataSourceBase *dataSource,
61             MetaDataBase &meta,
62             uint16_t waveFormat,
63             int32_t bitsPerSample,
64             off64_t offset, size_t size);
65 
66     virtual status_t start(MetaDataBase *params = NULL);
67     virtual status_t stop();
68     virtual status_t getFormat(MetaDataBase &meta);
69 
70     virtual status_t read(
71             MediaBufferBase **buffer, const ReadOptions *options = NULL);
72 
supportNonblockingReadandroid::WAVSource73     virtual bool supportNonblockingRead() { return true; }
74 
75 protected:
76     virtual ~WAVSource();
77 
78 private:
79     static const size_t kMaxFrameSize;
80 
81     DataSourceBase *mDataSource;
82     MetaDataBase &mMeta;
83     uint16_t mWaveFormat;
84     int32_t mSampleRate;
85     int32_t mNumChannels;
86     int32_t mBitsPerSample;
87     off64_t mOffset;
88     size_t mSize;
89     bool mStarted;
90     MediaBufferGroup *mGroup;
91     off64_t mCurrentPos;
92 
93     WAVSource(const WAVSource &);
94     WAVSource &operator=(const WAVSource &);
95 };
96 
WAVExtractor(DataSourceBase * source)97 WAVExtractor::WAVExtractor(DataSourceBase *source)
98     : mDataSource(source),
99       mValidFormat(false),
100       mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
101     mInitCheck = init();
102 }
103 
~WAVExtractor()104 WAVExtractor::~WAVExtractor() {
105 }
106 
getMetaData(MetaDataBase & meta)107 status_t WAVExtractor::getMetaData(MetaDataBase &meta) {
108     meta.clear();
109     if (mInitCheck == OK) {
110         meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
111     }
112 
113     return OK;
114 }
115 
countTracks()116 size_t WAVExtractor::countTracks() {
117     return mInitCheck == OK ? 1 : 0;
118 }
119 
getTrack(size_t index)120 MediaTrack *WAVExtractor::getTrack(size_t index) {
121     if (mInitCheck != OK || index > 0) {
122         return NULL;
123     }
124 
125     return new WAVSource(
126             mDataSource, mTrackMeta,
127             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
128 }
129 
getTrackMetaData(MetaDataBase & meta,size_t index,uint32_t)130 status_t WAVExtractor::getTrackMetaData(
131         MetaDataBase &meta,
132         size_t index, uint32_t /* flags */) {
133     if (mInitCheck != OK || index > 0) {
134         return UNKNOWN_ERROR;
135     }
136 
137     meta = mTrackMeta;
138     return OK;
139 }
140 
init()141 status_t WAVExtractor::init() {
142     uint8_t header[12];
143     if (mDataSource->readAt(
144                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
145         return NO_INIT;
146     }
147 
148     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
149         return NO_INIT;
150     }
151 
152     size_t totalSize = U32_LE_AT(&header[4]);
153 
154     off64_t offset = 12;
155     size_t remainingSize = totalSize;
156     while (remainingSize >= 8) {
157         uint8_t chunkHeader[8];
158         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
159             return NO_INIT;
160         }
161 
162         remainingSize -= 8;
163         offset += 8;
164 
165         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
166 
167         if (chunkSize > remainingSize) {
168             return NO_INIT;
169         }
170 
171         if (!memcmp(chunkHeader, "fmt ", 4)) {
172             if (chunkSize < 16) {
173                 return NO_INIT;
174             }
175 
176             uint8_t formatSpec[40];
177             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
178                 return NO_INIT;
179             }
180 
181             mWaveFormat = U16_LE_AT(formatSpec);
182             if (mWaveFormat != WAVE_FORMAT_PCM
183                     && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
184                     && mWaveFormat != WAVE_FORMAT_ALAW
185                     && mWaveFormat != WAVE_FORMAT_MULAW
186                     && mWaveFormat != WAVE_FORMAT_MSGSM
187                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
188                 return ERROR_UNSUPPORTED;
189             }
190 
191             uint8_t fmtSize = 16;
192             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
193                 fmtSize = 40;
194             }
195             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
196                 return NO_INIT;
197             }
198 
199             mNumChannels = U16_LE_AT(&formatSpec[2]);
200 
201             if (mNumChannels < 1 || mNumChannels > 8) {
202                 ALOGE("Unsupported number of channels (%d)", mNumChannels);
203                 return ERROR_UNSUPPORTED;
204             }
205 
206             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
207                 if (mNumChannels != 1 && mNumChannels != 2) {
208                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
209                             mNumChannels);
210                 }
211             }
212 
213             mSampleRate = U32_LE_AT(&formatSpec[4]);
214 
215             if (mSampleRate == 0) {
216                 return ERROR_MALFORMED;
217             }
218 
219             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
220 
221             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
222                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
223                 if (validBitsPerSample != mBitsPerSample) {
224                     if (validBitsPerSample != 0) {
225                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
226                                 validBitsPerSample, mBitsPerSample);
227                         return ERROR_UNSUPPORTED;
228                     } else {
229                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
230                         // writers don't correctly set the valid bits value, and leave it at 0.
231                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
232                     }
233                 }
234 
235                 mChannelMask = U32_LE_AT(&formatSpec[20]);
236                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
237                 if ((mChannelMask >> 18) != 0) {
238                     ALOGE("invalid channel mask 0x%x", mChannelMask);
239                     return ERROR_MALFORMED;
240                 }
241 
242                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
243                         && (popcount(mChannelMask) != mNumChannels)) {
244                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
245                             popcount(mChannelMask), mChannelMask);
246                     return ERROR_MALFORMED;
247                 }
248 
249                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
250                 // the sample format, using the same definitions as a regular WAV header
251                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
252                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
253                     memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
254                     ALOGE("unsupported GUID");
255                     return ERROR_UNSUPPORTED;
256                 }
257             }
258 
259             if (mWaveFormat == WAVE_FORMAT_PCM) {
260                 if (mBitsPerSample != 8 && mBitsPerSample != 16
261                     && mBitsPerSample != 24 && mBitsPerSample != 32) {
262                     return ERROR_UNSUPPORTED;
263                 }
264             } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
265                 if (mBitsPerSample != 32) {  // TODO we don't support double
266                     return ERROR_UNSUPPORTED;
267                 }
268             }
269             else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
270                 if (mBitsPerSample != 0) {
271                     return ERROR_UNSUPPORTED;
272                 }
273             } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
274                 if (mBitsPerSample != 8) {
275                     return ERROR_UNSUPPORTED;
276                 }
277             } else {
278                 return ERROR_UNSUPPORTED;
279             }
280 
281             mValidFormat = true;
282         } else if (!memcmp(chunkHeader, "data", 4)) {
283             if (mValidFormat) {
284                 mDataOffset = offset;
285                 mDataSize = chunkSize;
286 
287                 mTrackMeta.clear();
288 
289                 switch (mWaveFormat) {
290                     case WAVE_FORMAT_PCM:
291                     case WAVE_FORMAT_IEEE_FLOAT:
292                         mTrackMeta.setCString(
293                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
294                         break;
295                     case WAVE_FORMAT_ALAW:
296                         mTrackMeta.setCString(
297                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
298                         break;
299                     case WAVE_FORMAT_MSGSM:
300                         mTrackMeta.setCString(
301                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
302                         break;
303                     default:
304                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
305                         mTrackMeta.setCString(
306                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
307                         break;
308                 }
309 
310                 mTrackMeta.setInt32(kKeyChannelCount, mNumChannels);
311                 mTrackMeta.setInt32(kKeyChannelMask, mChannelMask);
312                 mTrackMeta.setInt32(kKeySampleRate, mSampleRate);
313                 mTrackMeta.setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
314 
315                 int64_t durationUs = 0;
316                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
317                     // 65 bytes decode to 320 8kHz samples
318                     durationUs =
319                         1000000LL * (mDataSize / 65 * 320) / 8000;
320                 } else {
321                     size_t bytesPerSample = mBitsPerSample >> 3;
322 
323                     if (!bytesPerSample || !mNumChannels)
324                         return ERROR_MALFORMED;
325 
326                     size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
327 
328                     if (!mSampleRate)
329                         return ERROR_MALFORMED;
330 
331                     durationUs =
332                         1000000LL * num_samples / mSampleRate;
333                 }
334 
335                 mTrackMeta.setInt64(kKeyDuration, durationUs);
336 
337                 return OK;
338             }
339         }
340 
341         offset += chunkSize;
342     }
343 
344     return NO_INIT;
345 }
346 
347 const size_t WAVSource::kMaxFrameSize = 32768;
348 
WAVSource(DataSourceBase * dataSource,MetaDataBase & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)349 WAVSource::WAVSource(
350         DataSourceBase *dataSource,
351         MetaDataBase &meta,
352         uint16_t waveFormat,
353         int32_t bitsPerSample,
354         off64_t offset, size_t size)
355     : mDataSource(dataSource),
356       mMeta(meta),
357       mWaveFormat(waveFormat),
358       mSampleRate(0),
359       mNumChannels(0),
360       mBitsPerSample(bitsPerSample),
361       mOffset(offset),
362       mSize(size),
363       mStarted(false),
364       mGroup(NULL) {
365     CHECK(mMeta.findInt32(kKeySampleRate, &mSampleRate));
366     CHECK(mMeta.findInt32(kKeyChannelCount, &mNumChannels));
367 
368     mMeta.setInt32(kKeyMaxInputSize, kMaxFrameSize);
369 }
370 
~WAVSource()371 WAVSource::~WAVSource() {
372     if (mStarted) {
373         stop();
374     }
375 }
376 
start(MetaDataBase *)377 status_t WAVSource::start(MetaDataBase * /* params */) {
378     ALOGV("WAVSource::start");
379 
380     CHECK(!mStarted);
381 
382     // some WAV files may have large audio buffers that use shared memory transfer.
383     mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
384 
385     if (mBitsPerSample == 8) {
386         // As a temporary buffer for 8->16 bit conversion.
387         mGroup->add_buffer(MediaBufferBase::Create(kMaxFrameSize));
388     }
389 
390     mCurrentPos = mOffset;
391 
392     mStarted = true;
393 
394     return OK;
395 }
396 
stop()397 status_t WAVSource::stop() {
398     ALOGV("WAVSource::stop");
399 
400     CHECK(mStarted);
401 
402     delete mGroup;
403     mGroup = NULL;
404 
405     mStarted = false;
406 
407     return OK;
408 }
409 
getFormat(MetaDataBase & meta)410 status_t WAVSource::getFormat(MetaDataBase &meta) {
411     ALOGV("WAVSource::getFormat");
412 
413     meta = mMeta;
414     return OK;
415 }
416 
read(MediaBufferBase ** out,const ReadOptions * options)417 status_t WAVSource::read(
418         MediaBufferBase **out, const ReadOptions *options) {
419     *out = NULL;
420 
421     if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
422         return WOULD_BLOCK;
423     }
424 
425     int64_t seekTimeUs;
426     ReadOptions::SeekMode mode;
427     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
428         int64_t pos = 0;
429 
430         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
431             // 65 bytes decode to 320 8kHz samples
432             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
433             int64_t framenumber = samplenumber / 320;
434             pos = framenumber * 65;
435         } else {
436             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
437         }
438         if (pos > (off64_t)mSize) {
439             pos = mSize;
440         }
441         mCurrentPos = pos + mOffset;
442     }
443 
444     MediaBufferBase *buffer;
445     status_t err = mGroup->acquire_buffer(&buffer);
446     if (err != OK) {
447         return err;
448     }
449 
450     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
451     size_t maxBytesToRead =
452         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
453         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
454 
455     size_t maxBytesAvailable =
456         (mCurrentPos - mOffset >= (off64_t)mSize)
457             ? 0 : mSize - (mCurrentPos - mOffset);
458 
459     if (maxBytesToRead > maxBytesAvailable) {
460         maxBytesToRead = maxBytesAvailable;
461     }
462 
463     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
464         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
465         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
466         if (maxBytesToRead > 1024) {
467             maxBytesToRead = 1024;
468         }
469         maxBytesToRead = (maxBytesToRead / 65) * 65;
470     } else {
471         // read only integral amounts of audio unit frames.
472         const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
473         maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
474     }
475 
476     ssize_t n = mDataSource->readAt(
477             mCurrentPos, buffer->data(),
478             maxBytesToRead);
479 
480     if (n <= 0) {
481         buffer->release();
482         buffer = NULL;
483 
484         return ERROR_END_OF_STREAM;
485     }
486 
487     buffer->set_range(0, n);
488 
489     // TODO: add capability to return data as float PCM instead of 16 bit PCM.
490     if (mWaveFormat == WAVE_FORMAT_PCM) {
491         if (mBitsPerSample == 8) {
492             // Convert 8-bit unsigned samples to 16-bit signed.
493 
494             // Create new buffer with 2 byte wide samples
495             MediaBufferBase *tmp;
496             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
497             tmp->set_range(0, 2 * n);
498 
499             memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
500             buffer->release();
501             buffer = tmp;
502         } else if (mBitsPerSample == 24) {
503             // Convert 24-bit signed samples to 16-bit signed in place
504             const size_t numSamples = n / 3;
505 
506             memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
507             buffer->set_range(0, 2 * numSamples);
508         }  else if (mBitsPerSample == 32) {
509             // Convert 32-bit signed samples to 16-bit signed in place
510             const size_t numSamples = n / 4;
511 
512             memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
513             buffer->set_range(0, 2 * numSamples);
514         }
515     } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
516         if (mBitsPerSample == 32) {
517             // Convert 32-bit float samples to 16-bit signed in place
518             const size_t numSamples = n / 4;
519 
520             memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
521             buffer->set_range(0, 2 * numSamples);
522         }
523     }
524 
525     int64_t timeStampUs = 0;
526 
527     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
528         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
529     } else {
530         size_t bytesPerSample = mBitsPerSample >> 3;
531         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
532                 / (mNumChannels * bytesPerSample) / mSampleRate;
533     }
534 
535     buffer->meta_data().setInt64(kKeyTime, timeStampUs);
536 
537     buffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
538     mCurrentPos += n;
539 
540     *out = buffer;
541 
542     return OK;
543 }
544 
545 ////////////////////////////////////////////////////////////////////////////////
546 
CreateExtractor(DataSourceBase * source,void *)547 static MediaExtractor* CreateExtractor(
548         DataSourceBase *source,
549         void *) {
550     return new WAVExtractor(source);
551 }
552 
Sniff(DataSourceBase * source,float * confidence,void **,MediaExtractor::FreeMetaFunc *)553 static MediaExtractor::CreatorFunc Sniff(
554         DataSourceBase *source,
555         float *confidence,
556         void **,
557         MediaExtractor::FreeMetaFunc *) {
558     char header[12];
559     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
560         return NULL;
561     }
562 
563     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
564         return NULL;
565     }
566 
567     MediaExtractor *extractor = new WAVExtractor(source);
568     int numTracks = extractor->countTracks();
569     delete extractor;
570     if (numTracks == 0) {
571         return NULL;
572     }
573 
574     *confidence = 0.3f;
575 
576     return CreateExtractor;
577 }
578 
579 extern "C" {
580 // This is the only symbol that needs to be exported
581 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()582 MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
583     return {
584         MediaExtractor::EXTRACTORDEF_VERSION,
585         UUID("7d613858-5837-4a38-84c5-332d1cddee27"),
586         1, // version
587         "WAV Extractor",
588         Sniff
589     };
590 }
591 
592 } // extern "C"
593 
594 } // namespace android
595