1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20
21 #include "WAVExtractor.h"
22
23 #include <audio_utils/primitives.h>
24 #include <media/DataSourceBase.h>
25 #include <media/MediaTrack.h>
26 #include <media/stagefright/foundation/ADebug.h>
27 #include <media/stagefright/MediaBufferGroup.h>
28 #include <media/stagefright/MediaDefs.h>
29 #include <media/stagefright/MediaErrors.h>
30 #include <media/stagefright/MetaData.h>
31 #include <utils/String8.h>
32 #include <cutils/bitops.h>
33
34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35
36 namespace android {
37
// Values of the format tag found at the start of a "fmt " chunk that
// WAVExtractor::init() accepts.
enum {
    WAVE_FORMAT_PCM        = 0x0001,  // integer PCM samples
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,  // floating point samples
    WAVE_FORMAT_ALAW       = 0x0006,  // exposed as MEDIA_MIMETYPE_AUDIO_G711_ALAW
    WAVE_FORMAT_MULAW      = 0x0007,  // exposed as MEDIA_MIMETYPE_AUDIO_G711_MLAW
    WAVE_FORMAT_MSGSM      = 0x0031,  // exposed as MEDIA_MIMETYPE_AUDIO_MSGSM
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // actual sample format comes from the sub-format GUID
};
46
// Last 14 bytes of the 16-byte sub-format GUID stored in a
// WAVE_FORMAT_EXTENSIBLE "fmt " chunk; the first 2 bytes of the GUID carry the
// sample format tag and are read separately. The pointers themselves are const
// so these file-scope values cannot be re-pointed by accident.
static const char* const WAVEEXT_SUBFORMAT =
        "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
static const char* const AMBISONIC_SUBFORMAT =
        "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00";
49
// Reads a 32-bit little-endian unsigned value from the 4 bytes at |ptr|.
//
// Each byte is widened to uint32_t before it is shifted, so the arithmetic is
// never carried out on a promoted (signed) int. Without the casts,
// ptr[3] << 24 with ptr[3] >= 0x80 produces a value that does not fit in int.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return (static_cast<uint32_t>(ptr[3]) << 24)
            | (static_cast<uint32_t>(ptr[2]) << 16)
            | (static_cast<uint32_t>(ptr[1]) << 8)
            | static_cast<uint32_t>(ptr[0]);
}
53
// Reads a 16-bit little-endian unsigned value from the 2 bytes at |ptr|.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint16_t lowByte = ptr[0];
    const uint16_t highByte = ptr[1];
    return static_cast<uint16_t>((highByte << 8) | lowByte);
}
57
58 struct WAVSource : public MediaTrack {
59 WAVSource(
60 DataSourceBase *dataSource,
61 MetaDataBase &meta,
62 uint16_t waveFormat,
63 int32_t bitsPerSample,
64 off64_t offset, size_t size);
65
66 virtual status_t start(MetaDataBase *params = NULL);
67 virtual status_t stop();
68 virtual status_t getFormat(MetaDataBase &meta);
69
70 virtual status_t read(
71 MediaBufferBase **buffer, const ReadOptions *options = NULL);
72
supportNonblockingReadandroid::WAVSource73 virtual bool supportNonblockingRead() { return true; }
74
75 protected:
76 virtual ~WAVSource();
77
78 private:
79 static const size_t kMaxFrameSize;
80
81 DataSourceBase *mDataSource;
82 MetaDataBase &mMeta;
83 uint16_t mWaveFormat;
84 int32_t mSampleRate;
85 int32_t mNumChannels;
86 int32_t mBitsPerSample;
87 off64_t mOffset;
88 size_t mSize;
89 bool mStarted;
90 MediaBufferGroup *mGroup;
91 off64_t mCurrentPos;
92
93 WAVSource(const WAVSource &);
94 WAVSource &operator=(const WAVSource &);
95 };
96
WAVExtractor(DataSourceBase * source)97 WAVExtractor::WAVExtractor(DataSourceBase *source)
98 : mDataSource(source),
99 mValidFormat(false),
100 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
101 mInitCheck = init();
102 }
103
~WAVExtractor()104 WAVExtractor::~WAVExtractor() {
105 }
106
getMetaData(MetaDataBase & meta)107 status_t WAVExtractor::getMetaData(MetaDataBase &meta) {
108 meta.clear();
109 if (mInitCheck == OK) {
110 meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
111 }
112
113 return OK;
114 }
115
countTracks()116 size_t WAVExtractor::countTracks() {
117 return mInitCheck == OK ? 1 : 0;
118 }
119
getTrack(size_t index)120 MediaTrack *WAVExtractor::getTrack(size_t index) {
121 if (mInitCheck != OK || index > 0) {
122 return NULL;
123 }
124
125 return new WAVSource(
126 mDataSource, mTrackMeta,
127 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
128 }
129
getTrackMetaData(MetaDataBase & meta,size_t index,uint32_t)130 status_t WAVExtractor::getTrackMetaData(
131 MetaDataBase &meta,
132 size_t index, uint32_t /* flags */) {
133 if (mInitCheck != OK || index > 0) {
134 return UNKNOWN_ERROR;
135 }
136
137 meta = mTrackMeta;
138 return OK;
139 }
140
init()141 status_t WAVExtractor::init() {
142 uint8_t header[12];
143 if (mDataSource->readAt(
144 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
145 return NO_INIT;
146 }
147
148 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
149 return NO_INIT;
150 }
151
152 size_t totalSize = U32_LE_AT(&header[4]);
153
154 off64_t offset = 12;
155 size_t remainingSize = totalSize;
156 while (remainingSize >= 8) {
157 uint8_t chunkHeader[8];
158 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
159 return NO_INIT;
160 }
161
162 remainingSize -= 8;
163 offset += 8;
164
165 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
166
167 if (chunkSize > remainingSize) {
168 return NO_INIT;
169 }
170
171 if (!memcmp(chunkHeader, "fmt ", 4)) {
172 if (chunkSize < 16) {
173 return NO_INIT;
174 }
175
176 uint8_t formatSpec[40];
177 if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
178 return NO_INIT;
179 }
180
181 mWaveFormat = U16_LE_AT(formatSpec);
182 if (mWaveFormat != WAVE_FORMAT_PCM
183 && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
184 && mWaveFormat != WAVE_FORMAT_ALAW
185 && mWaveFormat != WAVE_FORMAT_MULAW
186 && mWaveFormat != WAVE_FORMAT_MSGSM
187 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
188 return ERROR_UNSUPPORTED;
189 }
190
191 uint8_t fmtSize = 16;
192 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
193 fmtSize = 40;
194 }
195 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
196 return NO_INIT;
197 }
198
199 mNumChannels = U16_LE_AT(&formatSpec[2]);
200
201 if (mNumChannels < 1 || mNumChannels > 8) {
202 ALOGE("Unsupported number of channels (%d)", mNumChannels);
203 return ERROR_UNSUPPORTED;
204 }
205
206 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
207 if (mNumChannels != 1 && mNumChannels != 2) {
208 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
209 mNumChannels);
210 }
211 }
212
213 mSampleRate = U32_LE_AT(&formatSpec[4]);
214
215 if (mSampleRate == 0) {
216 return ERROR_MALFORMED;
217 }
218
219 mBitsPerSample = U16_LE_AT(&formatSpec[14]);
220
221 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
222 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
223 if (validBitsPerSample != mBitsPerSample) {
224 if (validBitsPerSample != 0) {
225 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
226 validBitsPerSample, mBitsPerSample);
227 return ERROR_UNSUPPORTED;
228 } else {
229 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
230 // writers don't correctly set the valid bits value, and leave it at 0.
231 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
232 }
233 }
234
235 mChannelMask = U32_LE_AT(&formatSpec[20]);
236 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
237 if ((mChannelMask >> 18) != 0) {
238 ALOGE("invalid channel mask 0x%x", mChannelMask);
239 return ERROR_MALFORMED;
240 }
241
242 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
243 && (popcount(mChannelMask) != mNumChannels)) {
244 ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
245 popcount(mChannelMask), mChannelMask);
246 return ERROR_MALFORMED;
247 }
248
249 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
250 // the sample format, using the same definitions as a regular WAV header
251 mWaveFormat = U16_LE_AT(&formatSpec[24]);
252 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
253 memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
254 ALOGE("unsupported GUID");
255 return ERROR_UNSUPPORTED;
256 }
257 }
258
259 if (mWaveFormat == WAVE_FORMAT_PCM) {
260 if (mBitsPerSample != 8 && mBitsPerSample != 16
261 && mBitsPerSample != 24 && mBitsPerSample != 32) {
262 return ERROR_UNSUPPORTED;
263 }
264 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
265 if (mBitsPerSample != 32) { // TODO we don't support double
266 return ERROR_UNSUPPORTED;
267 }
268 }
269 else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
270 if (mBitsPerSample != 0) {
271 return ERROR_UNSUPPORTED;
272 }
273 } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
274 if (mBitsPerSample != 8) {
275 return ERROR_UNSUPPORTED;
276 }
277 } else {
278 return ERROR_UNSUPPORTED;
279 }
280
281 mValidFormat = true;
282 } else if (!memcmp(chunkHeader, "data", 4)) {
283 if (mValidFormat) {
284 mDataOffset = offset;
285 mDataSize = chunkSize;
286
287 mTrackMeta.clear();
288
289 switch (mWaveFormat) {
290 case WAVE_FORMAT_PCM:
291 case WAVE_FORMAT_IEEE_FLOAT:
292 mTrackMeta.setCString(
293 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
294 break;
295 case WAVE_FORMAT_ALAW:
296 mTrackMeta.setCString(
297 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
298 break;
299 case WAVE_FORMAT_MSGSM:
300 mTrackMeta.setCString(
301 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
302 break;
303 default:
304 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
305 mTrackMeta.setCString(
306 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
307 break;
308 }
309
310 mTrackMeta.setInt32(kKeyChannelCount, mNumChannels);
311 mTrackMeta.setInt32(kKeyChannelMask, mChannelMask);
312 mTrackMeta.setInt32(kKeySampleRate, mSampleRate);
313 mTrackMeta.setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
314
315 int64_t durationUs = 0;
316 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
317 // 65 bytes decode to 320 8kHz samples
318 durationUs =
319 1000000LL * (mDataSize / 65 * 320) / 8000;
320 } else {
321 size_t bytesPerSample = mBitsPerSample >> 3;
322
323 if (!bytesPerSample || !mNumChannels)
324 return ERROR_MALFORMED;
325
326 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
327
328 if (!mSampleRate)
329 return ERROR_MALFORMED;
330
331 durationUs =
332 1000000LL * num_samples / mSampleRate;
333 }
334
335 mTrackMeta.setInt64(kKeyDuration, durationUs);
336
337 return OK;
338 }
339 }
340
341 offset += chunkSize;
342 }
343
344 return NO_INIT;
345 }
346
347 const size_t WAVSource::kMaxFrameSize = 32768;
348
WAVSource(DataSourceBase * dataSource,MetaDataBase & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)349 WAVSource::WAVSource(
350 DataSourceBase *dataSource,
351 MetaDataBase &meta,
352 uint16_t waveFormat,
353 int32_t bitsPerSample,
354 off64_t offset, size_t size)
355 : mDataSource(dataSource),
356 mMeta(meta),
357 mWaveFormat(waveFormat),
358 mSampleRate(0),
359 mNumChannels(0),
360 mBitsPerSample(bitsPerSample),
361 mOffset(offset),
362 mSize(size),
363 mStarted(false),
364 mGroup(NULL) {
365 CHECK(mMeta.findInt32(kKeySampleRate, &mSampleRate));
366 CHECK(mMeta.findInt32(kKeyChannelCount, &mNumChannels));
367
368 mMeta.setInt32(kKeyMaxInputSize, kMaxFrameSize);
369 }
370
~WAVSource()371 WAVSource::~WAVSource() {
372 if (mStarted) {
373 stop();
374 }
375 }
376
start(MetaDataBase *)377 status_t WAVSource::start(MetaDataBase * /* params */) {
378 ALOGV("WAVSource::start");
379
380 CHECK(!mStarted);
381
382 // some WAV files may have large audio buffers that use shared memory transfer.
383 mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
384
385 if (mBitsPerSample == 8) {
386 // As a temporary buffer for 8->16 bit conversion.
387 mGroup->add_buffer(MediaBufferBase::Create(kMaxFrameSize));
388 }
389
390 mCurrentPos = mOffset;
391
392 mStarted = true;
393
394 return OK;
395 }
396
stop()397 status_t WAVSource::stop() {
398 ALOGV("WAVSource::stop");
399
400 CHECK(mStarted);
401
402 delete mGroup;
403 mGroup = NULL;
404
405 mStarted = false;
406
407 return OK;
408 }
409
getFormat(MetaDataBase & meta)410 status_t WAVSource::getFormat(MetaDataBase &meta) {
411 ALOGV("WAVSource::getFormat");
412
413 meta = mMeta;
414 return OK;
415 }
416
read(MediaBufferBase ** out,const ReadOptions * options)417 status_t WAVSource::read(
418 MediaBufferBase **out, const ReadOptions *options) {
419 *out = NULL;
420
421 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
422 return WOULD_BLOCK;
423 }
424
425 int64_t seekTimeUs;
426 ReadOptions::SeekMode mode;
427 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
428 int64_t pos = 0;
429
430 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
431 // 65 bytes decode to 320 8kHz samples
432 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
433 int64_t framenumber = samplenumber / 320;
434 pos = framenumber * 65;
435 } else {
436 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
437 }
438 if (pos > (off64_t)mSize) {
439 pos = mSize;
440 }
441 mCurrentPos = pos + mOffset;
442 }
443
444 MediaBufferBase *buffer;
445 status_t err = mGroup->acquire_buffer(&buffer);
446 if (err != OK) {
447 return err;
448 }
449
450 // make sure that maxBytesToRead is multiple of 3, in 24-bit case
451 size_t maxBytesToRead =
452 mBitsPerSample == 8 ? kMaxFrameSize / 2 :
453 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
454
455 size_t maxBytesAvailable =
456 (mCurrentPos - mOffset >= (off64_t)mSize)
457 ? 0 : mSize - (mCurrentPos - mOffset);
458
459 if (maxBytesToRead > maxBytesAvailable) {
460 maxBytesToRead = maxBytesAvailable;
461 }
462
463 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
464 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
465 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
466 if (maxBytesToRead > 1024) {
467 maxBytesToRead = 1024;
468 }
469 maxBytesToRead = (maxBytesToRead / 65) * 65;
470 } else {
471 // read only integral amounts of audio unit frames.
472 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
473 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
474 }
475
476 ssize_t n = mDataSource->readAt(
477 mCurrentPos, buffer->data(),
478 maxBytesToRead);
479
480 if (n <= 0) {
481 buffer->release();
482 buffer = NULL;
483
484 return ERROR_END_OF_STREAM;
485 }
486
487 buffer->set_range(0, n);
488
489 // TODO: add capability to return data as float PCM instead of 16 bit PCM.
490 if (mWaveFormat == WAVE_FORMAT_PCM) {
491 if (mBitsPerSample == 8) {
492 // Convert 8-bit unsigned samples to 16-bit signed.
493
494 // Create new buffer with 2 byte wide samples
495 MediaBufferBase *tmp;
496 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
497 tmp->set_range(0, 2 * n);
498
499 memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
500 buffer->release();
501 buffer = tmp;
502 } else if (mBitsPerSample == 24) {
503 // Convert 24-bit signed samples to 16-bit signed in place
504 const size_t numSamples = n / 3;
505
506 memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
507 buffer->set_range(0, 2 * numSamples);
508 } else if (mBitsPerSample == 32) {
509 // Convert 32-bit signed samples to 16-bit signed in place
510 const size_t numSamples = n / 4;
511
512 memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
513 buffer->set_range(0, 2 * numSamples);
514 }
515 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
516 if (mBitsPerSample == 32) {
517 // Convert 32-bit float samples to 16-bit signed in place
518 const size_t numSamples = n / 4;
519
520 memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
521 buffer->set_range(0, 2 * numSamples);
522 }
523 }
524
525 int64_t timeStampUs = 0;
526
527 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
528 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
529 } else {
530 size_t bytesPerSample = mBitsPerSample >> 3;
531 timeStampUs = 1000000LL * (mCurrentPos - mOffset)
532 / (mNumChannels * bytesPerSample) / mSampleRate;
533 }
534
535 buffer->meta_data().setInt64(kKeyTime, timeStampUs);
536
537 buffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
538 mCurrentPos += n;
539
540 *out = buffer;
541
542 return OK;
543 }
544
545 ////////////////////////////////////////////////////////////////////////////////
546
CreateExtractor(DataSourceBase * source,void *)547 static MediaExtractor* CreateExtractor(
548 DataSourceBase *source,
549 void *) {
550 return new WAVExtractor(source);
551 }
552
Sniff(DataSourceBase * source,float * confidence,void **,MediaExtractor::FreeMetaFunc *)553 static MediaExtractor::CreatorFunc Sniff(
554 DataSourceBase *source,
555 float *confidence,
556 void **,
557 MediaExtractor::FreeMetaFunc *) {
558 char header[12];
559 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
560 return NULL;
561 }
562
563 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
564 return NULL;
565 }
566
567 MediaExtractor *extractor = new WAVExtractor(source);
568 int numTracks = extractor->countTracks();
569 delete extractor;
570 if (numTracks == 0) {
571 return NULL;
572 }
573
574 *confidence = 0.3f;
575
576 return CreateExtractor;
577 }
578
579 extern "C" {
580 // This is the only symbol that needs to be exported
581 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()582 MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
583 return {
584 MediaExtractor::EXTRACTORDEF_VERSION,
585 UUID("7d613858-5837-4a38-84c5-332d1cddee27"),
586 1, // version
587 "WAV Extractor",
588 Sniff
589 };
590 }
591
592 } // extern "C"
593
594 } // namespace android
595