/* * Copyright (C) 2009 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //#define LOG_NDEBUG 0 #define LOG_TAG "WAVExtractor" #include #include "WAVExtractor.h" #include #include // for AIBinder_getCallingUid #include #include #include #include #include #include // for AID_MEDIA #include #include #include #define CHANNEL_MASK_USE_CHANNEL_ORDER 0 // NOTE: This code assumes the device processor is little endian. namespace android { // MediaServer is capable of handling float extractor output, but general processes // may not be able to do so. // TODO: Improve API to set extractor float output. // (Note: duplicated with FLACExtractor.cpp) static inline bool shouldExtractorOutputFloat(int bitsPerSample) { return bitsPerSample > 16 && AIBinder_getCallingUid() == AID_MEDIA && android::base::GetBoolProperty("media.extractor.float", true); } enum { WAVE_FORMAT_PCM = 0x0001, WAVE_FORMAT_IEEE_FLOAT = 0x0003, WAVE_FORMAT_ALAW = 0x0006, WAVE_FORMAT_MULAW = 0x0007, WAVE_FORMAT_MSGSM = 0x0031, WAVE_FORMAT_EXTENSIBLE = 0xFFFE }; static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; static const char* AMBISONIC_SUBFORMAT = "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00"; static uint32_t U32_LE_AT(const uint8_t *ptr) { return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; } static uint16_t U16_LE_AT(const uint8_t *ptr) { return ptr[1] << 8 | ptr[0]; } struct WAVSource : public MediaTrackHelper { WAVSource( DataSourceHelper *dataSource, AMediaFormat *meta, uint16_t waveFormat, bool outputFloat, off64_t offset, size_t size); virtual media_status_t start(); virtual media_status_t stop(); virtual media_status_t getFormat(AMediaFormat *meta); virtual media_status_t read( MediaBufferHelper **buffer, const ReadOptions *options = NULL); bool supportsNonBlockingRead() override { return false; } protected: virtual ~WAVSource(); private: static const size_t kMaxFrameSize; DataSourceHelper *mDataSource; AMediaFormat *mMeta; uint16_t mWaveFormat; const bool mOutputFloat; int32_t mSampleRate; int32_t mNumChannels; int32_t mBitsPerSample; off64_t mOffset; size_t mSize; bool mStarted; off64_t mCurrentPos; WAVSource(const WAVSource &); WAVSource &operator=(const WAVSource &); }; WAVExtractor::WAVExtractor(DataSourceHelper *source) : mDataSource(source), mValidFormat(false), mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { mTrackMeta = AMediaFormat_new(); mInitCheck = init(); } WAVExtractor::~WAVExtractor() { delete mDataSource; AMediaFormat_delete(mTrackMeta); } media_status_t WAVExtractor::getMetaData(AMediaFormat *meta) { AMediaFormat_clear(meta); if (mInitCheck == OK) { AMediaFormat_setString(meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_WAV); } return AMEDIA_OK; } size_t WAVExtractor::countTracks() { return mInitCheck == OK ? 1 : 0; } MediaTrackHelper *WAVExtractor::getTrack(size_t index) { if (mInitCheck != OK || index > 0) { return NULL; } return new WAVSource( mDataSource, mTrackMeta, mWaveFormat, shouldExtractorOutputFloat(mBitsPerSample), mDataOffset, mDataSize); } media_status_t WAVExtractor::getTrackMetaData( AMediaFormat *meta, size_t index, uint32_t /* flags */) { if (mInitCheck != OK || index > 0) { return AMEDIA_ERROR_UNKNOWN; } const media_status_t status = AMediaFormat_copy(meta, mTrackMeta); if (status == OK) { AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_PCM_ENCODING, shouldExtractorOutputFloat(mBitsPerSample) ? kAudioEncodingPcmFloat : kAudioEncodingPcm16bit); } return status; } status_t WAVExtractor::init() { uint8_t header[12]; if (mDataSource->readAt( 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { return NO_INIT; } if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { return NO_INIT; } size_t totalSize = U32_LE_AT(&header[4]); off64_t offset = 12; size_t remainingSize = totalSize; while (remainingSize >= 8) { uint8_t chunkHeader[8]; if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { return NO_INIT; } remainingSize -= 8; offset += 8; uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); if (chunkSize > remainingSize) { return NO_INIT; } if (!memcmp(chunkHeader, "fmt ", 4)) { if (chunkSize < 16) { return NO_INIT; } uint8_t formatSpec[40]; if (mDataSource->readAt(offset, formatSpec, 2) < 2) { return NO_INIT; } mWaveFormat = U16_LE_AT(formatSpec); if (mWaveFormat != WAVE_FORMAT_PCM && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT && mWaveFormat != WAVE_FORMAT_ALAW && mWaveFormat != WAVE_FORMAT_MULAW && mWaveFormat != WAVE_FORMAT_MSGSM && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { return AMEDIA_ERROR_UNSUPPORTED; } uint8_t fmtSize = 16; if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { fmtSize = 40; } if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { return NO_INIT; } mNumChannels = U16_LE_AT(&formatSpec[2]); if (mNumChannels < 1 || mNumChannels > FCC_8) { ALOGE("Unsupported number of channels (%d)", mNumChannels); return AMEDIA_ERROR_UNSUPPORTED; } if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { if (mNumChannels != 1 && mNumChannels != FCC_2) { ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", mNumChannels); } } mSampleRate = U32_LE_AT(&formatSpec[4]); if (mSampleRate == 0) { return ERROR_MALFORMED; } mBitsPerSample = U16_LE_AT(&formatSpec[14]); if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); if (validBitsPerSample != mBitsPerSample) { if (validBitsPerSample != 0) { ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", validBitsPerSample, mBitsPerSample); return AMEDIA_ERROR_UNSUPPORTED; } else { // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT // writers don't correctly set the valid bits value, and leave it at 0. ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); } } mChannelMask = U32_LE_AT(&formatSpec[20]); ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); if ((mChannelMask >> 18) != 0) { ALOGE("invalid channel mask 0x%x", mChannelMask); return ERROR_MALFORMED; } if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) && (popcount(mChannelMask) != mNumChannels)) { ALOGE("invalid number of channels (%d) in channel mask (0x%x)", popcount(mChannelMask), mChannelMask); return ERROR_MALFORMED; } // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain // the sample format, using the same definitions as a regular WAV header mWaveFormat = U16_LE_AT(&formatSpec[24]); if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) && memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) { ALOGE("unsupported GUID"); return ERROR_UNSUPPORTED; } } if (mWaveFormat == WAVE_FORMAT_PCM) { if (mBitsPerSample != 8 && mBitsPerSample != 16 && mBitsPerSample != 24 && mBitsPerSample != 32) { return ERROR_UNSUPPORTED; } } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) { if (mBitsPerSample != 32) { // TODO we don't support double return ERROR_UNSUPPORTED; } } else if (mWaveFormat == WAVE_FORMAT_MSGSM) { if (mBitsPerSample != 0) { return ERROR_UNSUPPORTED; } } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) { if (mBitsPerSample != 8) { return ERROR_UNSUPPORTED; } } else { return ERROR_UNSUPPORTED; } mValidFormat = true; } else if (!memcmp(chunkHeader, "data", 4)) { if (mValidFormat) { mDataOffset = offset; mDataSize = chunkSize; AMediaFormat_clear(mTrackMeta); switch (mWaveFormat) { case WAVE_FORMAT_PCM: case WAVE_FORMAT_IEEE_FLOAT: AMediaFormat_setString(mTrackMeta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_RAW); break; case WAVE_FORMAT_ALAW: AMediaFormat_setString(mTrackMeta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_G711_ALAW); break; case WAVE_FORMAT_MSGSM: AMediaFormat_setString(mTrackMeta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MSGSM); break; default: CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); AMediaFormat_setString(mTrackMeta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_G711_MLAW); break; } AMediaFormat_setInt32(mTrackMeta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, mNumChannels); AMediaFormat_setInt32(mTrackMeta, AMEDIAFORMAT_KEY_CHANNEL_MASK, mChannelMask); AMediaFormat_setInt32(mTrackMeta, AMEDIAFORMAT_KEY_SAMPLE_RATE, mSampleRate); AMediaFormat_setInt32(mTrackMeta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, mBitsPerSample); int64_t durationUs = 0; if (mWaveFormat == WAVE_FORMAT_MSGSM) { // 65 bytes decode to 320 8kHz samples durationUs = 1000000LL * (mDataSize / 65 * 320) / 8000; } else { size_t bytesPerSample = mBitsPerSample >> 3; if (!bytesPerSample || !mNumChannels) return AMEDIA_ERROR_MALFORMED; size_t num_samples = mDataSize / (mNumChannels * bytesPerSample); if (!mSampleRate) return AMEDIA_ERROR_MALFORMED; durationUs = 1000000LL * num_samples / mSampleRate; } AMediaFormat_setInt64(mTrackMeta, AMEDIAFORMAT_KEY_DURATION, durationUs); return OK; } } offset += chunkSize; } return NO_INIT; } const size_t WAVSource::kMaxFrameSize = 32768; WAVSource::WAVSource( DataSourceHelper *dataSource, AMediaFormat *meta, uint16_t waveFormat, bool outputFloat, off64_t offset, size_t size) : mDataSource(dataSource), mMeta(meta), mWaveFormat(waveFormat), mOutputFloat(outputFloat), mOffset(offset), mSize(size), mStarted(false) { CHECK(AMediaFormat_getInt32(mMeta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &mSampleRate)); CHECK(AMediaFormat_getInt32(mMeta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &mNumChannels)); CHECK(AMediaFormat_getInt32(mMeta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &mBitsPerSample)); } WAVSource::~WAVSource() { if (mStarted) { stop(); } } media_status_t WAVSource::start() { ALOGV("WAVSource::start"); CHECK(!mStarted); // some WAV files may have large audio buffers that use shared memory transfer. if (!mBufferGroup->init(4 /* buffers */, kMaxFrameSize)) { return AMEDIA_ERROR_UNKNOWN; } mCurrentPos = mOffset; mStarted = true; return AMEDIA_OK; } media_status_t WAVSource::stop() { ALOGV("WAVSource::stop"); CHECK(mStarted); mStarted = false; return AMEDIA_OK; } media_status_t WAVSource::getFormat(AMediaFormat *meta) { ALOGV("WAVSource::getFormat"); const media_status_t status = AMediaFormat_copy(meta, mMeta); if (status == OK) { AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, kMaxFrameSize); AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_PCM_ENCODING, mOutputFloat ? kAudioEncodingPcmFloat : kAudioEncodingPcm16bit); } return status; } media_status_t WAVSource::read( MediaBufferHelper **out, const ReadOptions *options) { *out = NULL; if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) { return AMEDIA_ERROR_WOULD_BLOCK; } int64_t seekTimeUs; ReadOptions::SeekMode mode; if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { int64_t pos = 0; if (mWaveFormat == WAVE_FORMAT_MSGSM) { // 65 bytes decode to 320 8kHz samples int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; int64_t framenumber = samplenumber / 320; pos = framenumber * 65; } else { pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); } if (pos > (off64_t)mSize) { pos = mSize; } mCurrentPos = pos + mOffset; } MediaBufferHelper *buffer; media_status_t err = mBufferGroup->acquire_buffer(&buffer); if (err != OK) { return err; } // maxBytesToRead may be reduced so that in-place data conversion will fit in buffer size. const size_t bufferSize = std::min(buffer->size(), kMaxFrameSize); size_t maxBytesToRead; if (mOutputFloat) { // destination is float at 4 bytes per sample, source may be less. maxBytesToRead = (mBitsPerSample / 8) * (bufferSize / 4); } else { // destination is int16_t at 2 bytes per sample, only source of 8 bits is less. maxBytesToRead = mBitsPerSample == 8 ? bufferSize / 2 : bufferSize; } const size_t maxBytesAvailable = (mCurrentPos - mOffset >= (off64_t)mSize) ? 0 : mSize - (mCurrentPos - mOffset); if (maxBytesToRead > maxBytesAvailable) { maxBytesToRead = maxBytesAvailable; } if (mWaveFormat == WAVE_FORMAT_MSGSM) { // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio if (maxBytesToRead > 1024) { maxBytesToRead = 1024; } maxBytesToRead = (maxBytesToRead / 65) * 65; } else { // read only integral amounts of audio unit frames. const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8; maxBytesToRead -= maxBytesToRead % inputUnitFrameSize; } ssize_t n = mDataSource->readAt( mCurrentPos, buffer->data(), maxBytesToRead); if (n <= 0) { buffer->release(); buffer = NULL; return AMEDIA_ERROR_END_OF_STREAM; } buffer->set_range(0, n); // TODO: add capability to return data as float PCM instead of 16 bit PCM. if (mWaveFormat == WAVE_FORMAT_PCM) { const size_t bytesPerFrame = (mBitsPerSample >> 3) * mNumChannels; const size_t numFrames = n / bytesPerFrame; const size_t numSamples = numFrames * mNumChannels; if (mOutputFloat) { float *fdest = (float *)buffer->data(); buffer->set_range(0, 4 * numSamples); switch (mBitsPerSample) { case 8: { memcpy_to_float_from_u8(fdest, (const uint8_t *)buffer->data(), numSamples); } break; case 16: { memcpy_to_float_from_i16(fdest, (const int16_t *)buffer->data(), numSamples); } break; case 24: { memcpy_to_float_from_p24(fdest, (const uint8_t *)buffer->data(), numSamples); } break; case 32: { // buffer range is correct memcpy_to_float_from_i32(fdest, (const int32_t *)buffer->data(), numSamples); } break; } } else { int16_t *idest = (int16_t *)buffer->data(); buffer->set_range(0, 2 * numSamples); switch (mBitsPerSample) { case 8: { memcpy_to_i16_from_u8(idest, (const uint8_t *)buffer->data(), numSamples); } break; case 16: // no conversion needed break; case 24: { memcpy_to_i16_from_p24(idest, (const uint8_t *)buffer->data(), numSamples); } break; case 32: { memcpy_to_i16_from_i32(idest, (const int32_t *)buffer->data(), numSamples); } break; } } } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) { if (!mOutputFloat) { // mBitsPerSample == 32 int16_t *idest = (int16_t *)buffer->data(); const size_t numSamples = n / 4; memcpy_to_i16_from_float(idest, (const float *)buffer->data(), numSamples); buffer->set_range(0, 2 * numSamples); } // Note: if output encoding is float, no need to convert if source is float. } int64_t timeStampUs = 0; if (mWaveFormat == WAVE_FORMAT_MSGSM) { timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; } else { size_t bytesPerSample = mBitsPerSample >> 3; timeStampUs = 1000000LL * (mCurrentPos - mOffset) / (mNumChannels * bytesPerSample) / mSampleRate; } AMediaFormat *meta = buffer->meta_data(); AMediaFormat_setInt64(meta, AMEDIAFORMAT_KEY_TIME_US, timeStampUs); AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1); mCurrentPos += n; *out = buffer; return AMEDIA_OK; } //////////////////////////////////////////////////////////////////////////////// static CMediaExtractor* CreateExtractor( CDataSource *source, void *) { return wrap(new WAVExtractor(new DataSourceHelper(source))); } static CreatorFunc Sniff( CDataSource *source, float *confidence, void **, FreeMetaFunc *) { DataSourceHelper *helper = new DataSourceHelper(source); char header[12]; if (helper->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { delete helper; return NULL; } if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { delete helper; return NULL; } WAVExtractor *extractor = new WAVExtractor(helper); // extractor owns the helper int numTracks = extractor->countTracks(); delete extractor; if (numTracks == 0) { return NULL; } *confidence = 0.3f; return CreateExtractor; } static const char *extensions[] = { "wav", NULL }; extern "C" { // This is the only symbol that needs to be exported __attribute__ ((visibility ("default"))) ExtractorDef GETEXTRACTORDEF() { return { EXTRACTORDEF_VERSION, UUID("7d613858-5837-4a38-84c5-332d1cddee27"), 1, // version "WAV Extractor", { .v3 = {Sniff, extensions} }, }; } } // extern "C" } // namespace android