1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include <utils/Log.h>
27
28 #include "include/MPEG4Extractor.h"
29 #include "include/SampleTable.h"
30 #include "include/ESDS.h"
31
32 #include <media/stagefright/foundation/ABitReader.h>
33 #include <media/stagefright/foundation/ABuffer.h>
34 #include <media/stagefright/foundation/ADebug.h>
35 #include <media/stagefright/foundation/AMessage.h>
36 #include <media/stagefright/MediaBuffer.h>
37 #include <media/stagefright/MediaBufferGroup.h>
38 #include <media/stagefright/MediaDefs.h>
39 #include <media/stagefright/MediaSource.h>
40 #include <media/stagefright/MetaData.h>
41 #include <utils/String8.h>
42
43 #include <byteswap.h>
44 #include "include/ID3.h"
45
46 namespace android {
47
// MediaSource implementation declared in this file. It holds strong
// references to the extractor that owns it, the track format, the data source
// and the sample table, plus the bookkeeping used by the fragment/sample
// parsing helpers declared below. Member definitions are not part of this
// declaration.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    // MediaSource interface.
    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // NOTE(review): presumably the read path used for fragmented (moof-based)
    // files - confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances are not meant to be deleted directly by
    // arbitrary callers (presumably lifetime is managed through sp<> - confirm).
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference, not by value: the referenced vector must outlive this
    // object (presumably it is the "sidx" constructor argument - confirm).
    Vector<SidxEntry> &mSegments;
    // Non-owning pointer (presumably the "trex" constructor argument - confirm).
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for sample auxiliary information; see
    // parseSampleAuxiliaryInformationSizes()/...Offsets() below.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    // Parsing helpers; each returns a status_t except parseNALSize().
    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values collected by parseTrackFragmentHeader() (presumably - confirm).
    struct TrackFragmentHeaderInfo {
        // Bit masks; presumably tested against mFlags to tell which of the
        // optional fields below are present - confirm against the parser.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size, timing, and a 16-byte IV plus
    // clear/encrypted size lists (presumably subsample sizes for encrypted
    // content - confirm).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy construction and assignment are declared private, so code outside
    // this class cannot copy an MPEG4Source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
161
// This custom data source wraps an existing one. Requests that fall entirely
// within a cached range are satisfied from the cache; all remaining requests
// are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Wrappers may be stacked (one wrapping another) to cover all tracks, i.e.
// each MPEG4DataSource caches the sample table metadata of exactly one track.

struct MPEG4DataSource : public DataSource {
    // Wraps "source"; the cache starts out empty.
    MPEG4DataSource(const sp<DataSource> &source);

    // DataSource interface. initCheck(), getSize() and flags() forward to the
    // wrapped source; readAt() consults the cache first.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Replaces the cached range with "size" bytes read from the wrapped source
    // starting at "offset". Returns OK on success, -ENOMEM if the buffer
    // cannot be allocated, or ERROR_IO (leaving the cache empty) on a short
    // read.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    sp<DataSource> mSource;   // the wrapped data source
    off64_t mCachedOffset;    // offset in mSource of the first cached byte
    size_t mCachedSize;       // number of cached bytes (0 when empty)
    uint8_t *mCache;          // malloc'ed buffer, NULL when empty

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Copy construction and assignment are declared private, so code outside
    // this struct cannot copy an MPEG4DataSource.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
195
MPEG4DataSource(const sp<DataSource> & source)196 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
197 : mSource(source),
198 mCachedOffset(0),
199 mCachedSize(0),
200 mCache(NULL) {
201 }
202
~MPEG4DataSource()203 MPEG4DataSource::~MPEG4DataSource() {
204 clearCache();
205 }
206
clearCache()207 void MPEG4DataSource::clearCache() {
208 if (mCache) {
209 free(mCache);
210 mCache = NULL;
211 }
212
213 mCachedOffset = 0;
214 mCachedSize = 0;
215 }
216
initCheck() const217 status_t MPEG4DataSource::initCheck() const {
218 return mSource->initCheck();
219 }
220
readAt(off64_t offset,void * data,size_t size)221 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
222 Mutex::Autolock autoLock(mLock);
223
224 if (offset >= mCachedOffset
225 && offset + size <= mCachedOffset + mCachedSize) {
226 memcpy(data, &mCache[offset - mCachedOffset], size);
227 return size;
228 }
229
230 return mSource->readAt(offset, data, size);
231 }
232
getSize(off64_t * size)233 status_t MPEG4DataSource::getSize(off64_t *size) {
234 return mSource->getSize(size);
235 }
236
flags()237 uint32_t MPEG4DataSource::flags() {
238 return mSource->flags();
239 }
240
setCachedRange(off64_t offset,size_t size)241 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
242 Mutex::Autolock autoLock(mLock);
243
244 clearCache();
245
246 mCache = (uint8_t *)malloc(size);
247
248 if (mCache == NULL) {
249 return -ENOMEM;
250 }
251
252 mCachedOffset = offset;
253 mCachedSize = size;
254
255 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
256
257 if (err < (ssize_t)size) {
258 clearCache();
259
260 return ERROR_IO;
261 }
262
263 return OK;
264 }
265
266 ////////////////////////////////////////////////////////////////////////////////
267
// Debug helper: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per row. Each row shows the row's starting offset, the bytes in hex (with an
// extra gap before the ninth byte) and then the same bytes as characters, with
// '.' standing in for anything isprint() rejects.
static void hexdump(const void *_data, size_t size) {
    const size_t kBytesPerRow = 16;
    const uint8_t *bytes = static_cast<const uint8_t *>(_data);

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        printf("0x%04zx ", rowStart);

        const size_t remaining = size - rowStart;
        const size_t rowLen =
                (remaining > kBytesPerRow) ? kBytesPerRow : remaining;

        // Hex columns; a short final row is padded so the text column lines up.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf(" ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t ch = bytes[rowStart + col];
            if (isprint(ch)) {
                printf("%c", ch);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
306
FourCC2MIME(uint32_t fourcc)307 static const char *FourCC2MIME(uint32_t fourcc) {
308 switch (fourcc) {
309 case FOURCC('m', 'p', '4', 'a'):
310 return MEDIA_MIMETYPE_AUDIO_AAC;
311
312 case FOURCC('s', 'a', 'm', 'r'):
313 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
314
315 case FOURCC('s', 'a', 'w', 'b'):
316 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
317
318 case FOURCC('m', 'p', '4', 'v'):
319 return MEDIA_MIMETYPE_VIDEO_MPEG4;
320
321 case FOURCC('s', '2', '6', '3'):
322 case FOURCC('h', '2', '6', '3'):
323 case FOURCC('H', '2', '6', '3'):
324 return MEDIA_MIMETYPE_VIDEO_H263;
325
326 case FOURCC('a', 'v', 'c', '1'):
327 return MEDIA_MIMETYPE_VIDEO_AVC;
328
329 case FOURCC('h', 'v', 'c', '1'):
330 case FOURCC('h', 'e', 'v', '1'):
331 return MEDIA_MIMETYPE_VIDEO_HEVC;
332 default:
333 CHECK(!"should not be here.");
334 return NULL;
335 }
336 }
337
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)338 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
339 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
340 // AMR NB audio is always mono, 8kHz
341 *channels = 1;
342 *rate = 8000;
343 return true;
344 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
345 // AMR WB audio is always mono, 16kHz
346 *channels = 1;
347 *rate = 16000;
348 return true;
349 }
350 return false;
351 }
352
MPEG4Extractor(const sp<DataSource> & source)353 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
354 : mMoofOffset(0),
355 mDataSource(source),
356 mInitCheck(NO_INIT),
357 mHasVideo(false),
358 mHeaderTimescale(0),
359 mFirstTrack(NULL),
360 mLastTrack(NULL),
361 mFileMetaData(new MetaData),
362 mFirstSINF(NULL),
363 mIsDrm(false) {
364 }
365
~MPEG4Extractor()366 MPEG4Extractor::~MPEG4Extractor() {
367 Track *track = mFirstTrack;
368 while (track) {
369 Track *next = track->next;
370
371 delete track;
372 track = next;
373 }
374 mFirstTrack = mLastTrack = NULL;
375
376 SINF *sinf = mFirstSINF;
377 while (sinf) {
378 SINF *next = sinf->next;
379 delete[] sinf->IPMPData;
380 delete sinf;
381 sinf = next;
382 }
383 mFirstSINF = NULL;
384
385 for (size_t i = 0; i < mPssh.size(); i++) {
386 delete [] mPssh[i].data;
387 }
388 }
389
flags() const390 uint32_t MPEG4Extractor::flags() const {
391 return CAN_PAUSE |
392 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
393 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
394 }
395
getMetaData()396 sp<MetaData> MPEG4Extractor::getMetaData() {
397 status_t err;
398 if ((err = readMetaData()) != OK) {
399 return new MetaData;
400 }
401
402 return mFileMetaData;
403 }
404
countTracks()405 size_t MPEG4Extractor::countTracks() {
406 status_t err;
407 if ((err = readMetaData()) != OK) {
408 ALOGV("MPEG4Extractor::countTracks: no tracks");
409 return 0;
410 }
411
412 size_t n = 0;
413 Track *track = mFirstTrack;
414 while (track) {
415 ++n;
416 track = track->next;
417 }
418
419 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
420 return n;
421 }
422
getTrackMetaData(size_t index,uint32_t flags)423 sp<MetaData> MPEG4Extractor::getTrackMetaData(
424 size_t index, uint32_t flags) {
425 status_t err;
426 if ((err = readMetaData()) != OK) {
427 return NULL;
428 }
429
430 Track *track = mFirstTrack;
431 while (index > 0) {
432 if (track == NULL) {
433 return NULL;
434 }
435
436 track = track->next;
437 --index;
438 }
439
440 if (track == NULL) {
441 return NULL;
442 }
443
444 if ((flags & kIncludeExtensiveMetaData)
445 && !track->includes_expensive_metadata) {
446 track->includes_expensive_metadata = true;
447
448 const char *mime;
449 CHECK(track->meta->findCString(kKeyMIMEType, &mime));
450 if (!strncasecmp("video/", mime, 6)) {
451 if (mMoofOffset > 0) {
452 int64_t duration;
453 if (track->meta->findInt64(kKeyDuration, &duration)) {
454 // nothing fancy, just pick a frame near 1/4th of the duration
455 track->meta->setInt64(
456 kKeyThumbnailTime, duration / 4);
457 }
458 } else {
459 uint32_t sampleIndex;
460 uint32_t sampleTime;
461 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
462 && track->sampleTable->getMetaDataForSample(
463 sampleIndex, NULL /* offset */, NULL /* size */,
464 &sampleTime) == OK) {
465 track->meta->setInt64(
466 kKeyThumbnailTime,
467 ((int64_t)sampleTime * 1000000) / track->timescale);
468 }
469 }
470 }
471 }
472
473 return track->meta;
474 }
475
// Writes the four bytes of "x", most significant first, into s[0..3] and
// NUL-terminates the result. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int pos = 0; pos < 4; ++pos) {
        const int shift = 24 - 8 * pos;
        s[pos] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
483
readMetaData()484 status_t MPEG4Extractor::readMetaData() {
485 if (mInitCheck != NO_INIT) {
486 return mInitCheck;
487 }
488
489 off64_t offset = 0;
490 status_t err;
491 while (true) {
492 off64_t orig_offset = offset;
493 err = parseChunk(&offset, 0);
494
495 if (err != OK && err != UNKNOWN_ERROR) {
496 break;
497 } else if (offset <= orig_offset) {
498 // only continue parsing if the offset was advanced,
499 // otherwise we might end up in an infinite loop
500 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
501 err = ERROR_MALFORMED;
502 break;
503 } else if (err == OK) {
504 continue;
505 }
506
507 uint32_t hdr[2];
508 if (mDataSource->readAt(offset, hdr, 8) < 8) {
509 break;
510 }
511 uint32_t chunk_type = ntohl(hdr[1]);
512 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
513 // store the offset of the first segment
514 mMoofOffset = offset;
515 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
516 // keep parsing until we get to the data
517 continue;
518 }
519 break;
520 }
521
522 if (mInitCheck == OK) {
523 if (mHasVideo) {
524 mFileMetaData->setCString(
525 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
526 } else {
527 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
528 }
529 } else {
530 mInitCheck = err;
531 }
532
533 CHECK_NE(err, (status_t)NO_INIT);
534
535 // copy pssh data into file metadata
536 int psshsize = 0;
537 for (size_t i = 0; i < mPssh.size(); i++) {
538 psshsize += 20 + mPssh[i].datalen;
539 }
540 if (psshsize) {
541 char *buf = (char*)malloc(psshsize);
542 char *ptr = buf;
543 for (size_t i = 0; i < mPssh.size(); i++) {
544 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
545 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
546 ptr += (20 + mPssh[i].datalen);
547 }
548 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
549 free(buf);
550 }
551 return mInitCheck;
552 }
553
getDrmTrackInfo(size_t trackID,int * len)554 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
555 if (mFirstSINF == NULL) {
556 return NULL;
557 }
558
559 SINF *sinf = mFirstSINF;
560 while (sinf && (trackID != sinf->trackID)) {
561 sinf = sinf->next;
562 }
563
564 if (sinf == NULL) {
565 return NULL;
566 }
567
568 *len = sinf->len;
569 return sinf->IPMPData;
570 }
571
572 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
readSize(off64_t offset,const sp<DataSource> DataSource,uint8_t * numOfBytes)573 static int32_t readSize(off64_t offset,
574 const sp<DataSource> DataSource, uint8_t *numOfBytes) {
575 uint32_t size = 0;
576 uint8_t data;
577 bool moreData = true;
578 *numOfBytes = 0;
579
580 while (moreData) {
581 if (DataSource->readAt(offset, &data, 1) < 1) {
582 return -1;
583 }
584 offset ++;
585 moreData = (data >= 128) ? true : false;
586 size = (size << 7) | (data & 0x7f); // Take last 7 bits
587 (*numOfBytes) ++;
588 }
589
590 return size;
591 }
592
parseDrmSINF(off64_t *,off64_t data_offset)593 status_t MPEG4Extractor::parseDrmSINF(
594 off64_t * /* offset */, off64_t data_offset) {
595 uint8_t updateIdTag;
596 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
597 return ERROR_IO;
598 }
599 data_offset ++;
600
601 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
602 return ERROR_MALFORMED;
603 }
604
605 uint8_t numOfBytes;
606 int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
607 if (size < 0) {
608 return ERROR_IO;
609 }
610 int32_t classSize = size;
611 data_offset += numOfBytes;
612
613 while(size >= 11 ) {
614 uint8_t descriptorTag;
615 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
616 return ERROR_IO;
617 }
618 data_offset ++;
619
620 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
621 return ERROR_MALFORMED;
622 }
623
624 uint8_t buffer[8];
625 //ObjectDescriptorID and ObjectDescriptor url flag
626 if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
627 return ERROR_IO;
628 }
629 data_offset += 2;
630
631 if ((buffer[1] >> 5) & 0x0001) { //url flag is set
632 return ERROR_MALFORMED;
633 }
634
635 if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
636 return ERROR_IO;
637 }
638 data_offset += 8;
639
640 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
641 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
642 return ERROR_MALFORMED;
643 }
644
645 SINF *sinf = new SINF;
646 sinf->trackID = U16_AT(&buffer[3]);
647 sinf->IPMPDescriptorID = buffer[7];
648 sinf->next = mFirstSINF;
649 mFirstSINF = sinf;
650
651 size -= (8 + 2 + 1);
652 }
653
654 if (size != 0) {
655 return ERROR_MALFORMED;
656 }
657
658 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
659 return ERROR_IO;
660 }
661 data_offset ++;
662
663 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
664 return ERROR_MALFORMED;
665 }
666
667 size = readSize(data_offset, mDataSource, &numOfBytes);
668 if (size < 0) {
669 return ERROR_IO;
670 }
671 classSize = size;
672 data_offset += numOfBytes;
673
674 while (size > 0) {
675 uint8_t tag;
676 int32_t dataLen;
677 if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
678 return ERROR_IO;
679 }
680 data_offset ++;
681
682 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
683 uint8_t id;
684 dataLen = readSize(data_offset, mDataSource, &numOfBytes);
685 if (dataLen < 0) {
686 return ERROR_IO;
687 } else if (dataLen < 4) {
688 return ERROR_MALFORMED;
689 }
690 data_offset += numOfBytes;
691
692 if (mDataSource->readAt(data_offset, &id, 1) < 1) {
693 return ERROR_IO;
694 }
695 data_offset ++;
696
697 SINF *sinf = mFirstSINF;
698 while (sinf && (sinf->IPMPDescriptorID != id)) {
699 sinf = sinf->next;
700 }
701 if (sinf == NULL) {
702 return ERROR_MALFORMED;
703 }
704 sinf->len = dataLen - 3;
705 sinf->IPMPData = new (std::nothrow) char[sinf->len];
706 if (sinf->IPMPData == NULL) {
707 return ERROR_MALFORMED;
708 }
709 data_offset += 2;
710
711 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
712 return ERROR_IO;
713 }
714 data_offset += sinf->len;
715
716 size -= (dataLen + numOfBytes + 1);
717 }
718 }
719
720 if (size != 0) {
721 return ERROR_MALFORMED;
722 }
723
724 return UNKNOWN_ERROR; // Return a dummy error.
725 }
726
727 struct PathAdder {
PathAdderandroid::PathAdder728 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
729 : mPath(path) {
730 mPath->push(chunkType);
731 }
732
~PathAdderandroid::PathAdder733 ~PathAdder() {
734 mPath->pop();
735 }
736
737 private:
738 Vector<uint32_t> *mPath;
739
740 PathAdder(const PathAdder &);
741 PathAdder &operator=(const PathAdder &);
742 };
743
underMetaDataPath(const Vector<uint32_t> & path)744 static bool underMetaDataPath(const Vector<uint32_t> &path) {
745 return path.size() >= 5
746 && path[0] == FOURCC('m', 'o', 'o', 'v')
747 && path[1] == FOURCC('u', 'd', 't', 'a')
748 && path[2] == FOURCC('m', 'e', 't', 'a')
749 && path[3] == FOURCC('i', 'l', 's', 't');
750 }
751
752 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)753 static void convertTimeToDate(int64_t time_1904, String8 *s) {
754 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
755
756 char tmp[32];
757 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
758
759 s->setTo(tmp);
760 }
761
parseChunk(off64_t * offset,int depth)762 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
763 ALOGV("entering parseChunk %lld/%d", *offset, depth);
764 uint32_t hdr[2];
765 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
766 return ERROR_IO;
767 }
768 uint64_t chunk_size = ntohl(hdr[0]);
769 uint32_t chunk_type = ntohl(hdr[1]);
770 off64_t data_offset = *offset + 8;
771
772 if (chunk_size == 1) {
773 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
774 return ERROR_IO;
775 }
776 chunk_size = ntoh64(chunk_size);
777 data_offset += 8;
778
779 if (chunk_size < 16) {
780 // The smallest valid chunk is 16 bytes long in this case.
781 return ERROR_MALFORMED;
782 }
783 } else if (chunk_size == 0) {
784 if (depth == 0) {
785 // atom extends to end of file
786 off64_t sourceSize;
787 if (mDataSource->getSize(&sourceSize) == OK) {
788 chunk_size = (sourceSize - *offset);
789 } else {
790 // XXX could we just pick a "sufficiently large" value here?
791 ALOGE("atom size is 0, and data source has no size");
792 return ERROR_MALFORMED;
793 }
794 } else {
795 // not allowed for non-toplevel atoms, skip it
796 *offset += 4;
797 return OK;
798 }
799 } else if (chunk_size < 8) {
800 // The smallest valid chunk is 8 bytes long.
801 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
802 return ERROR_MALFORMED;
803 }
804
805 char chunk[5];
806 MakeFourCCString(chunk_type, chunk);
807 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
808
809 #if 0
810 static const char kWhitespace[] = " ";
811 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
812 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
813
814 char buffer[256];
815 size_t n = chunk_size;
816 if (n > sizeof(buffer)) {
817 n = sizeof(buffer);
818 }
819 if (mDataSource->readAt(*offset, buffer, n)
820 < (ssize_t)n) {
821 return ERROR_IO;
822 }
823
824 hexdump(buffer, n);
825 #endif
826
827 PathAdder autoAdder(&mPath, chunk_type);
828
829 off64_t chunk_data_size = *offset + chunk_size - data_offset;
830
831 if (chunk_type != FOURCC('c', 'p', 'r', 't')
832 && chunk_type != FOURCC('c', 'o', 'v', 'r')
833 && mPath.size() == 5 && underMetaDataPath(mPath)) {
834 off64_t stop_offset = *offset + chunk_size;
835 *offset = data_offset;
836 while (*offset < stop_offset) {
837 status_t err = parseChunk(offset, depth + 1);
838 if (err != OK) {
839 return err;
840 }
841 }
842
843 if (*offset != stop_offset) {
844 return ERROR_MALFORMED;
845 }
846
847 return OK;
848 }
849
850 switch(chunk_type) {
851 case FOURCC('m', 'o', 'o', 'v'):
852 case FOURCC('t', 'r', 'a', 'k'):
853 case FOURCC('m', 'd', 'i', 'a'):
854 case FOURCC('m', 'i', 'n', 'f'):
855 case FOURCC('d', 'i', 'n', 'f'):
856 case FOURCC('s', 't', 'b', 'l'):
857 case FOURCC('m', 'v', 'e', 'x'):
858 case FOURCC('m', 'o', 'o', 'f'):
859 case FOURCC('t', 'r', 'a', 'f'):
860 case FOURCC('m', 'f', 'r', 'a'):
861 case FOURCC('u', 'd', 't', 'a'):
862 case FOURCC('i', 'l', 's', 't'):
863 case FOURCC('s', 'i', 'n', 'f'):
864 case FOURCC('s', 'c', 'h', 'i'):
865 case FOURCC('e', 'd', 't', 's'):
866 {
867 if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
868 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
869
870 if (mDataSource->flags()
871 & (DataSource::kWantsPrefetching
872 | DataSource::kIsCachingDataSource)) {
873 sp<MPEG4DataSource> cachedSource =
874 new MPEG4DataSource(mDataSource);
875
876 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
877 mDataSource = cachedSource;
878 }
879 }
880
881 mLastTrack->sampleTable = new SampleTable(mDataSource);
882 }
883
884 bool isTrack = false;
885 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
886 isTrack = true;
887
888 Track *track = new Track;
889 track->next = NULL;
890 if (mLastTrack) {
891 mLastTrack->next = track;
892 } else {
893 mFirstTrack = track;
894 }
895 mLastTrack = track;
896
897 track->meta = new MetaData;
898 track->includes_expensive_metadata = false;
899 track->skipTrack = false;
900 track->timescale = 0;
901 track->meta->setCString(kKeyMIMEType, "application/octet-stream");
902 }
903
904 off64_t stop_offset = *offset + chunk_size;
905 *offset = data_offset;
906 while (*offset < stop_offset) {
907 status_t err = parseChunk(offset, depth + 1);
908 if (err != OK) {
909 return err;
910 }
911 }
912
913 if (*offset != stop_offset) {
914 return ERROR_MALFORMED;
915 }
916
917 if (isTrack) {
918 if (mLastTrack->skipTrack) {
919 Track *cur = mFirstTrack;
920
921 if (cur == mLastTrack) {
922 delete cur;
923 mFirstTrack = mLastTrack = NULL;
924 } else {
925 while (cur && cur->next != mLastTrack) {
926 cur = cur->next;
927 }
928 cur->next = NULL;
929 delete mLastTrack;
930 mLastTrack = cur;
931 }
932
933 return OK;
934 }
935
936 status_t err = verifyTrack(mLastTrack);
937
938 if (err != OK) {
939 return err;
940 }
941 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
942 mInitCheck = OK;
943
944 if (!mIsDrm) {
945 return UNKNOWN_ERROR; // Return a dummy error.
946 } else {
947 return OK;
948 }
949 }
950 break;
951 }
952
953 case FOURCC('e', 'l', 's', 't'):
954 {
955 *offset += chunk_size;
956
957 // See 14496-12 8.6.6
958 uint8_t version;
959 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
960 return ERROR_IO;
961 }
962
963 uint32_t entry_count;
964 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
965 return ERROR_IO;
966 }
967
968 if (entry_count != 1) {
969 // we only support a single entry at the moment, for gapless playback
970 ALOGW("ignoring edit list with %d entries", entry_count);
971 } else if (mHeaderTimescale == 0) {
972 ALOGW("ignoring edit list because timescale is 0");
973 } else {
974 off64_t entriesoffset = data_offset + 8;
975 uint64_t segment_duration;
976 int64_t media_time;
977
978 if (version == 1) {
979 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
980 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
981 return ERROR_IO;
982 }
983 } else if (version == 0) {
984 uint32_t sd;
985 int32_t mt;
986 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
987 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
988 return ERROR_IO;
989 }
990 segment_duration = sd;
991 media_time = mt;
992 } else {
993 return ERROR_IO;
994 }
995
996 uint64_t halfscale = mHeaderTimescale / 2;
997 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
998 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
999
1000 int64_t duration;
1001 int32_t samplerate;
1002 if (!mLastTrack) {
1003 return ERROR_MALFORMED;
1004 }
1005 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1006 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1007
1008 int64_t delay = (media_time * samplerate + 500000) / 1000000;
1009 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1010
1011 int64_t paddingus = duration - (segment_duration + media_time);
1012 if (paddingus < 0) {
1013 // track duration from media header (which is what kKeyDuration is) might
1014 // be slightly shorter than the segment duration, which would make the
1015 // padding negative. Clamp to zero.
1016 paddingus = 0;
1017 }
1018 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1019 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1020 }
1021 }
1022 break;
1023 }
1024
1025 case FOURCC('f', 'r', 'm', 'a'):
1026 {
1027 *offset += chunk_size;
1028
1029 uint32_t original_fourcc;
1030 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1031 return ERROR_IO;
1032 }
1033 original_fourcc = ntohl(original_fourcc);
1034 ALOGV("read original format: %d", original_fourcc);
1035 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1036 uint32_t num_channels = 0;
1037 uint32_t sample_rate = 0;
1038 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1039 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1040 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1041 }
1042 break;
1043 }
1044
1045 case FOURCC('t', 'e', 'n', 'c'):
1046 {
1047 *offset += chunk_size;
1048
1049 if (chunk_size < 32) {
1050 return ERROR_MALFORMED;
1051 }
1052
1053 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1054 // default IV size, 16 bytes default KeyID
1055 // (ISO 23001-7)
1056 char buf[4];
1057 memset(buf, 0, 4);
1058 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1059 return ERROR_IO;
1060 }
1061 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1062 if (defaultAlgorithmId > 1) {
1063 // only 0 (clear) and 1 (AES-128) are valid
1064 return ERROR_MALFORMED;
1065 }
1066
1067 memset(buf, 0, 4);
1068 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1069 return ERROR_IO;
1070 }
1071 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1072
1073 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1074 (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1075 // only unencrypted data must have 0 IV size
1076 return ERROR_MALFORMED;
1077 } else if (defaultIVSize != 0 &&
1078 defaultIVSize != 8 &&
1079 defaultIVSize != 16) {
1080 // only supported sizes are 0, 8 and 16
1081 return ERROR_MALFORMED;
1082 }
1083
1084 uint8_t defaultKeyId[16];
1085
1086 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1087 return ERROR_IO;
1088 }
1089
1090 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1091 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1092 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1093 break;
1094 }
1095
1096 case FOURCC('t', 'k', 'h', 'd'):
1097 {
1098 *offset += chunk_size;
1099
1100 status_t err;
1101 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1102 return err;
1103 }
1104
1105 break;
1106 }
1107
1108 case FOURCC('p', 's', 's', 'h'):
1109 {
1110 *offset += chunk_size;
1111
1112 PsshInfo pssh;
1113
1114 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1115 return ERROR_IO;
1116 }
1117
1118 uint32_t psshdatalen = 0;
1119 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1120 return ERROR_IO;
1121 }
1122 pssh.datalen = ntohl(psshdatalen);
1123 ALOGV("pssh data size: %d", pssh.datalen);
1124 if (pssh.datalen + 20 > chunk_size) {
1125 // pssh data length exceeds size of containing box
1126 return ERROR_MALFORMED;
1127 }
1128
1129 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1130 if (pssh.data == NULL) {
1131 return ERROR_MALFORMED;
1132 }
1133 ALOGV("allocated pssh @ %p", pssh.data);
1134 ssize_t requested = (ssize_t) pssh.datalen;
1135 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1136 return ERROR_IO;
1137 }
1138 mPssh.push_back(pssh);
1139
1140 break;
1141 }
1142
1143 case FOURCC('m', 'd', 'h', 'd'):
1144 {
1145 *offset += chunk_size;
1146
1147 if (chunk_data_size < 4 || mLastTrack == NULL) {
1148 return ERROR_MALFORMED;
1149 }
1150
1151 uint8_t version;
1152 if (mDataSource->readAt(
1153 data_offset, &version, sizeof(version))
1154 < (ssize_t)sizeof(version)) {
1155 return ERROR_IO;
1156 }
1157
1158 off64_t timescale_offset;
1159
1160 if (version == 1) {
1161 timescale_offset = data_offset + 4 + 16;
1162 } else if (version == 0) {
1163 timescale_offset = data_offset + 4 + 8;
1164 } else {
1165 return ERROR_IO;
1166 }
1167
1168 uint32_t timescale;
1169 if (mDataSource->readAt(
1170 timescale_offset, ×cale, sizeof(timescale))
1171 < (ssize_t)sizeof(timescale)) {
1172 return ERROR_IO;
1173 }
1174
1175 mLastTrack->timescale = ntohl(timescale);
1176
1177 // 14496-12 says all ones means indeterminate, but some files seem to use
1178 // 0 instead. We treat both the same.
1179 int64_t duration = 0;
1180 if (version == 1) {
1181 if (mDataSource->readAt(
1182 timescale_offset + 4, &duration, sizeof(duration))
1183 < (ssize_t)sizeof(duration)) {
1184 return ERROR_IO;
1185 }
1186 if (duration != -1) {
1187 duration = ntoh64(duration);
1188 }
1189 } else {
1190 uint32_t duration32;
1191 if (mDataSource->readAt(
1192 timescale_offset + 4, &duration32, sizeof(duration32))
1193 < (ssize_t)sizeof(duration32)) {
1194 return ERROR_IO;
1195 }
1196 if (duration32 != 0xffffffff) {
1197 duration = ntohl(duration32);
1198 }
1199 }
1200 if (duration != 0) {
1201 mLastTrack->meta->setInt64(
1202 kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1203 }
1204
1205 uint8_t lang[2];
1206 off64_t lang_offset;
1207 if (version == 1) {
1208 lang_offset = timescale_offset + 4 + 8;
1209 } else if (version == 0) {
1210 lang_offset = timescale_offset + 4 + 4;
1211 } else {
1212 return ERROR_IO;
1213 }
1214
1215 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1216 < (ssize_t)sizeof(lang)) {
1217 return ERROR_IO;
1218 }
1219
1220 // To get the ISO-639-2/T three character language code
1221 // 1 bit pad followed by 3 5-bits characters. Each character
1222 // is packed as the difference between its ASCII value and 0x60.
1223 char lang_code[4];
1224 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1225 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1226 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1227 lang_code[3] = '\0';
1228
1229 mLastTrack->meta->setCString(
1230 kKeyMediaLanguage, lang_code);
1231
1232 break;
1233 }
1234
1235 case FOURCC('s', 't', 's', 'd'):
1236 {
1237 if (chunk_data_size < 8) {
1238 return ERROR_MALFORMED;
1239 }
1240
1241 uint8_t buffer[8];
1242 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1243 return ERROR_MALFORMED;
1244 }
1245
1246 if (mDataSource->readAt(
1247 data_offset, buffer, 8) < 8) {
1248 return ERROR_IO;
1249 }
1250
1251 if (U32_AT(buffer) != 0) {
1252 // Should be version 0, flags 0.
1253 return ERROR_MALFORMED;
1254 }
1255
1256 uint32_t entry_count = U32_AT(&buffer[4]);
1257
1258 if (entry_count > 1) {
1259 // For 3GPP timed text, there could be multiple tx3g boxes contain
1260 // multiple text display formats. These formats will be used to
1261 // display the timed text.
1262 // For encrypted files, there may also be more than one entry.
1263 const char *mime;
1264 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1265 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1266 strcasecmp(mime, "application/octet-stream")) {
1267 // For now we only support a single type of media per track.
1268 mLastTrack->skipTrack = true;
1269 *offset += chunk_size;
1270 break;
1271 }
1272 }
1273 off64_t stop_offset = *offset + chunk_size;
1274 *offset = data_offset + 8;
1275 for (uint32_t i = 0; i < entry_count; ++i) {
1276 status_t err = parseChunk(offset, depth + 1);
1277 if (err != OK) {
1278 return err;
1279 }
1280 }
1281
1282 if (*offset != stop_offset) {
1283 return ERROR_MALFORMED;
1284 }
1285 break;
1286 }
1287
1288 case FOURCC('m', 'p', '4', 'a'):
1289 case FOURCC('e', 'n', 'c', 'a'):
1290 case FOURCC('s', 'a', 'm', 'r'):
1291 case FOURCC('s', 'a', 'w', 'b'):
1292 {
1293 uint8_t buffer[8 + 20];
1294 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1295 // Basic AudioSampleEntry size.
1296 return ERROR_MALFORMED;
1297 }
1298
1299 if (mDataSource->readAt(
1300 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1301 return ERROR_IO;
1302 }
1303
1304 uint16_t data_ref_index = U16_AT(&buffer[6]);
1305 uint32_t num_channels = U16_AT(&buffer[16]);
1306
1307 uint16_t sample_size = U16_AT(&buffer[18]);
1308 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1309
1310 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1311 // if the chunk type is enca, we'll get the type from the sinf/frma box later
1312 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1313 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1314 }
1315 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1316 chunk, num_channels, sample_size, sample_rate);
1317 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1318 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1319
1320 off64_t stop_offset = *offset + chunk_size;
1321 *offset = data_offset + sizeof(buffer);
1322 while (*offset < stop_offset) {
1323 status_t err = parseChunk(offset, depth + 1);
1324 if (err != OK) {
1325 return err;
1326 }
1327 }
1328
1329 if (*offset != stop_offset) {
1330 return ERROR_MALFORMED;
1331 }
1332 break;
1333 }
1334
1335 case FOURCC('m', 'p', '4', 'v'):
1336 case FOURCC('e', 'n', 'c', 'v'):
1337 case FOURCC('s', '2', '6', '3'):
1338 case FOURCC('H', '2', '6', '3'):
1339 case FOURCC('h', '2', '6', '3'):
1340 case FOURCC('a', 'v', 'c', '1'):
1341 case FOURCC('h', 'v', 'c', '1'):
1342 case FOURCC('h', 'e', 'v', '1'):
1343 {
1344 mHasVideo = true;
1345
1346 uint8_t buffer[78];
1347 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1348 // Basic VideoSampleEntry size.
1349 return ERROR_MALFORMED;
1350 }
1351
1352 if (mDataSource->readAt(
1353 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1354 return ERROR_IO;
1355 }
1356
1357 uint16_t data_ref_index = U16_AT(&buffer[6]);
1358 uint16_t width = U16_AT(&buffer[6 + 18]);
1359 uint16_t height = U16_AT(&buffer[6 + 20]);
1360
1361 // The video sample is not standard-compliant if it has invalid dimension.
1362 // Use some default width and height value, and
1363 // let the decoder figure out the actual width and height (and thus
1364 // be prepared for INFO_FOMRAT_CHANGED event).
1365 if (width == 0) width = 352;
1366 if (height == 0) height = 288;
1367
1368 // printf("*** coding='%s' width=%d height=%d\n",
1369 // chunk, width, height);
1370
1371 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1372 // if the chunk type is encv, we'll get the type from the sinf/frma box later
1373 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1374 }
1375 mLastTrack->meta->setInt32(kKeyWidth, width);
1376 mLastTrack->meta->setInt32(kKeyHeight, height);
1377
1378 off64_t stop_offset = *offset + chunk_size;
1379 *offset = data_offset + sizeof(buffer);
1380 while (*offset < stop_offset) {
1381 status_t err = parseChunk(offset, depth + 1);
1382 if (err != OK) {
1383 return err;
1384 }
1385 }
1386
1387 if (*offset != stop_offset) {
1388 return ERROR_MALFORMED;
1389 }
1390 break;
1391 }
1392
1393 case FOURCC('s', 't', 'c', 'o'):
1394 case FOURCC('c', 'o', '6', '4'):
1395 {
1396 status_t err =
1397 mLastTrack->sampleTable->setChunkOffsetParams(
1398 chunk_type, data_offset, chunk_data_size);
1399
1400 *offset += chunk_size;
1401
1402 if (err != OK) {
1403 return err;
1404 }
1405
1406 break;
1407 }
1408
1409 case FOURCC('s', 't', 's', 'c'):
1410 {
1411 status_t err =
1412 mLastTrack->sampleTable->setSampleToChunkParams(
1413 data_offset, chunk_data_size);
1414
1415 *offset += chunk_size;
1416
1417 if (err != OK) {
1418 return err;
1419 }
1420
1421 break;
1422 }
1423
1424 case FOURCC('s', 't', 's', 'z'):
1425 case FOURCC('s', 't', 'z', '2'):
1426 {
1427 status_t err =
1428 mLastTrack->sampleTable->setSampleSizeParams(
1429 chunk_type, data_offset, chunk_data_size);
1430
1431 *offset += chunk_size;
1432
1433 if (err != OK) {
1434 return err;
1435 }
1436
1437 size_t max_size;
1438 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1439
1440 if (err != OK) {
1441 return err;
1442 }
1443
1444 if (max_size != 0) {
1445 // Assume that a given buffer only contains at most 10 chunks,
1446 // each chunk originally prefixed with a 2 byte length will
1447 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1448 // and thus will grow by 2 bytes per chunk.
1449 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1450 } else {
1451 // No size was specified. Pick a conservatively large size.
1452 int32_t width, height;
1453 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1454 !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1455 ALOGE("No width or height, assuming worst case 1080p");
1456 width = 1920;
1457 height = 1080;
1458 }
1459
1460 const char *mime;
1461 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1462 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1463 // AVC requires compression ratio of at least 2, and uses
1464 // macroblocks
1465 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1466 } else {
1467 // For all other formats there is no minimum compression
1468 // ratio. Use compression ratio of 1.
1469 max_size = width * height * 3 / 2;
1470 }
1471 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1472 }
1473
1474 // NOTE: setting another piece of metadata invalidates any pointers (such as the
1475 // mimetype) previously obtained, so don't cache them.
1476 const char *mime;
1477 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1478 // Calculate average frame rate.
1479 if (!strncasecmp("video/", mime, 6)) {
1480 size_t nSamples = mLastTrack->sampleTable->countSamples();
1481 int64_t durationUs;
1482 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1483 if (durationUs > 0) {
1484 int32_t frameRate = (nSamples * 1000000LL +
1485 (durationUs >> 1)) / durationUs;
1486 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1487 }
1488 }
1489 }
1490
1491 break;
1492 }
1493
1494 case FOURCC('s', 't', 't', 's'):
1495 {
1496 *offset += chunk_size;
1497
1498 status_t err =
1499 mLastTrack->sampleTable->setTimeToSampleParams(
1500 data_offset, chunk_data_size);
1501
1502 if (err != OK) {
1503 return err;
1504 }
1505
1506 break;
1507 }
1508
1509 case FOURCC('c', 't', 't', 's'):
1510 {
1511 *offset += chunk_size;
1512
1513 status_t err =
1514 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1515 data_offset, chunk_data_size);
1516
1517 if (err != OK) {
1518 return err;
1519 }
1520
1521 break;
1522 }
1523
1524 case FOURCC('s', 't', 's', 's'):
1525 {
1526 *offset += chunk_size;
1527
1528 status_t err =
1529 mLastTrack->sampleTable->setSyncSampleParams(
1530 data_offset, chunk_data_size);
1531
1532 if (err != OK) {
1533 return err;
1534 }
1535
1536 break;
1537 }
1538
1539 // @xyz
1540 case FOURCC('\xA9', 'x', 'y', 'z'):
1541 {
1542 *offset += chunk_size;
1543
1544 // Best case the total data length inside "@xyz" box
1545 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1546 // where "\x00\x04" is the text string length with value = 4,
1547 // "\0x15\xc7" is the language code = en, and "0+0" is a
1548 // location (string) value with longitude = 0 and latitude = 0.
1549 if (chunk_data_size < 8) {
1550 return ERROR_MALFORMED;
1551 }
1552
1553 // Worst case the location string length would be 18,
1554 // for instance +90.0000-180.0000, without the trailing "/" and
1555 // the string length + language code.
1556 char buffer[18];
1557
1558 // Substracting 5 from the data size is because the text string length +
1559 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1560 off64_t location_length = chunk_data_size - 5;
1561 if (location_length >= (off64_t) sizeof(buffer)) {
1562 return ERROR_MALFORMED;
1563 }
1564
1565 if (mDataSource->readAt(
1566 data_offset + 4, buffer, location_length) < location_length) {
1567 return ERROR_IO;
1568 }
1569
1570 buffer[location_length] = '\0';
1571 mFileMetaData->setCString(kKeyLocation, buffer);
1572 break;
1573 }
1574
1575 case FOURCC('e', 's', 'd', 's'):
1576 {
1577 *offset += chunk_size;
1578
1579 if (chunk_data_size < 4) {
1580 return ERROR_MALFORMED;
1581 }
1582
1583 uint8_t buffer[256];
1584 if (chunk_data_size > (off64_t)sizeof(buffer)) {
1585 return ERROR_BUFFER_TOO_SMALL;
1586 }
1587
1588 if (mDataSource->readAt(
1589 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1590 return ERROR_IO;
1591 }
1592
1593 if (U32_AT(buffer) != 0) {
1594 // Should be version 0, flags 0.
1595 return ERROR_MALFORMED;
1596 }
1597
1598 mLastTrack->meta->setData(
1599 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1600
1601 if (mPath.size() >= 2
1602 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1603 // Information from the ESDS must be relied on for proper
1604 // setup of sample rate and channel count for MPEG4 Audio.
1605 // The generic header appears to only contain generic
1606 // information...
1607
1608 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1609 &buffer[4], chunk_data_size - 4);
1610
1611 if (err != OK) {
1612 return err;
1613 }
1614 }
1615
1616 break;
1617 }
1618
1619 case FOURCC('a', 'v', 'c', 'C'):
1620 {
1621 *offset += chunk_size;
1622
1623 sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1624
1625 if (mDataSource->readAt(
1626 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1627 return ERROR_IO;
1628 }
1629
1630 mLastTrack->meta->setData(
1631 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1632
1633 break;
1634 }
1635 case FOURCC('h', 'v', 'c', 'C'):
1636 {
1637 sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1638
1639 if (mDataSource->readAt(
1640 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1641 return ERROR_IO;
1642 }
1643
1644 mLastTrack->meta->setData(
1645 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1646
1647 *offset += chunk_size;
1648 break;
1649 }
1650
1651 case FOURCC('d', '2', '6', '3'):
1652 {
1653 *offset += chunk_size;
1654 /*
1655 * d263 contains a fixed 7 bytes part:
1656 * vendor - 4 bytes
1657 * version - 1 byte
1658 * level - 1 byte
1659 * profile - 1 byte
1660 * optionally, "d263" box itself may contain a 16-byte
1661 * bit rate box (bitr)
1662 * average bit rate - 4 bytes
1663 * max bit rate - 4 bytes
1664 */
1665 char buffer[23];
1666 if (chunk_data_size != 7 &&
1667 chunk_data_size != 23) {
1668 ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1669 return ERROR_MALFORMED;
1670 }
1671
1672 if (mDataSource->readAt(
1673 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1674 return ERROR_IO;
1675 }
1676
1677 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1678
1679 break;
1680 }
1681
1682 case FOURCC('m', 'e', 't', 'a'):
1683 {
1684 uint8_t buffer[4];
1685 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1686 *offset += chunk_size;
1687 return ERROR_MALFORMED;
1688 }
1689
1690 if (mDataSource->readAt(
1691 data_offset, buffer, 4) < 4) {
1692 *offset += chunk_size;
1693 return ERROR_IO;
1694 }
1695
1696 if (U32_AT(buffer) != 0) {
1697 // Should be version 0, flags 0.
1698
1699 // If it's not, let's assume this is one of those
1700 // apparently malformed chunks that don't have flags
1701 // and completely different semantics than what's
1702 // in the MPEG4 specs and skip it.
1703 *offset += chunk_size;
1704 return OK;
1705 }
1706
1707 off64_t stop_offset = *offset + chunk_size;
1708 *offset = data_offset + sizeof(buffer);
1709 while (*offset < stop_offset) {
1710 status_t err = parseChunk(offset, depth + 1);
1711 if (err != OK) {
1712 return err;
1713 }
1714 }
1715
1716 if (*offset != stop_offset) {
1717 return ERROR_MALFORMED;
1718 }
1719 break;
1720 }
1721
1722 case FOURCC('m', 'e', 'a', 'n'):
1723 case FOURCC('n', 'a', 'm', 'e'):
1724 case FOURCC('d', 'a', 't', 'a'):
1725 {
1726 *offset += chunk_size;
1727
1728 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1729 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1730
1731 if (err != OK) {
1732 return err;
1733 }
1734 }
1735
1736 break;
1737 }
1738
1739 case FOURCC('m', 'v', 'h', 'd'):
1740 {
1741 *offset += chunk_size;
1742
1743 if (chunk_data_size < 32) {
1744 return ERROR_MALFORMED;
1745 }
1746
1747 uint8_t header[32];
1748 if (mDataSource->readAt(
1749 data_offset, header, sizeof(header))
1750 < (ssize_t)sizeof(header)) {
1751 return ERROR_IO;
1752 }
1753
1754 uint64_t creationTime;
1755 uint64_t duration = 0;
1756 if (header[0] == 1) {
1757 creationTime = U64_AT(&header[4]);
1758 mHeaderTimescale = U32_AT(&header[20]);
1759 duration = U64_AT(&header[24]);
1760 if (duration == 0xffffffffffffffff) {
1761 duration = 0;
1762 }
1763 } else if (header[0] != 0) {
1764 return ERROR_MALFORMED;
1765 } else {
1766 creationTime = U32_AT(&header[4]);
1767 mHeaderTimescale = U32_AT(&header[12]);
1768 uint32_t d32 = U32_AT(&header[16]);
1769 if (d32 == 0xffffffff) {
1770 d32 = 0;
1771 }
1772 duration = d32;
1773 }
1774 if (duration != 0) {
1775 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1776 }
1777
1778 String8 s;
1779 convertTimeToDate(creationTime, &s);
1780
1781 mFileMetaData->setCString(kKeyDate, s.string());
1782
1783 break;
1784 }
1785
1786 case FOURCC('m', 'e', 'h', 'd'):
1787 {
1788 *offset += chunk_size;
1789
1790 if (chunk_data_size < 8) {
1791 return ERROR_MALFORMED;
1792 }
1793
1794 uint8_t flags[4];
1795 if (mDataSource->readAt(
1796 data_offset, flags, sizeof(flags))
1797 < (ssize_t)sizeof(flags)) {
1798 return ERROR_IO;
1799 }
1800
1801 uint64_t duration = 0;
1802 if (flags[0] == 1) {
1803 // 64 bit
1804 if (chunk_data_size < 12) {
1805 return ERROR_MALFORMED;
1806 }
1807 mDataSource->getUInt64(data_offset + 4, &duration);
1808 if (duration == 0xffffffffffffffff) {
1809 duration = 0;
1810 }
1811 } else if (flags[0] == 0) {
1812 // 32 bit
1813 uint32_t d32;
1814 mDataSource->getUInt32(data_offset + 4, &d32);
1815 if (d32 == 0xffffffff) {
1816 d32 = 0;
1817 }
1818 duration = d32;
1819 } else {
1820 return ERROR_MALFORMED;
1821 }
1822
1823 if (duration != 0) {
1824 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1825 }
1826
1827 break;
1828 }
1829
1830 case FOURCC('m', 'd', 'a', 't'):
1831 {
1832 ALOGV("mdat chunk, drm: %d", mIsDrm);
1833 if (!mIsDrm) {
1834 *offset += chunk_size;
1835 break;
1836 }
1837
1838 if (chunk_size < 8) {
1839 return ERROR_MALFORMED;
1840 }
1841
1842 return parseDrmSINF(offset, data_offset);
1843 }
1844
1845 case FOURCC('h', 'd', 'l', 'r'):
1846 {
1847 *offset += chunk_size;
1848
1849 uint32_t buffer;
1850 if (mDataSource->readAt(
1851 data_offset + 8, &buffer, 4) < 4) {
1852 return ERROR_IO;
1853 }
1854
1855 uint32_t type = ntohl(buffer);
1856 // For the 3GPP file format, the handler-type within the 'hdlr' box
1857 // shall be 'text'. We also want to support 'sbtl' handler type
1858 // for a practical reason as various MPEG4 containers use it.
1859 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1860 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1861 }
1862
1863 break;
1864 }
1865
1866 case FOURCC('t', 'r', 'e', 'x'):
1867 {
1868 *offset += chunk_size;
1869
1870 if (chunk_data_size < 24) {
1871 return ERROR_IO;
1872 }
1873 uint32_t duration;
1874 Trex trex;
1875 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
1876 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
1877 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
1878 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
1879 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
1880 return ERROR_IO;
1881 }
1882 mTrex.add(trex);
1883 break;
1884 }
1885
1886 case FOURCC('t', 'x', '3', 'g'):
1887 {
1888 uint32_t type;
1889 const void *data;
1890 size_t size = 0;
1891 if (!mLastTrack->meta->findData(
1892 kKeyTextFormatData, &type, &data, &size)) {
1893 size = 0;
1894 }
1895
1896 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
1897 if (buffer == NULL) {
1898 return ERROR_MALFORMED;
1899 }
1900
1901 if (size > 0) {
1902 memcpy(buffer, data, size);
1903 }
1904
1905 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1906 < chunk_size) {
1907 delete[] buffer;
1908 buffer = NULL;
1909
1910 // advance read pointer so we don't end up reading this again
1911 *offset += chunk_size;
1912 return ERROR_IO;
1913 }
1914
1915 mLastTrack->meta->setData(
1916 kKeyTextFormatData, 0, buffer, size + chunk_size);
1917
1918 delete[] buffer;
1919
1920 *offset += chunk_size;
1921 break;
1922 }
1923
1924 case FOURCC('c', 'o', 'v', 'r'):
1925 {
1926 *offset += chunk_size;
1927
1928 if (mFileMetaData != NULL) {
1929 ALOGV("chunk_data_size = %lld and data_offset = %lld",
1930 chunk_data_size, data_offset);
1931 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1932 if (mDataSource->readAt(
1933 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1934 return ERROR_IO;
1935 }
1936 const int kSkipBytesOfDataBox = 16;
1937 mFileMetaData->setData(
1938 kKeyAlbumArt, MetaData::TYPE_NONE,
1939 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1940 }
1941
1942 break;
1943 }
1944
1945 case FOURCC('t', 'i', 't', 'l'):
1946 case FOURCC('p', 'e', 'r', 'f'):
1947 case FOURCC('a', 'u', 't', 'h'):
1948 case FOURCC('g', 'n', 'r', 'e'):
1949 case FOURCC('a', 'l', 'b', 'm'):
1950 case FOURCC('y', 'r', 'r', 'c'):
1951 {
1952 *offset += chunk_size;
1953
1954 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1955
1956 if (err != OK) {
1957 return err;
1958 }
1959
1960 break;
1961 }
1962
1963 case FOURCC('I', 'D', '3', '2'):
1964 {
1965 *offset += chunk_size;
1966
1967 if (chunk_data_size < 6) {
1968 return ERROR_MALFORMED;
1969 }
1970
1971 parseID3v2MetaData(data_offset + 6);
1972
1973 break;
1974 }
1975
1976 case FOURCC('-', '-', '-', '-'):
1977 {
1978 mLastCommentMean.clear();
1979 mLastCommentName.clear();
1980 mLastCommentData.clear();
1981 *offset += chunk_size;
1982 break;
1983 }
1984
1985 case FOURCC('s', 'i', 'd', 'x'):
1986 {
1987 parseSegmentIndex(data_offset, chunk_data_size);
1988 *offset += chunk_size;
1989 return UNKNOWN_ERROR; // stop parsing after sidx
1990 }
1991
1992 default:
1993 {
1994 *offset += chunk_size;
1995 break;
1996 }
1997 }
1998
1999 return OK;
2000 }
2001
parseSegmentIndex(off64_t offset,size_t size)2002 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2003 ALOGV("MPEG4Extractor::parseSegmentIndex");
2004
2005 if (size < 12) {
2006 return -EINVAL;
2007 }
2008
2009 uint32_t flags;
2010 if (!mDataSource->getUInt32(offset, &flags)) {
2011 return ERROR_MALFORMED;
2012 }
2013
2014 uint32_t version = flags >> 24;
2015 flags &= 0xffffff;
2016
2017 ALOGV("sidx version %d", version);
2018
2019 uint32_t referenceId;
2020 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2021 return ERROR_MALFORMED;
2022 }
2023
2024 uint32_t timeScale;
2025 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2026 return ERROR_MALFORMED;
2027 }
2028 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2029
2030 uint64_t earliestPresentationTime;
2031 uint64_t firstOffset;
2032
2033 offset += 12;
2034 size -= 12;
2035
2036 if (version == 0) {
2037 if (size < 8) {
2038 return -EINVAL;
2039 }
2040 uint32_t tmp;
2041 if (!mDataSource->getUInt32(offset, &tmp)) {
2042 return ERROR_MALFORMED;
2043 }
2044 earliestPresentationTime = tmp;
2045 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2046 return ERROR_MALFORMED;
2047 }
2048 firstOffset = tmp;
2049 offset += 8;
2050 size -= 8;
2051 } else {
2052 if (size < 16) {
2053 return -EINVAL;
2054 }
2055 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2056 return ERROR_MALFORMED;
2057 }
2058 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2059 return ERROR_MALFORMED;
2060 }
2061 offset += 16;
2062 size -= 16;
2063 }
2064 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2065
2066 if (size < 4) {
2067 return -EINVAL;
2068 }
2069
2070 uint16_t referenceCount;
2071 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2072 return ERROR_MALFORMED;
2073 }
2074 offset += 4;
2075 size -= 4;
2076 ALOGV("refcount: %d", referenceCount);
2077
2078 if (size < referenceCount * 12) {
2079 return -EINVAL;
2080 }
2081
2082 uint64_t total_duration = 0;
2083 for (unsigned int i = 0; i < referenceCount; i++) {
2084 uint32_t d1, d2, d3;
2085
2086 if (!mDataSource->getUInt32(offset, &d1) || // size
2087 !mDataSource->getUInt32(offset + 4, &d2) || // duration
2088 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2089 return ERROR_MALFORMED;
2090 }
2091
2092 if (d1 & 0x80000000) {
2093 ALOGW("sub-sidx boxes not supported yet");
2094 }
2095 bool sap = d3 & 0x80000000;
2096 uint32_t saptype = (d3 >> 28) & 7;
2097 if (!sap || (saptype != 1 && saptype != 2)) {
2098 // type 1 and 2 are sync samples
2099 ALOGW("not a stream access point, or unsupported type: %08x", d3);
2100 }
2101 total_duration += d2;
2102 offset += 12;
2103 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2104 SidxEntry se;
2105 se.mSize = d1 & 0x7fffffff;
2106 se.mDurationUs = 1000000LL * d2 / timeScale;
2107 mSidxEntries.add(se);
2108 }
2109
2110 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2111
2112 int64_t metaDuration;
2113 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2114 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2115 }
2116 return OK;
2117 }
2118
2119
2120
parseTrackHeader(off64_t data_offset,off64_t data_size)2121 status_t MPEG4Extractor::parseTrackHeader(
2122 off64_t data_offset, off64_t data_size) {
2123 if (data_size < 4) {
2124 return ERROR_MALFORMED;
2125 }
2126
2127 uint8_t version;
2128 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2129 return ERROR_IO;
2130 }
2131
2132 size_t dynSize = (version == 1) ? 36 : 24;
2133
2134 uint8_t buffer[36 + 60];
2135
2136 if (data_size != (off64_t)dynSize + 60) {
2137 return ERROR_MALFORMED;
2138 }
2139
2140 if (mDataSource->readAt(
2141 data_offset, buffer, data_size) < (ssize_t)data_size) {
2142 return ERROR_IO;
2143 }
2144
2145 uint64_t ctime, mtime, duration;
2146 int32_t id;
2147
2148 if (version == 1) {
2149 ctime = U64_AT(&buffer[4]);
2150 mtime = U64_AT(&buffer[12]);
2151 id = U32_AT(&buffer[20]);
2152 duration = U64_AT(&buffer[28]);
2153 } else if (version == 0) {
2154 ctime = U32_AT(&buffer[4]);
2155 mtime = U32_AT(&buffer[8]);
2156 id = U32_AT(&buffer[12]);
2157 duration = U32_AT(&buffer[20]);
2158 } else {
2159 return ERROR_UNSUPPORTED;
2160 }
2161
2162 mLastTrack->meta->setInt32(kKeyTrackID, id);
2163
2164 size_t matrixOffset = dynSize + 16;
2165 int32_t a00 = U32_AT(&buffer[matrixOffset]);
2166 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2167 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2168 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2169 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2170 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2171
2172 #if 0
2173 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2174 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2175 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2176 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2177 #endif
2178
2179 uint32_t rotationDegrees;
2180
2181 static const int32_t kFixedOne = 0x10000;
2182 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2183 // Identity, no rotation
2184 rotationDegrees = 0;
2185 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2186 rotationDegrees = 90;
2187 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2188 rotationDegrees = 270;
2189 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2190 rotationDegrees = 180;
2191 } else {
2192 ALOGW("We only support 0,90,180,270 degree rotation matrices");
2193 rotationDegrees = 0;
2194 }
2195
2196 if (rotationDegrees != 0) {
2197 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2198 }
2199
2200 // Handle presentation display size, which could be different
2201 // from the image size indicated by kKeyWidth and kKeyHeight.
2202 uint32_t width = U32_AT(&buffer[dynSize + 52]);
2203 uint32_t height = U32_AT(&buffer[dynSize + 56]);
2204 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2205 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2206
2207 return OK;
2208 }
2209
parseITunesMetaData(off64_t offset,size_t size)2210 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2211 if (size < 4) {
2212 return ERROR_MALFORMED;
2213 }
2214
2215 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2216 if (buffer == NULL) {
2217 return ERROR_MALFORMED;
2218 }
2219 if (mDataSource->readAt(
2220 offset, buffer, size) != (ssize_t)size) {
2221 delete[] buffer;
2222 buffer = NULL;
2223
2224 return ERROR_IO;
2225 }
2226
2227 uint32_t flags = U32_AT(buffer);
2228
2229 uint32_t metadataKey = 0;
2230 char chunk[5];
2231 MakeFourCCString(mPath[4], chunk);
2232 ALOGV("meta: %s @ %lld", chunk, offset);
2233 switch (mPath[4]) {
2234 case FOURCC(0xa9, 'a', 'l', 'b'):
2235 {
2236 metadataKey = kKeyAlbum;
2237 break;
2238 }
2239 case FOURCC(0xa9, 'A', 'R', 'T'):
2240 {
2241 metadataKey = kKeyArtist;
2242 break;
2243 }
2244 case FOURCC('a', 'A', 'R', 'T'):
2245 {
2246 metadataKey = kKeyAlbumArtist;
2247 break;
2248 }
2249 case FOURCC(0xa9, 'd', 'a', 'y'):
2250 {
2251 metadataKey = kKeyYear;
2252 break;
2253 }
2254 case FOURCC(0xa9, 'n', 'a', 'm'):
2255 {
2256 metadataKey = kKeyTitle;
2257 break;
2258 }
2259 case FOURCC(0xa9, 'w', 'r', 't'):
2260 {
2261 metadataKey = kKeyWriter;
2262 break;
2263 }
2264 case FOURCC('c', 'o', 'v', 'r'):
2265 {
2266 metadataKey = kKeyAlbumArt;
2267 break;
2268 }
2269 case FOURCC('g', 'n', 'r', 'e'):
2270 {
2271 metadataKey = kKeyGenre;
2272 break;
2273 }
2274 case FOURCC(0xa9, 'g', 'e', 'n'):
2275 {
2276 metadataKey = kKeyGenre;
2277 break;
2278 }
2279 case FOURCC('c', 'p', 'i', 'l'):
2280 {
2281 if (size == 9 && flags == 21) {
2282 char tmp[16];
2283 sprintf(tmp, "%d",
2284 (int)buffer[size - 1]);
2285
2286 mFileMetaData->setCString(kKeyCompilation, tmp);
2287 }
2288 break;
2289 }
2290 case FOURCC('t', 'r', 'k', 'n'):
2291 {
2292 if (size == 16 && flags == 0) {
2293 char tmp[16];
2294 uint16_t* pTrack = (uint16_t*)&buffer[10];
2295 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2296 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2297
2298 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2299 }
2300 break;
2301 }
2302 case FOURCC('d', 'i', 's', 'k'):
2303 {
2304 if ((size == 14 || size == 16) && flags == 0) {
2305 char tmp[16];
2306 uint16_t* pDisc = (uint16_t*)&buffer[10];
2307 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2308 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2309
2310 mFileMetaData->setCString(kKeyDiscNumber, tmp);
2311 }
2312 break;
2313 }
2314 case FOURCC('-', '-', '-', '-'):
2315 {
2316 buffer[size] = '\0';
2317 switch (mPath[5]) {
2318 case FOURCC('m', 'e', 'a', 'n'):
2319 mLastCommentMean.setTo((const char *)buffer + 4);
2320 break;
2321 case FOURCC('n', 'a', 'm', 'e'):
2322 mLastCommentName.setTo((const char *)buffer + 4);
2323 break;
2324 case FOURCC('d', 'a', 't', 'a'):
2325 mLastCommentData.setTo((const char *)buffer + 8);
2326 break;
2327 }
2328
2329 // Once we have a set of mean/name/data info, go ahead and process
2330 // it to see if its something we are interested in. Whether or not
2331 // were are interested in the specific tag, make sure to clear out
2332 // the set so we can be ready to process another tuple should one
2333 // show up later in the file.
2334 if ((mLastCommentMean.length() != 0) &&
2335 (mLastCommentName.length() != 0) &&
2336 (mLastCommentData.length() != 0)) {
2337
2338 if (mLastCommentMean == "com.apple.iTunes"
2339 && mLastCommentName == "iTunSMPB") {
2340 int32_t delay, padding;
2341 if (sscanf(mLastCommentData,
2342 " %*x %x %x %*x", &delay, &padding) == 2) {
2343 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2344 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2345 }
2346 }
2347
2348 mLastCommentMean.clear();
2349 mLastCommentName.clear();
2350 mLastCommentData.clear();
2351 }
2352 break;
2353 }
2354
2355 default:
2356 break;
2357 }
2358
2359 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2360 if (metadataKey == kKeyAlbumArt) {
2361 mFileMetaData->setData(
2362 kKeyAlbumArt, MetaData::TYPE_NONE,
2363 buffer + 8, size - 8);
2364 } else if (metadataKey == kKeyGenre) {
2365 if (flags == 0) {
2366 // uint8_t genre code, iTunes genre codes are
2367 // the standard id3 codes, except they start
2368 // at 1 instead of 0 (e.g. Pop is 14, not 13)
2369 // We use standard id3 numbering, so subtract 1.
2370 int genrecode = (int)buffer[size - 1];
2371 genrecode--;
2372 if (genrecode < 0) {
2373 genrecode = 255; // reserved for 'unknown genre'
2374 }
2375 char genre[10];
2376 sprintf(genre, "%d", genrecode);
2377
2378 mFileMetaData->setCString(metadataKey, genre);
2379 } else if (flags == 1) {
2380 // custom genre string
2381 buffer[size] = '\0';
2382
2383 mFileMetaData->setCString(
2384 metadataKey, (const char *)buffer + 8);
2385 }
2386 } else {
2387 buffer[size] = '\0';
2388
2389 mFileMetaData->setCString(
2390 metadataKey, (const char *)buffer + 8);
2391 }
2392 }
2393
2394 delete[] buffer;
2395 buffer = NULL;
2396
2397 return OK;
2398 }
2399
parse3GPPMetaData(off64_t offset,size_t size,int depth)2400 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2401 if (size < 4) {
2402 return ERROR_MALFORMED;
2403 }
2404
2405 uint8_t *buffer = new (std::nothrow) uint8_t[size];
2406 if (buffer == NULL) {
2407 return ERROR_MALFORMED;
2408 }
2409 if (mDataSource->readAt(
2410 offset, buffer, size) != (ssize_t)size) {
2411 delete[] buffer;
2412 buffer = NULL;
2413
2414 return ERROR_IO;
2415 }
2416
2417 uint32_t metadataKey = 0;
2418 switch (mPath[depth]) {
2419 case FOURCC('t', 'i', 't', 'l'):
2420 {
2421 metadataKey = kKeyTitle;
2422 break;
2423 }
2424 case FOURCC('p', 'e', 'r', 'f'):
2425 {
2426 metadataKey = kKeyArtist;
2427 break;
2428 }
2429 case FOURCC('a', 'u', 't', 'h'):
2430 {
2431 metadataKey = kKeyWriter;
2432 break;
2433 }
2434 case FOURCC('g', 'n', 'r', 'e'):
2435 {
2436 metadataKey = kKeyGenre;
2437 break;
2438 }
2439 case FOURCC('a', 'l', 'b', 'm'):
2440 {
2441 if (buffer[size - 1] != '\0') {
2442 char tmp[4];
2443 sprintf(tmp, "%u", buffer[size - 1]);
2444
2445 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2446 }
2447
2448 metadataKey = kKeyAlbum;
2449 break;
2450 }
2451 case FOURCC('y', 'r', 'r', 'c'):
2452 {
2453 char tmp[5];
2454 uint16_t year = U16_AT(&buffer[4]);
2455
2456 if (year < 10000) {
2457 sprintf(tmp, "%u", year);
2458
2459 mFileMetaData->setCString(kKeyYear, tmp);
2460 }
2461 break;
2462 }
2463
2464 default:
2465 break;
2466 }
2467
2468 if (metadataKey > 0) {
2469 bool isUTF8 = true; // Common case
2470 char16_t *framedata = NULL;
2471 int len16 = 0; // Number of UTF-16 characters
2472
2473 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2474 if (size - 6 >= 4) {
2475 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2476 framedata = (char16_t *)(buffer + 6);
2477 if (0xfffe == *framedata) {
2478 // endianness marker (BOM) doesn't match host endianness
2479 for (int i = 0; i < len16; i++) {
2480 framedata[i] = bswap_16(framedata[i]);
2481 }
2482 // BOM is now swapped to 0xfeff, we will execute next block too
2483 }
2484
2485 if (0xfeff == *framedata) {
2486 // Remove the BOM
2487 framedata++;
2488 len16--;
2489 isUTF8 = false;
2490 }
2491 // else normal non-zero-length UTF-8 string
2492 // we can't handle UTF-16 without BOM as there is no other
2493 // indication of encoding.
2494 }
2495
2496 if (isUTF8) {
2497 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2498 } else {
2499 // Convert from UTF-16 string to UTF-8 string.
2500 String8 tmpUTF8str(framedata, len16);
2501 mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2502 }
2503 }
2504
2505 delete[] buffer;
2506 buffer = NULL;
2507
2508 return OK;
2509 }
2510
parseID3v2MetaData(off64_t offset)2511 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2512 ID3 id3(mDataSource, true /* ignorev1 */, offset);
2513
2514 if (id3.isValid()) {
2515 struct Map {
2516 int key;
2517 const char *tag1;
2518 const char *tag2;
2519 };
2520 static const Map kMap[] = {
2521 { kKeyAlbum, "TALB", "TAL" },
2522 { kKeyArtist, "TPE1", "TP1" },
2523 { kKeyAlbumArtist, "TPE2", "TP2" },
2524 { kKeyComposer, "TCOM", "TCM" },
2525 { kKeyGenre, "TCON", "TCO" },
2526 { kKeyTitle, "TIT2", "TT2" },
2527 { kKeyYear, "TYE", "TYER" },
2528 { kKeyAuthor, "TXT", "TEXT" },
2529 { kKeyCDTrackNumber, "TRK", "TRCK" },
2530 { kKeyDiscNumber, "TPA", "TPOS" },
2531 { kKeyCompilation, "TCP", "TCMP" },
2532 };
2533 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2534
2535 for (size_t i = 0; i < kNumMapEntries; ++i) {
2536 if (!mFileMetaData->hasData(kMap[i].key)) {
2537 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2538 if (it->done()) {
2539 delete it;
2540 it = new ID3::Iterator(id3, kMap[i].tag2);
2541 }
2542
2543 if (it->done()) {
2544 delete it;
2545 continue;
2546 }
2547
2548 String8 s;
2549 it->getString(&s);
2550 delete it;
2551
2552 mFileMetaData->setCString(kMap[i].key, s);
2553 }
2554 }
2555
2556 size_t dataSize;
2557 String8 mime;
2558 const void *data = id3.getAlbumArt(&dataSize, &mime);
2559
2560 if (data) {
2561 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2562 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2563 }
2564 }
2565 }
2566
getTrack(size_t index)2567 sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2568 status_t err;
2569 if ((err = readMetaData()) != OK) {
2570 return NULL;
2571 }
2572
2573 Track *track = mFirstTrack;
2574 while (index > 0) {
2575 if (track == NULL) {
2576 return NULL;
2577 }
2578
2579 track = track->next;
2580 --index;
2581 }
2582
2583 if (track == NULL) {
2584 return NULL;
2585 }
2586
2587
2588 Trex *trex = NULL;
2589 int32_t trackId;
2590 if (track->meta->findInt32(kKeyTrackID, &trackId)) {
2591 for (size_t i = 0; i < mTrex.size(); i++) {
2592 Trex *t = &mTrex.editItemAt(index);
2593 if (t->track_ID == (uint32_t) trackId) {
2594 trex = t;
2595 break;
2596 }
2597 }
2598 }
2599
2600 ALOGV("getTrack called, pssh: %zu", mPssh.size());
2601
2602 return new MPEG4Source(this,
2603 track->meta, mDataSource, track->timescale, track->sampleTable,
2604 mSidxEntries, trex, mMoofOffset);
2605 }
2606
2607 // static
verifyTrack(Track * track)2608 status_t MPEG4Extractor::verifyTrack(Track *track) {
2609 const char *mime;
2610 CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2611
2612 uint32_t type;
2613 const void *data;
2614 size_t size;
2615 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2616 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2617 || type != kTypeAVCC) {
2618 return ERROR_MALFORMED;
2619 }
2620 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2621 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2622 || type != kTypeHVCC) {
2623 return ERROR_MALFORMED;
2624 }
2625 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2626 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2627 if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2628 || type != kTypeESDS) {
2629 return ERROR_MALFORMED;
2630 }
2631 }
2632
2633 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2634 // Make sure we have all the metadata we need.
2635 ALOGE("stbl atom missing/invalid.");
2636 return ERROR_MALFORMED;
2637 }
2638
2639 return OK;
2640 }
2641
// MPEG-4 audio object type (AOT) codes. Only the values referenced by the
// parser in this file are defined; the remaining codes are kept as comments
// for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   Main profile
    AOT_AAC_LC = 2,             // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  (reserved)
    // AOT_RSVD_11          = 11,  (reserved)
    // AOT_TTSI             = 12,  TTSI Object
    // AOT_MAIN_SYNTH       = 13,  Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC = 17,         // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  (reserved)
    // AOT_ER_AAC_LTP       = 19,  Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC = 22,           // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD = 23,         // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  might become SSC
    AOT_PS = 29,                // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  MPEG Surround

    AOT_ESCAPE = 31,            // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  might become DST
    // AOT_RSVD_36          = 36,  might become ALS
    // AOT_AAC_SLS          = 37,  AAC + SLS
    // AOT_SLS              = 38,  SLS
    // AOT_ER_AAC_ELD       = 39,  AAC Enhanced Low Delay

    // AOT_USAC             = 42,  USAC
    // AOT_SAOC             = 43,  SAOC
    // AOT_LD_MPEGS         = 44,  Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  Interim AOT for Rsvd50
};
2693
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)2694 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2695 const void *esds_data, size_t esds_size) {
2696 ESDS esds(esds_data, esds_size);
2697
2698 uint8_t objectTypeIndication;
2699 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2700 return ERROR_MALFORMED;
2701 }
2702
2703 if (objectTypeIndication == 0xe1) {
2704 // This isn't MPEG4 audio at all, it's QCELP 14k...
2705 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2706 return OK;
2707 }
2708
2709 if (objectTypeIndication == 0x6b) {
2710 // The media subtype is MP3 audio
2711 // Our software MP3 audio decoder may not be able to handle
2712 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2713 ALOGE("MP3 track in MP4/3GPP file is not supported");
2714 return ERROR_UNSUPPORTED;
2715 }
2716
2717 const uint8_t *csd;
2718 size_t csd_size;
2719 if (esds.getCodecSpecificInfo(
2720 (const void **)&csd, &csd_size) != OK) {
2721 return ERROR_MALFORMED;
2722 }
2723
2724 #if 0
2725 printf("ESD of size %d\n", csd_size);
2726 hexdump(csd, csd_size);
2727 #endif
2728
2729 if (csd_size == 0) {
2730 // There's no further information, i.e. no codec specific data
2731 // Let's assume that the information provided in the mpeg4 headers
2732 // is accurate and hope for the best.
2733
2734 return OK;
2735 }
2736
2737 if (csd_size < 2) {
2738 return ERROR_MALFORMED;
2739 }
2740
2741 static uint32_t kSamplingRate[] = {
2742 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2743 16000, 12000, 11025, 8000, 7350
2744 };
2745
2746 ABitReader br(csd, csd_size);
2747 uint32_t objectType = br.getBits(5);
2748
2749 if (objectType == 31) { // AAC-ELD => additional 6 bits
2750 objectType = 32 + br.getBits(6);
2751 }
2752
2753 //keep AOT type
2754 mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2755
2756 uint32_t freqIndex = br.getBits(4);
2757
2758 int32_t sampleRate = 0;
2759 int32_t numChannels = 0;
2760 if (freqIndex == 15) {
2761 if (csd_size < 5) {
2762 return ERROR_MALFORMED;
2763 }
2764 sampleRate = br.getBits(24);
2765 numChannels = br.getBits(4);
2766 } else {
2767 numChannels = br.getBits(4);
2768
2769 if (freqIndex == 13 || freqIndex == 14) {
2770 return ERROR_MALFORMED;
2771 }
2772
2773 sampleRate = kSamplingRate[freqIndex];
2774 }
2775
2776 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2777 uint32_t extFreqIndex = br.getBits(4);
2778 int32_t extSampleRate;
2779 if (extFreqIndex == 15) {
2780 if (csd_size < 8) {
2781 return ERROR_MALFORMED;
2782 }
2783 extSampleRate = br.getBits(24);
2784 } else {
2785 if (extFreqIndex == 13 || extFreqIndex == 14) {
2786 return ERROR_MALFORMED;
2787 }
2788 extSampleRate = kSamplingRate[extFreqIndex];
2789 }
2790 //TODO: save the extension sampling rate value in meta data =>
2791 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2792 }
2793
2794 switch (numChannels) {
2795 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2796 case 0:
2797 case 1:// FC
2798 case 2:// FL FR
2799 case 3:// FC, FL FR
2800 case 4:// FC, FL FR, RC
2801 case 5:// FC, FL FR, SL SR
2802 case 6:// FC, FL FR, SL SR, LFE
2803 //numChannels already contains the right value
2804 break;
2805 case 11:// FC, FL FR, SL SR, RC, LFE
2806 numChannels = 7;
2807 break;
2808 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2809 case 12:// FC, FL FR, SL SR, RL RR, LFE
2810 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
2811 numChannels = 8;
2812 break;
2813 default:
2814 return ERROR_UNSUPPORTED;
2815 }
2816
2817 {
2818 if (objectType == AOT_SBR || objectType == AOT_PS) {
2819 objectType = br.getBits(5);
2820
2821 if (objectType == AOT_ESCAPE) {
2822 objectType = 32 + br.getBits(6);
2823 }
2824 }
2825 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2826 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2827 objectType == AOT_ER_BSAC) {
2828 const int32_t frameLengthFlag = br.getBits(1);
2829
2830 const int32_t dependsOnCoreCoder = br.getBits(1);
2831
2832 if (dependsOnCoreCoder ) {
2833 const int32_t coreCoderDelay = br.getBits(14);
2834 }
2835
2836 int32_t extensionFlag = -1;
2837 if (br.numBitsLeft() > 0) {
2838 extensionFlag = br.getBits(1);
2839 } else {
2840 switch (objectType) {
2841 // 14496-3 4.5.1.1 extensionFlag
2842 case AOT_AAC_LC:
2843 extensionFlag = 0;
2844 break;
2845 case AOT_ER_AAC_LC:
2846 case AOT_ER_AAC_SCAL:
2847 case AOT_ER_BSAC:
2848 case AOT_ER_AAC_LD:
2849 extensionFlag = 1;
2850 break;
2851 default:
2852 TRESPASS();
2853 break;
2854 }
2855 ALOGW("csd missing extension flag; assuming %d for object type %u.",
2856 extensionFlag, objectType);
2857 }
2858
2859 if (numChannels == 0) {
2860 int32_t channelsEffectiveNum = 0;
2861 int32_t channelsNum = 0;
2862 const int32_t ElementInstanceTag = br.getBits(4);
2863 const int32_t Profile = br.getBits(2);
2864 const int32_t SamplingFrequencyIndex = br.getBits(4);
2865 const int32_t NumFrontChannelElements = br.getBits(4);
2866 const int32_t NumSideChannelElements = br.getBits(4);
2867 const int32_t NumBackChannelElements = br.getBits(4);
2868 const int32_t NumLfeChannelElements = br.getBits(2);
2869 const int32_t NumAssocDataElements = br.getBits(3);
2870 const int32_t NumValidCcElements = br.getBits(4);
2871
2872 const int32_t MonoMixdownPresent = br.getBits(1);
2873 if (MonoMixdownPresent != 0) {
2874 const int32_t MonoMixdownElementNumber = br.getBits(4);
2875 }
2876
2877 const int32_t StereoMixdownPresent = br.getBits(1);
2878 if (StereoMixdownPresent != 0) {
2879 const int32_t StereoMixdownElementNumber = br.getBits(4);
2880 }
2881
2882 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2883 if (MatrixMixdownIndexPresent != 0) {
2884 const int32_t MatrixMixdownIndex = br.getBits(2);
2885 const int32_t PseudoSurroundEnable = br.getBits(1);
2886 }
2887
2888 int i;
2889 for (i=0; i < NumFrontChannelElements; i++) {
2890 const int32_t FrontElementIsCpe = br.getBits(1);
2891 const int32_t FrontElementTagSelect = br.getBits(4);
2892 channelsNum += FrontElementIsCpe ? 2 : 1;
2893 }
2894
2895 for (i=0; i < NumSideChannelElements; i++) {
2896 const int32_t SideElementIsCpe = br.getBits(1);
2897 const int32_t SideElementTagSelect = br.getBits(4);
2898 channelsNum += SideElementIsCpe ? 2 : 1;
2899 }
2900
2901 for (i=0; i < NumBackChannelElements; i++) {
2902 const int32_t BackElementIsCpe = br.getBits(1);
2903 const int32_t BackElementTagSelect = br.getBits(4);
2904 channelsNum += BackElementIsCpe ? 2 : 1;
2905 }
2906 channelsEffectiveNum = channelsNum;
2907
2908 for (i=0; i < NumLfeChannelElements; i++) {
2909 const int32_t LfeElementTagSelect = br.getBits(4);
2910 channelsNum += 1;
2911 }
2912 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2913 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2914 numChannels = channelsNum;
2915 }
2916 }
2917 }
2918
2919 if (numChannels == 0) {
2920 return ERROR_UNSUPPORTED;
2921 }
2922
2923 int32_t prevSampleRate;
2924 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2925
2926 if (prevSampleRate != sampleRate) {
2927 ALOGV("mpeg4 audio sample rate different from previous setting. "
2928 "was: %d, now: %d", prevSampleRate, sampleRate);
2929 }
2930
2931 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2932
2933 int32_t prevChannelCount;
2934 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2935
2936 if (prevChannelCount != numChannels) {
2937 ALOGV("mpeg4 audio channel count different from previous setting. "
2938 "was: %d, now: %d", prevChannelCount, numChannels);
2939 }
2940
2941 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2942
2943 return OK;
2944 }
2945
2946 ////////////////////////////////////////////////////////////////////////////////
2947
MPEG4Source(const sp<MPEG4Extractor> & owner,const sp<MetaData> & format,const sp<DataSource> & dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset)2948 MPEG4Source::MPEG4Source(
2949 const sp<MPEG4Extractor> &owner,
2950 const sp<MetaData> &format,
2951 const sp<DataSource> &dataSource,
2952 int32_t timeScale,
2953 const sp<SampleTable> &sampleTable,
2954 Vector<SidxEntry> &sidx,
2955 const Trex *trex,
2956 off64_t firstMoofOffset)
2957 : mOwner(owner),
2958 mFormat(format),
2959 mDataSource(dataSource),
2960 mTimescale(timeScale),
2961 mSampleTable(sampleTable),
2962 mCurrentSampleIndex(0),
2963 mCurrentFragmentIndex(0),
2964 mSegments(sidx),
2965 mTrex(trex),
2966 mFirstMoofOffset(firstMoofOffset),
2967 mCurrentMoofOffset(firstMoofOffset),
2968 mCurrentTime(0),
2969 mCurrentSampleInfoAllocSize(0),
2970 mCurrentSampleInfoSizes(NULL),
2971 mCurrentSampleInfoOffsetsAllocSize(0),
2972 mCurrentSampleInfoOffsets(NULL),
2973 mIsAVC(false),
2974 mIsHEVC(false),
2975 mNALLengthSize(0),
2976 mStarted(false),
2977 mGroup(NULL),
2978 mBuffer(NULL),
2979 mWantsNALFragments(false),
2980 mSrcBuffer(NULL) {
2981
2982 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
2983
2984 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2985 mDefaultIVSize = 0;
2986 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2987 uint32_t keytype;
2988 const void *key;
2989 size_t keysize;
2990 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2991 CHECK(keysize <= 16);
2992 memset(mCryptoKey, 0, 16);
2993 memcpy(mCryptoKey, key, keysize);
2994 }
2995
2996 const char *mime;
2997 bool success = mFormat->findCString(kKeyMIMEType, &mime);
2998 CHECK(success);
2999
3000 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3001 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3002
3003 if (mIsAVC) {
3004 uint32_t type;
3005 const void *data;
3006 size_t size;
3007 CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3008
3009 const uint8_t *ptr = (const uint8_t *)data;
3010
3011 CHECK(size >= 7);
3012 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3013
3014 // The number of bytes used to encode the length of a NAL unit.
3015 mNALLengthSize = 1 + (ptr[4] & 3);
3016 } else if (mIsHEVC) {
3017 uint32_t type;
3018 const void *data;
3019 size_t size;
3020 CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3021
3022 const uint8_t *ptr = (const uint8_t *)data;
3023
3024 CHECK(size >= 7);
3025 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3026
3027 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3028 }
3029
3030 CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3031
3032 if (mFirstMoofOffset != 0) {
3033 off64_t offset = mFirstMoofOffset;
3034 parseChunk(&offset);
3035 }
3036 }
3037
~MPEG4Source()3038 MPEG4Source::~MPEG4Source() {
3039 if (mStarted) {
3040 stop();
3041 }
3042 free(mCurrentSampleInfoSizes);
3043 free(mCurrentSampleInfoOffsets);
3044 }
3045
start(MetaData * params)3046 status_t MPEG4Source::start(MetaData *params) {
3047 Mutex::Autolock autoLock(mLock);
3048
3049 CHECK(!mStarted);
3050
3051 int32_t val;
3052 if (params && params->findInt32(kKeyWantsNALFragments, &val)
3053 && val != 0) {
3054 mWantsNALFragments = true;
3055 } else {
3056 mWantsNALFragments = false;
3057 }
3058
3059 mGroup = new MediaBufferGroup;
3060
3061 int32_t max_size;
3062 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
3063
3064 mGroup->add_buffer(new MediaBuffer(max_size));
3065
3066 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3067 if (mSrcBuffer == NULL) {
3068 // file probably specified a bad max size
3069 return ERROR_MALFORMED;
3070 }
3071
3072 mStarted = true;
3073
3074 return OK;
3075 }
3076
stop()3077 status_t MPEG4Source::stop() {
3078 Mutex::Autolock autoLock(mLock);
3079
3080 CHECK(mStarted);
3081
3082 if (mBuffer != NULL) {
3083 mBuffer->release();
3084 mBuffer = NULL;
3085 }
3086
3087 delete[] mSrcBuffer;
3088 mSrcBuffer = NULL;
3089
3090 delete mGroup;
3091 mGroup = NULL;
3092
3093 mStarted = false;
3094 mCurrentSampleIndex = 0;
3095
3096 return OK;
3097 }
3098
parseChunk(off64_t * offset)3099 status_t MPEG4Source::parseChunk(off64_t *offset) {
3100 uint32_t hdr[2];
3101 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3102 return ERROR_IO;
3103 }
3104 uint64_t chunk_size = ntohl(hdr[0]);
3105 uint32_t chunk_type = ntohl(hdr[1]);
3106 off64_t data_offset = *offset + 8;
3107
3108 if (chunk_size == 1) {
3109 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3110 return ERROR_IO;
3111 }
3112 chunk_size = ntoh64(chunk_size);
3113 data_offset += 8;
3114
3115 if (chunk_size < 16) {
3116 // The smallest valid chunk is 16 bytes long in this case.
3117 return ERROR_MALFORMED;
3118 }
3119 } else if (chunk_size < 8) {
3120 // The smallest valid chunk is 8 bytes long.
3121 return ERROR_MALFORMED;
3122 }
3123
3124 char chunk[5];
3125 MakeFourCCString(chunk_type, chunk);
3126 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
3127
3128 off64_t chunk_data_size = *offset + chunk_size - data_offset;
3129
3130 switch(chunk_type) {
3131
3132 case FOURCC('t', 'r', 'a', 'f'):
3133 case FOURCC('m', 'o', 'o', 'f'): {
3134 off64_t stop_offset = *offset + chunk_size;
3135 *offset = data_offset;
3136 while (*offset < stop_offset) {
3137 status_t err = parseChunk(offset);
3138 if (err != OK) {
3139 return err;
3140 }
3141 }
3142 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3143 // *offset points to the box following this moof. Find the next moof from there.
3144
3145 while (true) {
3146 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3147 return ERROR_END_OF_STREAM;
3148 }
3149 chunk_size = ntohl(hdr[0]);
3150 chunk_type = ntohl(hdr[1]);
3151 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3152 mNextMoofOffset = *offset;
3153 break;
3154 }
3155 *offset += chunk_size;
3156 }
3157 }
3158 break;
3159 }
3160
3161 case FOURCC('t', 'f', 'h', 'd'): {
3162 status_t err;
3163 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3164 return err;
3165 }
3166 *offset += chunk_size;
3167 break;
3168 }
3169
3170 case FOURCC('t', 'r', 'u', 'n'): {
3171 status_t err;
3172 if (mLastParsedTrackId == mTrackId) {
3173 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3174 return err;
3175 }
3176 }
3177
3178 *offset += chunk_size;
3179 break;
3180 }
3181
3182 case FOURCC('s', 'a', 'i', 'z'): {
3183 status_t err;
3184 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3185 return err;
3186 }
3187 *offset += chunk_size;
3188 break;
3189 }
3190 case FOURCC('s', 'a', 'i', 'o'): {
3191 status_t err;
3192 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3193 return err;
3194 }
3195 *offset += chunk_size;
3196 break;
3197 }
3198
3199 case FOURCC('m', 'd', 'a', 't'): {
3200 // parse DRM info if present
3201 ALOGV("MPEG4Source::parseChunk mdat");
3202 // if saiz/saoi was previously observed, do something with the sampleinfos
3203 *offset += chunk_size;
3204 break;
3205 }
3206
3207 default: {
3208 *offset += chunk_size;
3209 break;
3210 }
3211 }
3212 return OK;
3213 }
3214
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)3215 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3216 off64_t offset, off64_t /* size */) {
3217 ALOGV("parseSampleAuxiliaryInformationSizes");
3218 // 14496-12 8.7.12
3219 uint8_t version;
3220 if (mDataSource->readAt(
3221 offset, &version, sizeof(version))
3222 < (ssize_t)sizeof(version)) {
3223 return ERROR_IO;
3224 }
3225
3226 if (version != 0) {
3227 return ERROR_UNSUPPORTED;
3228 }
3229 offset++;
3230
3231 uint32_t flags;
3232 if (!mDataSource->getUInt24(offset, &flags)) {
3233 return ERROR_IO;
3234 }
3235 offset += 3;
3236
3237 if (flags & 1) {
3238 uint32_t tmp;
3239 if (!mDataSource->getUInt32(offset, &tmp)) {
3240 return ERROR_MALFORMED;
3241 }
3242 mCurrentAuxInfoType = tmp;
3243 offset += 4;
3244 if (!mDataSource->getUInt32(offset, &tmp)) {
3245 return ERROR_MALFORMED;
3246 }
3247 mCurrentAuxInfoTypeParameter = tmp;
3248 offset += 4;
3249 }
3250
3251 uint8_t defsize;
3252 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3253 return ERROR_MALFORMED;
3254 }
3255 mCurrentDefaultSampleInfoSize = defsize;
3256 offset++;
3257
3258 uint32_t smplcnt;
3259 if (!mDataSource->getUInt32(offset, &smplcnt)) {
3260 return ERROR_MALFORMED;
3261 }
3262 mCurrentSampleInfoCount = smplcnt;
3263 offset += 4;
3264
3265 if (mCurrentDefaultSampleInfoSize != 0) {
3266 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3267 return OK;
3268 }
3269 if (smplcnt > mCurrentSampleInfoAllocSize) {
3270 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3271 mCurrentSampleInfoAllocSize = smplcnt;
3272 }
3273
3274 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3275 return OK;
3276 }
3277
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)3278 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3279 off64_t offset, off64_t /* size */) {
3280 ALOGV("parseSampleAuxiliaryInformationOffsets");
3281 // 14496-12 8.7.13
3282 uint8_t version;
3283 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3284 return ERROR_IO;
3285 }
3286 offset++;
3287
3288 uint32_t flags;
3289 if (!mDataSource->getUInt24(offset, &flags)) {
3290 return ERROR_IO;
3291 }
3292 offset += 3;
3293
3294 uint32_t entrycount;
3295 if (!mDataSource->getUInt32(offset, &entrycount)) {
3296 return ERROR_IO;
3297 }
3298 offset += 4;
3299
3300 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3301 mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3302 mCurrentSampleInfoOffsetsAllocSize = entrycount;
3303 }
3304 mCurrentSampleInfoOffsetCount = entrycount;
3305
3306 for (size_t i = 0; i < entrycount; i++) {
3307 if (version == 0) {
3308 uint32_t tmp;
3309 if (!mDataSource->getUInt32(offset, &tmp)) {
3310 return ERROR_IO;
3311 }
3312 mCurrentSampleInfoOffsets[i] = tmp;
3313 offset += 4;
3314 } else {
3315 uint64_t tmp;
3316 if (!mDataSource->getUInt64(offset, &tmp)) {
3317 return ERROR_IO;
3318 }
3319 mCurrentSampleInfoOffsets[i] = tmp;
3320 offset += 8;
3321 }
3322 }
3323
3324 // parse clear/encrypted data
3325
3326 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3327
3328 drmoffset += mCurrentMoofOffset;
3329 int ivlength;
3330 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3331
3332 // read CencSampleAuxiliaryDataFormats
3333 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3334 Sample *smpl = &mCurrentSamples.editItemAt(i);
3335
3336 memset(smpl->iv, 0, 16);
3337 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3338 return ERROR_IO;
3339 }
3340
3341 drmoffset += ivlength;
3342
3343 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3344 if (smplinfosize == 0) {
3345 smplinfosize = mCurrentSampleInfoSizes[i];
3346 }
3347 if (smplinfosize > ivlength) {
3348 uint16_t numsubsamples;
3349 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3350 return ERROR_IO;
3351 }
3352 drmoffset += 2;
3353 for (size_t j = 0; j < numsubsamples; j++) {
3354 uint16_t numclear;
3355 uint32_t numencrypted;
3356 if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3357 return ERROR_IO;
3358 }
3359 drmoffset += 2;
3360 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3361 return ERROR_IO;
3362 }
3363 drmoffset += 4;
3364 smpl->clearsizes.add(numclear);
3365 smpl->encryptedsizes.add(numencrypted);
3366 }
3367 } else {
3368 smpl->clearsizes.add(0);
3369 smpl->encryptedsizes.add(smpl->size);
3370 }
3371 }
3372
3373
3374 return OK;
3375 }
3376
parseTrackFragmentHeader(off64_t offset,off64_t size)3377 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3378
3379 if (size < 8) {
3380 return -EINVAL;
3381 }
3382
3383 uint32_t flags;
3384 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3385 return ERROR_MALFORMED;
3386 }
3387
3388 if (flags & 0xff000000) {
3389 return -EINVAL;
3390 }
3391
3392 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3393 return ERROR_MALFORMED;
3394 }
3395
3396 if (mLastParsedTrackId != mTrackId) {
3397 // this is not the right track, skip it
3398 return OK;
3399 }
3400
3401 mTrackFragmentHeaderInfo.mFlags = flags;
3402 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3403 offset += 8;
3404 size -= 8;
3405
3406 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3407
3408 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3409 if (size < 8) {
3410 return -EINVAL;
3411 }
3412
3413 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3414 return ERROR_MALFORMED;
3415 }
3416 offset += 8;
3417 size -= 8;
3418 }
3419
3420 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3421 if (size < 4) {
3422 return -EINVAL;
3423 }
3424
3425 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3426 return ERROR_MALFORMED;
3427 }
3428 offset += 4;
3429 size -= 4;
3430 }
3431
3432 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3433 if (size < 4) {
3434 return -EINVAL;
3435 }
3436
3437 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3438 return ERROR_MALFORMED;
3439 }
3440 offset += 4;
3441 size -= 4;
3442 }
3443
3444 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3445 if (size < 4) {
3446 return -EINVAL;
3447 }
3448
3449 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3450 return ERROR_MALFORMED;
3451 }
3452 offset += 4;
3453 size -= 4;
3454 }
3455
3456 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3457 if (size < 4) {
3458 return -EINVAL;
3459 }
3460
3461 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3462 return ERROR_MALFORMED;
3463 }
3464 offset += 4;
3465 size -= 4;
3466 }
3467
3468 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3469 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3470 }
3471
3472 mTrackFragmentHeaderInfo.mDataOffset = 0;
3473 return OK;
3474 }
3475
parseTrackFragmentRun(off64_t offset,off64_t size)3476 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3477
3478 ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3479 if (size < 8) {
3480 return -EINVAL;
3481 }
3482
3483 enum {
3484 kDataOffsetPresent = 0x01,
3485 kFirstSampleFlagsPresent = 0x04,
3486 kSampleDurationPresent = 0x100,
3487 kSampleSizePresent = 0x200,
3488 kSampleFlagsPresent = 0x400,
3489 kSampleCompositionTimeOffsetPresent = 0x800,
3490 };
3491
3492 uint32_t flags;
3493 if (!mDataSource->getUInt32(offset, &flags)) {
3494 return ERROR_MALFORMED;
3495 }
3496 ALOGV("fragment run flags: %08x", flags);
3497
3498 if (flags & 0xff000000) {
3499 return -EINVAL;
3500 }
3501
3502 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3503 // These two shall not be used together.
3504 return -EINVAL;
3505 }
3506
3507 uint32_t sampleCount;
3508 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3509 return ERROR_MALFORMED;
3510 }
3511 offset += 8;
3512 size -= 8;
3513
3514 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3515
3516 uint32_t firstSampleFlags = 0;
3517
3518 if (flags & kDataOffsetPresent) {
3519 if (size < 4) {
3520 return -EINVAL;
3521 }
3522
3523 int32_t dataOffsetDelta;
3524 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3525 return ERROR_MALFORMED;
3526 }
3527
3528 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3529
3530 offset += 4;
3531 size -= 4;
3532 }
3533
3534 if (flags & kFirstSampleFlagsPresent) {
3535 if (size < 4) {
3536 return -EINVAL;
3537 }
3538
3539 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3540 return ERROR_MALFORMED;
3541 }
3542 offset += 4;
3543 size -= 4;
3544 }
3545
3546 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3547 sampleCtsOffset = 0;
3548
3549 size_t bytesPerSample = 0;
3550 if (flags & kSampleDurationPresent) {
3551 bytesPerSample += 4;
3552 } else if (mTrackFragmentHeaderInfo.mFlags
3553 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3554 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3555 } else if (mTrex) {
3556 sampleDuration = mTrex->default_sample_duration;
3557 }
3558
3559 if (flags & kSampleSizePresent) {
3560 bytesPerSample += 4;
3561 } else if (mTrackFragmentHeaderInfo.mFlags
3562 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3563 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3564 } else {
3565 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3566 }
3567
3568 if (flags & kSampleFlagsPresent) {
3569 bytesPerSample += 4;
3570 } else if (mTrackFragmentHeaderInfo.mFlags
3571 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3572 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3573 } else {
3574 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3575 }
3576
3577 if (flags & kSampleCompositionTimeOffsetPresent) {
3578 bytesPerSample += 4;
3579 } else {
3580 sampleCtsOffset = 0;
3581 }
3582
3583 if (size < (off64_t)sampleCount * bytesPerSample) {
3584 return -EINVAL;
3585 }
3586
3587 Sample tmp;
3588 for (uint32_t i = 0; i < sampleCount; ++i) {
3589 if (flags & kSampleDurationPresent) {
3590 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3591 return ERROR_MALFORMED;
3592 }
3593 offset += 4;
3594 }
3595
3596 if (flags & kSampleSizePresent) {
3597 if (!mDataSource->getUInt32(offset, &sampleSize)) {
3598 return ERROR_MALFORMED;
3599 }
3600 offset += 4;
3601 }
3602
3603 if (flags & kSampleFlagsPresent) {
3604 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3605 return ERROR_MALFORMED;
3606 }
3607 offset += 4;
3608 }
3609
3610 if (flags & kSampleCompositionTimeOffsetPresent) {
3611 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3612 return ERROR_MALFORMED;
3613 }
3614 offset += 4;
3615 }
3616
3617 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
3618 " flags 0x%08x", i + 1,
3619 dataOffset, sampleSize, sampleDuration,
3620 (flags & kFirstSampleFlagsPresent) && i == 0
3621 ? firstSampleFlags : sampleFlags);
3622 tmp.offset = dataOffset;
3623 tmp.size = sampleSize;
3624 tmp.duration = sampleDuration;
3625 tmp.compositionOffset = sampleCtsOffset;
3626 mCurrentSamples.add(tmp);
3627
3628 dataOffset += sampleSize;
3629 }
3630
3631 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3632
3633 return OK;
3634 }
3635
getFormat()3636 sp<MetaData> MPEG4Source::getFormat() {
3637 Mutex::Autolock autoLock(mLock);
3638
3639 return mFormat;
3640 }
3641
parseNALSize(const uint8_t * data) const3642 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3643 switch (mNALLengthSize) {
3644 case 1:
3645 return *data;
3646 case 2:
3647 return U16_AT(data);
3648 case 3:
3649 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3650 case 4:
3651 return U32_AT(data);
3652 }
3653
3654 // This cannot happen, mNALLengthSize springs to life by adding 1 to
3655 // a 2-bit integer.
3656 CHECK(!"Should not be here.");
3657
3658 return 0;
3659 }
3660
read(MediaBuffer ** out,const ReadOptions * options)3661 status_t MPEG4Source::read(
3662 MediaBuffer **out, const ReadOptions *options) {
3663 Mutex::Autolock autoLock(mLock);
3664
3665 CHECK(mStarted);
3666
3667 if (mFirstMoofOffset > 0) {
3668 return fragmentedRead(out, options);
3669 }
3670
3671 *out = NULL;
3672
3673 int64_t targetSampleTimeUs = -1;
3674
3675 int64_t seekTimeUs;
3676 ReadOptions::SeekMode mode;
3677 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3678 uint32_t findFlags = 0;
3679 switch (mode) {
3680 case ReadOptions::SEEK_PREVIOUS_SYNC:
3681 findFlags = SampleTable::kFlagBefore;
3682 break;
3683 case ReadOptions::SEEK_NEXT_SYNC:
3684 findFlags = SampleTable::kFlagAfter;
3685 break;
3686 case ReadOptions::SEEK_CLOSEST_SYNC:
3687 case ReadOptions::SEEK_CLOSEST:
3688 findFlags = SampleTable::kFlagClosest;
3689 break;
3690 default:
3691 CHECK(!"Should not be here.");
3692 break;
3693 }
3694
3695 uint32_t sampleIndex;
3696 status_t err = mSampleTable->findSampleAtTime(
3697 seekTimeUs, 1000000, mTimescale,
3698 &sampleIndex, findFlags);
3699
3700 if (mode == ReadOptions::SEEK_CLOSEST) {
3701 // We found the closest sample already, now we want the sync
3702 // sample preceding it (or the sample itself of course), even
3703 // if the subsequent sync sample is closer.
3704 findFlags = SampleTable::kFlagBefore;
3705 }
3706
3707 uint32_t syncSampleIndex;
3708 if (err == OK) {
3709 err = mSampleTable->findSyncSampleNear(
3710 sampleIndex, &syncSampleIndex, findFlags);
3711 }
3712
3713 uint32_t sampleTime;
3714 if (err == OK) {
3715 err = mSampleTable->getMetaDataForSample(
3716 sampleIndex, NULL, NULL, &sampleTime);
3717 }
3718
3719 if (err != OK) {
3720 if (err == ERROR_OUT_OF_RANGE) {
3721 // An attempt to seek past the end of the stream would
3722 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3723 // this all the way to the MediaPlayer would cause abnormal
3724 // termination. Legacy behaviour appears to be to behave as if
3725 // we had seeked to the end of stream, ending normally.
3726 err = ERROR_END_OF_STREAM;
3727 }
3728 ALOGV("end of stream");
3729 return err;
3730 }
3731
3732 if (mode == ReadOptions::SEEK_CLOSEST) {
3733 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3734 }
3735
3736 #if 0
3737 uint32_t syncSampleTime;
3738 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3739 syncSampleIndex, NULL, NULL, &syncSampleTime));
3740
3741 ALOGI("seek to time %lld us => sample at time %lld us, "
3742 "sync sample at time %lld us",
3743 seekTimeUs,
3744 sampleTime * 1000000ll / mTimescale,
3745 syncSampleTime * 1000000ll / mTimescale);
3746 #endif
3747
3748 mCurrentSampleIndex = syncSampleIndex;
3749 if (mBuffer != NULL) {
3750 mBuffer->release();
3751 mBuffer = NULL;
3752 }
3753
3754 // fall through
3755 }
3756
3757 off64_t offset;
3758 size_t size;
3759 uint32_t cts, stts;
3760 bool isSyncSample;
3761 bool newBuffer = false;
3762 if (mBuffer == NULL) {
3763 newBuffer = true;
3764
3765 status_t err =
3766 mSampleTable->getMetaDataForSample(
3767 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3768
3769 if (err != OK) {
3770 return err;
3771 }
3772
3773 err = mGroup->acquire_buffer(&mBuffer);
3774
3775 if (err != OK) {
3776 CHECK(mBuffer == NULL);
3777 return err;
3778 }
3779 }
3780
3781 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
3782 if (newBuffer) {
3783 ssize_t num_bytes_read =
3784 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3785
3786 if (num_bytes_read < (ssize_t)size) {
3787 mBuffer->release();
3788 mBuffer = NULL;
3789
3790 return ERROR_IO;
3791 }
3792
3793 CHECK(mBuffer != NULL);
3794 mBuffer->set_range(0, size);
3795 mBuffer->meta_data()->clear();
3796 mBuffer->meta_data()->setInt64(
3797 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3798 mBuffer->meta_data()->setInt64(
3799 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3800
3801 if (targetSampleTimeUs >= 0) {
3802 mBuffer->meta_data()->setInt64(
3803 kKeyTargetTime, targetSampleTimeUs);
3804 }
3805
3806 if (isSyncSample) {
3807 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3808 }
3809
3810 ++mCurrentSampleIndex;
3811 }
3812
3813 if (!mIsAVC && !mIsHEVC) {
3814 *out = mBuffer;
3815 mBuffer = NULL;
3816
3817 return OK;
3818 }
3819
3820 // Each NAL unit is split up into its constituent fragments and
3821 // each one of them returned in its own buffer.
3822
3823 CHECK(mBuffer->range_length() >= mNALLengthSize);
3824
3825 const uint8_t *src =
3826 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3827
3828 size_t nal_size = parseNALSize(src);
3829 if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3830 ALOGE("incomplete NAL unit.");
3831
3832 mBuffer->release();
3833 mBuffer = NULL;
3834
3835 return ERROR_MALFORMED;
3836 }
3837
3838 MediaBuffer *clone = mBuffer->clone();
3839 CHECK(clone != NULL);
3840 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3841
3842 CHECK(mBuffer != NULL);
3843 mBuffer->set_range(
3844 mBuffer->range_offset() + mNALLengthSize + nal_size,
3845 mBuffer->range_length() - mNALLengthSize - nal_size);
3846
3847 if (mBuffer->range_length() == 0) {
3848 mBuffer->release();
3849 mBuffer = NULL;
3850 }
3851
3852 *out = clone;
3853
3854 return OK;
3855 } else {
3856 // Whole NAL units are returned but each fragment is prefixed by
3857 // the start code (0x00 00 00 01).
3858 ssize_t num_bytes_read = 0;
3859 int32_t drm = 0;
3860 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3861 if (usesDRM) {
3862 num_bytes_read =
3863 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3864 } else {
3865 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3866 }
3867
3868 if (num_bytes_read < (ssize_t)size) {
3869 mBuffer->release();
3870 mBuffer = NULL;
3871
3872 return ERROR_IO;
3873 }
3874
3875 if (usesDRM) {
3876 CHECK(mBuffer != NULL);
3877 mBuffer->set_range(0, size);
3878
3879 } else {
3880 uint8_t *dstData = (uint8_t *)mBuffer->data();
3881 size_t srcOffset = 0;
3882 size_t dstOffset = 0;
3883
3884 while (srcOffset < size) {
3885 bool isMalFormed = (srcOffset + mNALLengthSize > size);
3886 size_t nalLength = 0;
3887 if (!isMalFormed) {
3888 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3889 srcOffset += mNALLengthSize;
3890 isMalFormed = srcOffset + nalLength > size;
3891 }
3892
3893 if (isMalFormed) {
3894 ALOGE("Video is malformed");
3895 mBuffer->release();
3896 mBuffer = NULL;
3897 return ERROR_MALFORMED;
3898 }
3899
3900 if (nalLength == 0) {
3901 continue;
3902 }
3903
3904 CHECK(dstOffset + 4 <= mBuffer->size());
3905
3906 dstData[dstOffset++] = 0;
3907 dstData[dstOffset++] = 0;
3908 dstData[dstOffset++] = 0;
3909 dstData[dstOffset++] = 1;
3910 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3911 srcOffset += nalLength;
3912 dstOffset += nalLength;
3913 }
3914 CHECK_EQ(srcOffset, size);
3915 CHECK(mBuffer != NULL);
3916 mBuffer->set_range(0, dstOffset);
3917 }
3918
3919 mBuffer->meta_data()->clear();
3920 mBuffer->meta_data()->setInt64(
3921 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3922 mBuffer->meta_data()->setInt64(
3923 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3924
3925 if (targetSampleTimeUs >= 0) {
3926 mBuffer->meta_data()->setInt64(
3927 kKeyTargetTime, targetSampleTimeUs);
3928 }
3929
3930 if (isSyncSample) {
3931 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3932 }
3933
3934 ++mCurrentSampleIndex;
3935
3936 *out = mBuffer;
3937 mBuffer = NULL;
3938
3939 return OK;
3940 }
3941 }
3942
fragmentedRead(MediaBuffer ** out,const ReadOptions * options)3943 status_t MPEG4Source::fragmentedRead(
3944 MediaBuffer **out, const ReadOptions *options) {
3945
3946 ALOGV("MPEG4Source::fragmentedRead");
3947
3948 CHECK(mStarted);
3949
3950 *out = NULL;
3951
3952 int64_t targetSampleTimeUs = -1;
3953
3954 int64_t seekTimeUs;
3955 ReadOptions::SeekMode mode;
3956 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3957
3958 int numSidxEntries = mSegments.size();
3959 if (numSidxEntries != 0) {
3960 int64_t totalTime = 0;
3961 off64_t totalOffset = mFirstMoofOffset;
3962 for (int i = 0; i < numSidxEntries; i++) {
3963 const SidxEntry *se = &mSegments[i];
3964 if (totalTime + se->mDurationUs > seekTimeUs) {
3965 // The requested time is somewhere in this segment
3966 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
3967 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3968 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3969 // requested next sync, or closest sync and it was closer to the end of
3970 // this segment
3971 totalTime += se->mDurationUs;
3972 totalOffset += se->mSize;
3973 }
3974 break;
3975 }
3976 totalTime += se->mDurationUs;
3977 totalOffset += se->mSize;
3978 }
3979 mCurrentMoofOffset = totalOffset;
3980 mCurrentSamples.clear();
3981 mCurrentSampleIndex = 0;
3982 parseChunk(&totalOffset);
3983 mCurrentTime = totalTime * mTimescale / 1000000ll;
3984 } else {
3985 // without sidx boxes, we can only seek to 0
3986 mCurrentMoofOffset = mFirstMoofOffset;
3987 mCurrentSamples.clear();
3988 mCurrentSampleIndex = 0;
3989 off64_t tmp = mCurrentMoofOffset;
3990 parseChunk(&tmp);
3991 mCurrentTime = 0;
3992 }
3993
3994 if (mBuffer != NULL) {
3995 mBuffer->release();
3996 mBuffer = NULL;
3997 }
3998
3999 // fall through
4000 }
4001
4002 off64_t offset = 0;
4003 size_t size = 0;
4004 uint32_t cts = 0;
4005 bool isSyncSample = false;
4006 bool newBuffer = false;
4007 if (mBuffer == NULL) {
4008 newBuffer = true;
4009
4010 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4011 // move to next fragment if there is one
4012 if (mNextMoofOffset <= mCurrentMoofOffset) {
4013 return ERROR_END_OF_STREAM;
4014 }
4015 off64_t nextMoof = mNextMoofOffset;
4016 mCurrentMoofOffset = nextMoof;
4017 mCurrentSamples.clear();
4018 mCurrentSampleIndex = 0;
4019 parseChunk(&nextMoof);
4020 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4021 return ERROR_END_OF_STREAM;
4022 }
4023 }
4024
4025 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4026 offset = smpl->offset;
4027 size = smpl->size;
4028 cts = mCurrentTime + smpl->compositionOffset;
4029 mCurrentTime += smpl->duration;
4030 isSyncSample = (mCurrentSampleIndex == 0); // XXX
4031
4032 status_t err = mGroup->acquire_buffer(&mBuffer);
4033
4034 if (err != OK) {
4035 CHECK(mBuffer == NULL);
4036 ALOGV("acquire_buffer returned %d", err);
4037 return err;
4038 }
4039 }
4040
4041 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4042 const sp<MetaData> bufmeta = mBuffer->meta_data();
4043 bufmeta->clear();
4044 if (smpl->encryptedsizes.size()) {
4045 // store clear/encrypted lengths in metadata
4046 bufmeta->setData(kKeyPlainSizes, 0,
4047 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4048 bufmeta->setData(kKeyEncryptedSizes, 0,
4049 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4050 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4051 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4052 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4053 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4054 }
4055
4056 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4057 if (newBuffer) {
4058 ssize_t num_bytes_read =
4059 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4060
4061 if (num_bytes_read < (ssize_t)size) {
4062 mBuffer->release();
4063 mBuffer = NULL;
4064
4065 ALOGV("i/o error");
4066 return ERROR_IO;
4067 }
4068
4069 CHECK(mBuffer != NULL);
4070 mBuffer->set_range(0, size);
4071 mBuffer->meta_data()->setInt64(
4072 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4073 mBuffer->meta_data()->setInt64(
4074 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4075
4076 if (targetSampleTimeUs >= 0) {
4077 mBuffer->meta_data()->setInt64(
4078 kKeyTargetTime, targetSampleTimeUs);
4079 }
4080
4081 if (isSyncSample) {
4082 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4083 }
4084
4085 ++mCurrentSampleIndex;
4086 }
4087
4088 if (!mIsAVC && !mIsHEVC) {
4089 *out = mBuffer;
4090 mBuffer = NULL;
4091
4092 return OK;
4093 }
4094
4095 // Each NAL unit is split up into its constituent fragments and
4096 // each one of them returned in its own buffer.
4097
4098 CHECK(mBuffer->range_length() >= mNALLengthSize);
4099
4100 const uint8_t *src =
4101 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4102
4103 size_t nal_size = parseNALSize(src);
4104 if (mBuffer->range_length() < mNALLengthSize + nal_size) {
4105 ALOGE("incomplete NAL unit.");
4106
4107 mBuffer->release();
4108 mBuffer = NULL;
4109
4110 return ERROR_MALFORMED;
4111 }
4112
4113 MediaBuffer *clone = mBuffer->clone();
4114 CHECK(clone != NULL);
4115 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4116
4117 CHECK(mBuffer != NULL);
4118 mBuffer->set_range(
4119 mBuffer->range_offset() + mNALLengthSize + nal_size,
4120 mBuffer->range_length() - mNALLengthSize - nal_size);
4121
4122 if (mBuffer->range_length() == 0) {
4123 mBuffer->release();
4124 mBuffer = NULL;
4125 }
4126
4127 *out = clone;
4128
4129 return OK;
4130 } else {
4131 ALOGV("whole NAL");
4132 // Whole NAL units are returned but each fragment is prefixed by
4133 // the start code (0x00 00 00 01).
4134 ssize_t num_bytes_read = 0;
4135 int32_t drm = 0;
4136 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4137 if (usesDRM) {
4138 num_bytes_read =
4139 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4140 } else {
4141 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4142 }
4143
4144 if (num_bytes_read < (ssize_t)size) {
4145 mBuffer->release();
4146 mBuffer = NULL;
4147
4148 ALOGV("i/o error");
4149 return ERROR_IO;
4150 }
4151
4152 if (usesDRM) {
4153 CHECK(mBuffer != NULL);
4154 mBuffer->set_range(0, size);
4155
4156 } else {
4157 uint8_t *dstData = (uint8_t *)mBuffer->data();
4158 size_t srcOffset = 0;
4159 size_t dstOffset = 0;
4160
4161 while (srcOffset < size) {
4162 bool isMalFormed = (srcOffset + mNALLengthSize > size);
4163 size_t nalLength = 0;
4164 if (!isMalFormed) {
4165 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4166 srcOffset += mNALLengthSize;
4167 isMalFormed = srcOffset + nalLength > size;
4168 }
4169
4170 if (isMalFormed) {
4171 ALOGE("Video is malformed");
4172 mBuffer->release();
4173 mBuffer = NULL;
4174 return ERROR_MALFORMED;
4175 }
4176
4177 if (nalLength == 0) {
4178 continue;
4179 }
4180
4181 CHECK(dstOffset + 4 <= mBuffer->size());
4182
4183 dstData[dstOffset++] = 0;
4184 dstData[dstOffset++] = 0;
4185 dstData[dstOffset++] = 0;
4186 dstData[dstOffset++] = 1;
4187 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4188 srcOffset += nalLength;
4189 dstOffset += nalLength;
4190 }
4191 CHECK_EQ(srcOffset, size);
4192 CHECK(mBuffer != NULL);
4193 mBuffer->set_range(0, dstOffset);
4194 }
4195
4196 mBuffer->meta_data()->setInt64(
4197 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4198 mBuffer->meta_data()->setInt64(
4199 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4200
4201 if (targetSampleTimeUs >= 0) {
4202 mBuffer->meta_data()->setInt64(
4203 kKeyTargetTime, targetSampleTimeUs);
4204 }
4205
4206 if (isSyncSample) {
4207 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4208 }
4209
4210 ++mCurrentSampleIndex;
4211
4212 *out = mBuffer;
4213 mBuffer = NULL;
4214
4215 return OK;
4216 }
4217 }
4218
findTrackByMimePrefix(const char * mimePrefix)4219 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4220 const char *mimePrefix) {
4221 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4222 const char *mime;
4223 if (track->meta != NULL
4224 && track->meta->findCString(kKeyMIMEType, &mime)
4225 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4226 return track;
4227 }
4228 }
4229
4230 return NULL;
4231 }
4232
LegacySniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence)4233 static bool LegacySniffMPEG4(
4234 const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4235 uint8_t header[8];
4236
4237 ssize_t n = source->readAt(4, header, sizeof(header));
4238 if (n < (ssize_t)sizeof(header)) {
4239 return false;
4240 }
4241
4242 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4243 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4244 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4245 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4246 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4247 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4248 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4249 *confidence = 0.4;
4250
4251 return true;
4252 }
4253
4254 return false;
4255 }
4256
isCompatibleBrand(uint32_t fourcc)4257 static bool isCompatibleBrand(uint32_t fourcc) {
4258 static const uint32_t kCompatibleBrands[] = {
4259 FOURCC('i', 's', 'o', 'm'),
4260 FOURCC('i', 's', 'o', '2'),
4261 FOURCC('a', 'v', 'c', '1'),
4262 FOURCC('h', 'v', 'c', '1'),
4263 FOURCC('h', 'e', 'v', '1'),
4264 FOURCC('3', 'g', 'p', '4'),
4265 FOURCC('m', 'p', '4', '1'),
4266 FOURCC('m', 'p', '4', '2'),
4267
4268 // Won't promise that the following file types can be played.
4269 // Just give these file types a chance.
4270 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime
4271 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP
4272
4273 FOURCC('3', 'g', '2', 'a'), // 3GPP2
4274 FOURCC('3', 'g', '2', 'b'),
4275 };
4276
4277 for (size_t i = 0;
4278 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4279 ++i) {
4280 if (kCompatibleBrands[i] == fourcc) {
4281 return true;
4282 }
4283 }
4284
4285 return false;
4286 }
4287
4288 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
4289 // compatible brand is present.
4290 // Also try to identify where this file's metadata ends
4291 // (end of the 'moov' atom) and report it to the caller as part of
4292 // the metadata.
BetterSniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4293 static bool BetterSniffMPEG4(
4294 const sp<DataSource> &source, String8 *mimeType, float *confidence,
4295 sp<AMessage> *meta) {
4296 // We scan up to 128 bytes to identify this file as an MP4.
4297 static const off64_t kMaxScanOffset = 128ll;
4298
4299 off64_t offset = 0ll;
4300 bool foundGoodFileType = false;
4301 off64_t moovAtomEndOffset = -1ll;
4302 bool done = false;
4303
4304 while (!done && offset < kMaxScanOffset) {
4305 uint32_t hdr[2];
4306 if (source->readAt(offset, hdr, 8) < 8) {
4307 return false;
4308 }
4309
4310 uint64_t chunkSize = ntohl(hdr[0]);
4311 uint32_t chunkType = ntohl(hdr[1]);
4312 off64_t chunkDataOffset = offset + 8;
4313
4314 if (chunkSize == 1) {
4315 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4316 return false;
4317 }
4318
4319 chunkSize = ntoh64(chunkSize);
4320 chunkDataOffset += 8;
4321
4322 if (chunkSize < 16) {
4323 // The smallest valid chunk is 16 bytes long in this case.
4324 return false;
4325 }
4326 } else if (chunkSize < 8) {
4327 // The smallest valid chunk is 8 bytes long.
4328 return false;
4329 }
4330
4331 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4332
4333 char chunkstring[5];
4334 MakeFourCCString(chunkType, chunkstring);
4335 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset);
4336 switch (chunkType) {
4337 case FOURCC('f', 't', 'y', 'p'):
4338 {
4339 if (chunkDataSize < 8) {
4340 return false;
4341 }
4342
4343 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4344 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4345 if (i == 1) {
4346 // Skip this index, it refers to the minorVersion,
4347 // not a brand.
4348 continue;
4349 }
4350
4351 uint32_t brand;
4352 if (source->readAt(
4353 chunkDataOffset + 4 * i, &brand, 4) < 4) {
4354 return false;
4355 }
4356
4357 brand = ntohl(brand);
4358
4359 if (isCompatibleBrand(brand)) {
4360 foundGoodFileType = true;
4361 break;
4362 }
4363 }
4364
4365 if (!foundGoodFileType) {
4366 return false;
4367 }
4368
4369 break;
4370 }
4371
4372 case FOURCC('m', 'o', 'o', 'v'):
4373 {
4374 moovAtomEndOffset = offset + chunkSize;
4375
4376 done = true;
4377 break;
4378 }
4379
4380 default:
4381 break;
4382 }
4383
4384 offset += chunkSize;
4385 }
4386
4387 if (!foundGoodFileType) {
4388 return false;
4389 }
4390
4391 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4392 *confidence = 0.4f;
4393
4394 if (moovAtomEndOffset >= 0) {
4395 *meta = new AMessage;
4396 (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4397
4398 ALOGV("found metadata size: %lld", moovAtomEndOffset);
4399 }
4400
4401 return true;
4402 }
4403
SniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4404 bool SniffMPEG4(
4405 const sp<DataSource> &source, String8 *mimeType, float *confidence,
4406 sp<AMessage> *meta) {
4407 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4408 return true;
4409 }
4410
4411 if (LegacySniffMPEG4(source, mimeType, confidence)) {
4412 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4413 return true;
4414 }
4415
4416 return false;
4417 }
4418
4419 } // namespace android
4420