1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19 
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include <utils/Log.h>
27 
28 #include "include/MPEG4Extractor.h"
29 #include "include/SampleTable.h"
30 #include "include/ESDS.h"
31 
32 #include <media/stagefright/foundation/ABitReader.h>
33 #include <media/stagefright/foundation/ABuffer.h>
34 #include <media/stagefright/foundation/ADebug.h>
35 #include <media/stagefright/foundation/AMessage.h>
36 #include <media/stagefright/MediaBuffer.h>
37 #include <media/stagefright/MediaBufferGroup.h>
38 #include <media/stagefright/MediaDefs.h>
39 #include <media/stagefright/MediaSource.h>
40 #include <media/stagefright/MetaData.h>
41 #include <utils/String8.h>
42 
43 #include <byteswap.h>
44 #include "include/ID3.h"
45 
46 namespace android {
47 
48 class MPEG4Source : public MediaSource {
49 public:
50     // Caller retains ownership of both "dataSource" and "sampleTable".
51     MPEG4Source(const sp<MPEG4Extractor> &owner,
52                 const sp<MetaData> &format,
53                 const sp<DataSource> &dataSource,
54                 int32_t timeScale,
55                 const sp<SampleTable> &sampleTable,
56                 Vector<SidxEntry> &sidx,
57                 const Trex *trex,
58                 off64_t firstMoofOffset);
59 
60     virtual status_t start(MetaData *params = NULL);
61     virtual status_t stop();
62 
63     virtual sp<MetaData> getFormat();
64 
65     virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
66     virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
67 
68 protected:
69     virtual ~MPEG4Source();
70 
71 private:
72     Mutex mLock;
73 
74     // keep the MPEG4Extractor around, since we're referencing its data
75     sp<MPEG4Extractor> mOwner;
76     sp<MetaData> mFormat;
77     sp<DataSource> mDataSource;
78     int32_t mTimescale;
79     sp<SampleTable> mSampleTable;
80     uint32_t mCurrentSampleIndex;
81     uint32_t mCurrentFragmentIndex;
82     Vector<SidxEntry> &mSegments;
83     const Trex *mTrex;
84     off64_t mFirstMoofOffset;
85     off64_t mCurrentMoofOffset;
86     off64_t mNextMoofOffset;
87     uint32_t mCurrentTime;
88     int32_t mLastParsedTrackId;
89     int32_t mTrackId;
90 
91     int32_t mCryptoMode;    // passed in from extractor
92     int32_t mDefaultIVSize; // passed in from extractor
93     uint8_t mCryptoKey[16]; // passed in from extractor
94     uint32_t mCurrentAuxInfoType;
95     uint32_t mCurrentAuxInfoTypeParameter;
96     int32_t mCurrentDefaultSampleInfoSize;
97     uint32_t mCurrentSampleInfoCount;
98     uint32_t mCurrentSampleInfoAllocSize;
99     uint8_t* mCurrentSampleInfoSizes;
100     uint32_t mCurrentSampleInfoOffsetCount;
101     uint32_t mCurrentSampleInfoOffsetsAllocSize;
102     uint64_t* mCurrentSampleInfoOffsets;
103 
104     bool mIsAVC;
105     bool mIsHEVC;
106     size_t mNALLengthSize;
107 
108     bool mStarted;
109 
110     MediaBufferGroup *mGroup;
111 
112     MediaBuffer *mBuffer;
113 
114     bool mWantsNALFragments;
115 
116     uint8_t *mSrcBuffer;
117 
118     size_t parseNALSize(const uint8_t *data) const;
119     status_t parseChunk(off64_t *offset);
120     status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
121     status_t parseTrackFragmentRun(off64_t offset, off64_t size);
122     status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
123     status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
124 
125     struct TrackFragmentHeaderInfo {
126         enum Flags {
127             kBaseDataOffsetPresent         = 0x01,
128             kSampleDescriptionIndexPresent = 0x02,
129             kDefaultSampleDurationPresent  = 0x08,
130             kDefaultSampleSizePresent      = 0x10,
131             kDefaultSampleFlagsPresent     = 0x20,
132             kDurationIsEmpty               = 0x10000,
133         };
134 
135         uint32_t mTrackID;
136         uint32_t mFlags;
137         uint64_t mBaseDataOffset;
138         uint32_t mSampleDescriptionIndex;
139         uint32_t mDefaultSampleDuration;
140         uint32_t mDefaultSampleSize;
141         uint32_t mDefaultSampleFlags;
142 
143         uint64_t mDataOffset;
144     };
145     TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
146 
147     struct Sample {
148         off64_t offset;
149         size_t size;
150         uint32_t duration;
151         int32_t compositionOffset;
152         uint8_t iv[16];
153         Vector<size_t> clearsizes;
154         Vector<size_t> encryptedsizes;
155     };
156     Vector<Sample> mCurrentSamples;
157 
158     MPEG4Source(const MPEG4Source &);
159     MPEG4Source &operator=(const MPEG4Source &);
160 };
161 
162 // This custom data source wraps an existing one and satisfies requests
163 // falling entirely within a cached range from the cache while forwarding
164 // all remaining requests to the wrapped datasource.
165 // This is used to cache the full sampletable metadata for a single track,
166 // possibly wrapping multiple times to cover all tracks, i.e.
167 // Each MPEG4DataSource caches the sampletable metadata for a single track.
168 
169 struct MPEG4DataSource : public DataSource {
170     MPEG4DataSource(const sp<DataSource> &source);
171 
172     virtual status_t initCheck() const;
173     virtual ssize_t readAt(off64_t offset, void *data, size_t size);
174     virtual status_t getSize(off64_t *size);
175     virtual uint32_t flags();
176 
177     status_t setCachedRange(off64_t offset, size_t size);
178 
179 protected:
180     virtual ~MPEG4DataSource();
181 
182 private:
183     Mutex mLock;
184 
185     sp<DataSource> mSource;
186     off64_t mCachedOffset;
187     size_t mCachedSize;
188     uint8_t *mCache;
189 
190     void clearCache();
191 
192     MPEG4DataSource(const MPEG4DataSource &);
193     MPEG4DataSource &operator=(const MPEG4DataSource &);
194 };
195 
MPEG4DataSource(const sp<DataSource> & source)196 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
197     : mSource(source),
198       mCachedOffset(0),
199       mCachedSize(0),
200       mCache(NULL) {
201 }
202 
~MPEG4DataSource()203 MPEG4DataSource::~MPEG4DataSource() {
204     clearCache();
205 }
206 
clearCache()207 void MPEG4DataSource::clearCache() {
208     if (mCache) {
209         free(mCache);
210         mCache = NULL;
211     }
212 
213     mCachedOffset = 0;
214     mCachedSize = 0;
215 }
216 
initCheck() const217 status_t MPEG4DataSource::initCheck() const {
218     return mSource->initCheck();
219 }
220 
readAt(off64_t offset,void * data,size_t size)221 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
222     Mutex::Autolock autoLock(mLock);
223 
224     if (offset >= mCachedOffset
225             && offset + size <= mCachedOffset + mCachedSize) {
226         memcpy(data, &mCache[offset - mCachedOffset], size);
227         return size;
228     }
229 
230     return mSource->readAt(offset, data, size);
231 }
232 
getSize(off64_t * size)233 status_t MPEG4DataSource::getSize(off64_t *size) {
234     return mSource->getSize(size);
235 }
236 
flags()237 uint32_t MPEG4DataSource::flags() {
238     return mSource->flags();
239 }
240 
setCachedRange(off64_t offset,size_t size)241 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
242     Mutex::Autolock autoLock(mLock);
243 
244     clearCache();
245 
246     mCache = (uint8_t *)malloc(size);
247 
248     if (mCache == NULL) {
249         return -ENOMEM;
250     }
251 
252     mCachedOffset = offset;
253     mCachedSize = size;
254 
255     ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
256 
257     if (err < (ssize_t)size) {
258         clearCache();
259 
260         return ERROR_IO;
261     }
262 
263     return OK;
264 }
265 
266 ////////////////////////////////////////////////////////////////////////////////
267 
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row.
// Each row shows the row offset in hex, the bytes in hex (with an extra
// space after the eighth byte and padding for missing bytes) and finally the
// bytes as characters, where non-printable ones are shown as '.'.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t row = 0; row < size; row += 16) {
        printf("0x%04zx  ", row);

        // Number of valid bytes in this row.
        const size_t remaining = size - row;
        const size_t count = remaining > 16 ? 16 : remaining;

        for (size_t col = 0; col < 16; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < count) {
                printf("%02x ", bytes[row + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        for (size_t col = 0; col < count; ++col) {
            const uint8_t c = bytes[row + col];
            printf("%c", isprint(c) ? c : '.');
        }

        printf("\n");
    }
}
306 
FourCC2MIME(uint32_t fourcc)307 static const char *FourCC2MIME(uint32_t fourcc) {
308     switch (fourcc) {
309         case FOURCC('m', 'p', '4', 'a'):
310             return MEDIA_MIMETYPE_AUDIO_AAC;
311 
312         case FOURCC('s', 'a', 'm', 'r'):
313             return MEDIA_MIMETYPE_AUDIO_AMR_NB;
314 
315         case FOURCC('s', 'a', 'w', 'b'):
316             return MEDIA_MIMETYPE_AUDIO_AMR_WB;
317 
318         case FOURCC('m', 'p', '4', 'v'):
319             return MEDIA_MIMETYPE_VIDEO_MPEG4;
320 
321         case FOURCC('s', '2', '6', '3'):
322         case FOURCC('h', '2', '6', '3'):
323         case FOURCC('H', '2', '6', '3'):
324             return MEDIA_MIMETYPE_VIDEO_H263;
325 
326         case FOURCC('a', 'v', 'c', '1'):
327             return MEDIA_MIMETYPE_VIDEO_AVC;
328 
329         case FOURCC('h', 'v', 'c', '1'):
330         case FOURCC('h', 'e', 'v', '1'):
331             return MEDIA_MIMETYPE_VIDEO_HEVC;
332         default:
333             CHECK(!"should not be here.");
334             return NULL;
335     }
336 }
337 
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)338 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
339     if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
340         // AMR NB audio is always mono, 8kHz
341         *channels = 1;
342         *rate = 8000;
343         return true;
344     } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
345         // AMR WB audio is always mono, 16kHz
346         *channels = 1;
347         *rate = 16000;
348         return true;
349     }
350     return false;
351 }
352 
MPEG4Extractor(const sp<DataSource> & source)353 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
354     : mMoofOffset(0),
355       mDataSource(source),
356       mInitCheck(NO_INIT),
357       mHasVideo(false),
358       mHeaderTimescale(0),
359       mFirstTrack(NULL),
360       mLastTrack(NULL),
361       mFileMetaData(new MetaData),
362       mFirstSINF(NULL),
363       mIsDrm(false) {
364 }
365 
~MPEG4Extractor()366 MPEG4Extractor::~MPEG4Extractor() {
367     Track *track = mFirstTrack;
368     while (track) {
369         Track *next = track->next;
370 
371         delete track;
372         track = next;
373     }
374     mFirstTrack = mLastTrack = NULL;
375 
376     SINF *sinf = mFirstSINF;
377     while (sinf) {
378         SINF *next = sinf->next;
379         delete[] sinf->IPMPData;
380         delete sinf;
381         sinf = next;
382     }
383     mFirstSINF = NULL;
384 
385     for (size_t i = 0; i < mPssh.size(); i++) {
386         delete [] mPssh[i].data;
387     }
388 }
389 
flags() const390 uint32_t MPEG4Extractor::flags() const {
391     return CAN_PAUSE |
392             ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
393                     (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
394 }
395 
getMetaData()396 sp<MetaData> MPEG4Extractor::getMetaData() {
397     status_t err;
398     if ((err = readMetaData()) != OK) {
399         return new MetaData;
400     }
401 
402     return mFileMetaData;
403 }
404 
countTracks()405 size_t MPEG4Extractor::countTracks() {
406     status_t err;
407     if ((err = readMetaData()) != OK) {
408         ALOGV("MPEG4Extractor::countTracks: no tracks");
409         return 0;
410     }
411 
412     size_t n = 0;
413     Track *track = mFirstTrack;
414     while (track) {
415         ++n;
416         track = track->next;
417     }
418 
419     ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
420     return n;
421 }
422 
getTrackMetaData(size_t index,uint32_t flags)423 sp<MetaData> MPEG4Extractor::getTrackMetaData(
424         size_t index, uint32_t flags) {
425     status_t err;
426     if ((err = readMetaData()) != OK) {
427         return NULL;
428     }
429 
430     Track *track = mFirstTrack;
431     while (index > 0) {
432         if (track == NULL) {
433             return NULL;
434         }
435 
436         track = track->next;
437         --index;
438     }
439 
440     if (track == NULL) {
441         return NULL;
442     }
443 
444     if ((flags & kIncludeExtensiveMetaData)
445             && !track->includes_expensive_metadata) {
446         track->includes_expensive_metadata = true;
447 
448         const char *mime;
449         CHECK(track->meta->findCString(kKeyMIMEType, &mime));
450         if (!strncasecmp("video/", mime, 6)) {
451             if (mMoofOffset > 0) {
452                 int64_t duration;
453                 if (track->meta->findInt64(kKeyDuration, &duration)) {
454                     // nothing fancy, just pick a frame near 1/4th of the duration
455                     track->meta->setInt64(
456                             kKeyThumbnailTime, duration / 4);
457                 }
458             } else {
459                 uint32_t sampleIndex;
460                 uint32_t sampleTime;
461                 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
462                         && track->sampleTable->getMetaDataForSample(
463                             sampleIndex, NULL /* offset */, NULL /* size */,
464                             &sampleTime) == OK) {
465                     track->meta->setInt64(
466                             kKeyThumbnailTime,
467                             ((int64_t)sampleTime * 1000000) / track->timescale);
468                 }
469             }
470         }
471     }
472 
473     return track->meta;
474 }
475 
// Writes the four bytes of "x", most significant byte first, into "s" and
// NUL-terminates the result. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    int shift = 24;
    for (int i = 0; i < 4; ++i, shift -= 8) {
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
483 
readMetaData()484 status_t MPEG4Extractor::readMetaData() {
485     if (mInitCheck != NO_INIT) {
486         return mInitCheck;
487     }
488 
489     off64_t offset = 0;
490     status_t err;
491     while (true) {
492         off64_t orig_offset = offset;
493         err = parseChunk(&offset, 0);
494 
495         if (err != OK && err != UNKNOWN_ERROR) {
496             break;
497         } else if (offset <= orig_offset) {
498             // only continue parsing if the offset was advanced,
499             // otherwise we might end up in an infinite loop
500             ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
501             err = ERROR_MALFORMED;
502             break;
503         } else if (err == OK) {
504             continue;
505         }
506 
507         uint32_t hdr[2];
508         if (mDataSource->readAt(offset, hdr, 8) < 8) {
509             break;
510         }
511         uint32_t chunk_type = ntohl(hdr[1]);
512         if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
513             // store the offset of the first segment
514             mMoofOffset = offset;
515         } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
516             // keep parsing until we get to the data
517             continue;
518         }
519         break;
520     }
521 
522     if (mInitCheck == OK) {
523         if (mHasVideo) {
524             mFileMetaData->setCString(
525                     kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
526         } else {
527             mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
528         }
529     } else {
530         mInitCheck = err;
531     }
532 
533     CHECK_NE(err, (status_t)NO_INIT);
534 
535     // copy pssh data into file metadata
536     int psshsize = 0;
537     for (size_t i = 0; i < mPssh.size(); i++) {
538         psshsize += 20 + mPssh[i].datalen;
539     }
540     if (psshsize) {
541         char *buf = (char*)malloc(psshsize);
542         char *ptr = buf;
543         for (size_t i = 0; i < mPssh.size(); i++) {
544             memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
545             memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
546             ptr += (20 + mPssh[i].datalen);
547         }
548         mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
549         free(buf);
550     }
551     return mInitCheck;
552 }
553 
getDrmTrackInfo(size_t trackID,int * len)554 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
555     if (mFirstSINF == NULL) {
556         return NULL;
557     }
558 
559     SINF *sinf = mFirstSINF;
560     while (sinf && (trackID != sinf->trackID)) {
561         sinf = sinf->next;
562     }
563 
564     if (sinf == NULL) {
565         return NULL;
566     }
567 
568     *len = sinf->len;
569     return sinf->IPMPData;
570 }
571 
572 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
readSize(off64_t offset,const sp<DataSource> DataSource,uint8_t * numOfBytes)573 static int32_t readSize(off64_t offset,
574         const sp<DataSource> DataSource, uint8_t *numOfBytes) {
575     uint32_t size = 0;
576     uint8_t data;
577     bool moreData = true;
578     *numOfBytes = 0;
579 
580     while (moreData) {
581         if (DataSource->readAt(offset, &data, 1) < 1) {
582             return -1;
583         }
584         offset ++;
585         moreData = (data >= 128) ? true : false;
586         size = (size << 7) | (data & 0x7f); // Take last 7 bits
587         (*numOfBytes) ++;
588     }
589 
590     return size;
591 }
592 
parseDrmSINF(off64_t *,off64_t data_offset)593 status_t MPEG4Extractor::parseDrmSINF(
594         off64_t * /* offset */, off64_t data_offset) {
595     uint8_t updateIdTag;
596     if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
597         return ERROR_IO;
598     }
599     data_offset ++;
600 
601     if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
602         return ERROR_MALFORMED;
603     }
604 
605     uint8_t numOfBytes;
606     int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
607     if (size < 0) {
608         return ERROR_IO;
609     }
610     int32_t classSize = size;
611     data_offset += numOfBytes;
612 
613     while(size >= 11 ) {
614         uint8_t descriptorTag;
615         if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
616             return ERROR_IO;
617         }
618         data_offset ++;
619 
620         if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
621             return ERROR_MALFORMED;
622         }
623 
624         uint8_t buffer[8];
625         //ObjectDescriptorID and ObjectDescriptor url flag
626         if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
627             return ERROR_IO;
628         }
629         data_offset += 2;
630 
631         if ((buffer[1] >> 5) & 0x0001) { //url flag is set
632             return ERROR_MALFORMED;
633         }
634 
635         if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
636             return ERROR_IO;
637         }
638         data_offset += 8;
639 
640         if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
641                 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
642             return ERROR_MALFORMED;
643         }
644 
645         SINF *sinf = new SINF;
646         sinf->trackID = U16_AT(&buffer[3]);
647         sinf->IPMPDescriptorID = buffer[7];
648         sinf->next = mFirstSINF;
649         mFirstSINF = sinf;
650 
651         size -= (8 + 2 + 1);
652     }
653 
654     if (size != 0) {
655         return ERROR_MALFORMED;
656     }
657 
658     if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
659         return ERROR_IO;
660     }
661     data_offset ++;
662 
663     if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
664         return ERROR_MALFORMED;
665     }
666 
667     size = readSize(data_offset, mDataSource, &numOfBytes);
668     if (size < 0) {
669         return ERROR_IO;
670     }
671     classSize = size;
672     data_offset += numOfBytes;
673 
674     while (size > 0) {
675         uint8_t tag;
676         int32_t dataLen;
677         if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
678             return ERROR_IO;
679         }
680         data_offset ++;
681 
682         if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
683             uint8_t id;
684             dataLen = readSize(data_offset, mDataSource, &numOfBytes);
685             if (dataLen < 0) {
686                 return ERROR_IO;
687             } else if (dataLen < 4) {
688                 return ERROR_MALFORMED;
689             }
690             data_offset += numOfBytes;
691 
692             if (mDataSource->readAt(data_offset, &id, 1) < 1) {
693                 return ERROR_IO;
694             }
695             data_offset ++;
696 
697             SINF *sinf = mFirstSINF;
698             while (sinf && (sinf->IPMPDescriptorID != id)) {
699                 sinf = sinf->next;
700             }
701             if (sinf == NULL) {
702                 return ERROR_MALFORMED;
703             }
704             sinf->len = dataLen - 3;
705             sinf->IPMPData = new (std::nothrow) char[sinf->len];
706             if (sinf->IPMPData == NULL) {
707                 return ERROR_MALFORMED;
708             }
709             data_offset += 2;
710 
711             if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
712                 return ERROR_IO;
713             }
714             data_offset += sinf->len;
715 
716             size -= (dataLen + numOfBytes + 1);
717         }
718     }
719 
720     if (size != 0) {
721         return ERROR_MALFORMED;
722     }
723 
724     return UNKNOWN_ERROR;  // Return a dummy error.
725 }
726 
727 struct PathAdder {
PathAdderandroid::PathAdder728     PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
729         : mPath(path) {
730         mPath->push(chunkType);
731     }
732 
~PathAdderandroid::PathAdder733     ~PathAdder() {
734         mPath->pop();
735     }
736 
737 private:
738     Vector<uint32_t> *mPath;
739 
740     PathAdder(const PathAdder &);
741     PathAdder &operator=(const PathAdder &);
742 };
743 
underMetaDataPath(const Vector<uint32_t> & path)744 static bool underMetaDataPath(const Vector<uint32_t> &path) {
745     return path.size() >= 5
746         && path[0] == FOURCC('m', 'o', 'o', 'v')
747         && path[1] == FOURCC('u', 'd', 't', 'a')
748         && path[2] == FOURCC('m', 'e', 't', 'a')
749         && path[3] == FOURCC('i', 'l', 's', 't');
750 }
751 
752 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)753 static void convertTimeToDate(int64_t time_1904, String8 *s) {
754     time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
755 
756     char tmp[32];
757     strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
758 
759     s->setTo(tmp);
760 }
761 
parseChunk(off64_t * offset,int depth)762 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
763     ALOGV("entering parseChunk %lld/%d", *offset, depth);
764     uint32_t hdr[2];
765     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
766         return ERROR_IO;
767     }
768     uint64_t chunk_size = ntohl(hdr[0]);
769     uint32_t chunk_type = ntohl(hdr[1]);
770     off64_t data_offset = *offset + 8;
771 
772     if (chunk_size == 1) {
773         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
774             return ERROR_IO;
775         }
776         chunk_size = ntoh64(chunk_size);
777         data_offset += 8;
778 
779         if (chunk_size < 16) {
780             // The smallest valid chunk is 16 bytes long in this case.
781             return ERROR_MALFORMED;
782         }
783     } else if (chunk_size == 0) {
784         if (depth == 0) {
785             // atom extends to end of file
786             off64_t sourceSize;
787             if (mDataSource->getSize(&sourceSize) == OK) {
788                 chunk_size = (sourceSize - *offset);
789             } else {
790                 // XXX could we just pick a "sufficiently large" value here?
791                 ALOGE("atom size is 0, and data source has no size");
792                 return ERROR_MALFORMED;
793             }
794         } else {
795             // not allowed for non-toplevel atoms, skip it
796             *offset += 4;
797             return OK;
798         }
799     } else if (chunk_size < 8) {
800         // The smallest valid chunk is 8 bytes long.
801         ALOGE("invalid chunk size: %" PRIu64, chunk_size);
802         return ERROR_MALFORMED;
803     }
804 
805     char chunk[5];
806     MakeFourCCString(chunk_type, chunk);
807     ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
808 
809 #if 0
810     static const char kWhitespace[] = "                                        ";
811     const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
812     printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
813 
814     char buffer[256];
815     size_t n = chunk_size;
816     if (n > sizeof(buffer)) {
817         n = sizeof(buffer);
818     }
819     if (mDataSource->readAt(*offset, buffer, n)
820             < (ssize_t)n) {
821         return ERROR_IO;
822     }
823 
824     hexdump(buffer, n);
825 #endif
826 
827     PathAdder autoAdder(&mPath, chunk_type);
828 
829     off64_t chunk_data_size = *offset + chunk_size - data_offset;
830 
831     if (chunk_type != FOURCC('c', 'p', 'r', 't')
832             && chunk_type != FOURCC('c', 'o', 'v', 'r')
833             && mPath.size() == 5 && underMetaDataPath(mPath)) {
834         off64_t stop_offset = *offset + chunk_size;
835         *offset = data_offset;
836         while (*offset < stop_offset) {
837             status_t err = parseChunk(offset, depth + 1);
838             if (err != OK) {
839                 return err;
840             }
841         }
842 
843         if (*offset != stop_offset) {
844             return ERROR_MALFORMED;
845         }
846 
847         return OK;
848     }
849 
850     switch(chunk_type) {
851         case FOURCC('m', 'o', 'o', 'v'):
852         case FOURCC('t', 'r', 'a', 'k'):
853         case FOURCC('m', 'd', 'i', 'a'):
854         case FOURCC('m', 'i', 'n', 'f'):
855         case FOURCC('d', 'i', 'n', 'f'):
856         case FOURCC('s', 't', 'b', 'l'):
857         case FOURCC('m', 'v', 'e', 'x'):
858         case FOURCC('m', 'o', 'o', 'f'):
859         case FOURCC('t', 'r', 'a', 'f'):
860         case FOURCC('m', 'f', 'r', 'a'):
861         case FOURCC('u', 'd', 't', 'a'):
862         case FOURCC('i', 'l', 's', 't'):
863         case FOURCC('s', 'i', 'n', 'f'):
864         case FOURCC('s', 'c', 'h', 'i'):
865         case FOURCC('e', 'd', 't', 's'):
866         {
867             if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
868                 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
869 
870                 if (mDataSource->flags()
871                         & (DataSource::kWantsPrefetching
872                             | DataSource::kIsCachingDataSource)) {
873                     sp<MPEG4DataSource> cachedSource =
874                         new MPEG4DataSource(mDataSource);
875 
876                     if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
877                         mDataSource = cachedSource;
878                     }
879                 }
880 
881                 mLastTrack->sampleTable = new SampleTable(mDataSource);
882             }
883 
884             bool isTrack = false;
885             if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
886                 isTrack = true;
887 
888                 Track *track = new Track;
889                 track->next = NULL;
890                 if (mLastTrack) {
891                     mLastTrack->next = track;
892                 } else {
893                     mFirstTrack = track;
894                 }
895                 mLastTrack = track;
896 
897                 track->meta = new MetaData;
898                 track->includes_expensive_metadata = false;
899                 track->skipTrack = false;
900                 track->timescale = 0;
901                 track->meta->setCString(kKeyMIMEType, "application/octet-stream");
902             }
903 
904             off64_t stop_offset = *offset + chunk_size;
905             *offset = data_offset;
906             while (*offset < stop_offset) {
907                 status_t err = parseChunk(offset, depth + 1);
908                 if (err != OK) {
909                     return err;
910                 }
911             }
912 
913             if (*offset != stop_offset) {
914                 return ERROR_MALFORMED;
915             }
916 
917             if (isTrack) {
918                 if (mLastTrack->skipTrack) {
919                     Track *cur = mFirstTrack;
920 
921                     if (cur == mLastTrack) {
922                         delete cur;
923                         mFirstTrack = mLastTrack = NULL;
924                     } else {
925                         while (cur && cur->next != mLastTrack) {
926                             cur = cur->next;
927                         }
928                         cur->next = NULL;
929                         delete mLastTrack;
930                         mLastTrack = cur;
931                     }
932 
933                     return OK;
934                 }
935 
936                 status_t err = verifyTrack(mLastTrack);
937 
938                 if (err != OK) {
939                     return err;
940                 }
941             } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
942                 mInitCheck = OK;
943 
944                 if (!mIsDrm) {
945                     return UNKNOWN_ERROR;  // Return a dummy error.
946                 } else {
947                     return OK;
948                 }
949             }
950             break;
951         }
952 
        case FOURCC('e', 'l', 's', 't'):
        {
            // Edit list box. Only a single-entry edit list is interpreted; it
            // is turned into an encoder delay and encoder padding (both in
            // samples) on the current track, for gapless playback.
            *offset += chunk_size;

            // See 14496-12 8.6.6
            uint8_t version;
            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
                return ERROR_IO;
            }

            uint32_t entry_count;
            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
                return ERROR_IO;
            }

            if (entry_count != 1) {
                // we only support a single entry at the moment, for gapless playback
                ALOGW("ignoring edit list with %d entries", entry_count);
            } else if (mHeaderTimescale == 0) {
                // The conversions below divide by mHeaderTimescale.
                ALOGW("ignoring edit list because timescale is 0");
            } else {
                // The first entry follows the 4-byte version/flags word and
                // the 4-byte entry count.
                off64_t entriesoffset = data_offset + 8;
                uint64_t segment_duration;
                int64_t media_time;

                if (version == 1) {
                    // Version 1 stores both fields as 64-bit values.
                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
                        return ERROR_IO;
                    }
                } else if (version == 0) {
                    // Version 0 stores both fields as 32-bit values; mt is
                    // signed so that it is sign-extended into media_time.
                    uint32_t sd;
                    int32_t mt;
                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
                        return ERROR_IO;
                    }
                    segment_duration = sd;
                    media_time = mt;
                } else {
                    // Unknown box version.
                    return ERROR_IO;
                }

                // Scale both values by 1000000 / mHeaderTimescale, rounding to
                // nearest (presumably timescale units -> microseconds; confirm).
                // NOTE(review): halfscale is unsigned, so the media_time
                // expression is evaluated in unsigned 64-bit arithmetic; a
                // negative media_time would not be rounded as a signed value.
                // Neither multiplication is checked for overflow.
                uint64_t halfscale = mHeaderTimescale / 2;
                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;

                int64_t duration;
                int32_t samplerate;
                if (!mLastTrack) {
                    return ERROR_MALFORMED;
                }
                // Delay/padding can only be derived once both the track
                // duration and the sample rate have been recorded.
                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {

                    // Convert the scaled media time to a sample count, rounded
                    // to nearest. The 64-bit result is narrowed by setInt32.
                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);

                    int64_t paddingus = duration - (segment_duration + media_time);
                    if (paddingus < 0) {
                        // track duration from media header (which is what kKeyDuration is) might
                        // be slightly shorter than the segment duration, which would make the
                        // padding negative. Clamp to zero.
                        paddingus = 0;
                    }
                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
                }
            }
            break;
        }
1024 
1025         case FOURCC('f', 'r', 'm', 'a'):
1026         {
1027             *offset += chunk_size;
1028 
1029             uint32_t original_fourcc;
1030             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1031                 return ERROR_IO;
1032             }
1033             original_fourcc = ntohl(original_fourcc);
1034             ALOGV("read original format: %d", original_fourcc);
1035             mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1036             uint32_t num_channels = 0;
1037             uint32_t sample_rate = 0;
1038             if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1039                 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1040                 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1041             }
1042             break;
1043         }
1044 
1045         case FOURCC('t', 'e', 'n', 'c'):
1046         {
1047             *offset += chunk_size;
1048 
1049             if (chunk_size < 32) {
1050                 return ERROR_MALFORMED;
1051             }
1052 
1053             // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1054             // default IV size, 16 bytes default KeyID
1055             // (ISO 23001-7)
1056             char buf[4];
1057             memset(buf, 0, 4);
1058             if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1059                 return ERROR_IO;
1060             }
1061             uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1062             if (defaultAlgorithmId > 1) {
1063                 // only 0 (clear) and 1 (AES-128) are valid
1064                 return ERROR_MALFORMED;
1065             }
1066 
1067             memset(buf, 0, 4);
1068             if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1069                 return ERROR_IO;
1070             }
1071             uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1072 
1073             if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1074                     (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1075                 // only unencrypted data must have 0 IV size
1076                 return ERROR_MALFORMED;
1077             } else if (defaultIVSize != 0 &&
1078                     defaultIVSize != 8 &&
1079                     defaultIVSize != 16) {
1080                 // only supported sizes are 0, 8 and 16
1081                 return ERROR_MALFORMED;
1082             }
1083 
1084             uint8_t defaultKeyId[16];
1085 
1086             if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1087                 return ERROR_IO;
1088             }
1089 
1090             mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1091             mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1092             mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1093             break;
1094         }
1095 
1096         case FOURCC('t', 'k', 'h', 'd'):
1097         {
1098             *offset += chunk_size;
1099 
1100             status_t err;
1101             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1102                 return err;
1103             }
1104 
1105             break;
1106         }
1107 
1108         case FOURCC('p', 's', 's', 'h'):
1109         {
1110             *offset += chunk_size;
1111 
1112             PsshInfo pssh;
1113 
1114             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1115                 return ERROR_IO;
1116             }
1117 
1118             uint32_t psshdatalen = 0;
1119             if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1120                 return ERROR_IO;
1121             }
1122             pssh.datalen = ntohl(psshdatalen);
1123             ALOGV("pssh data size: %d", pssh.datalen);
1124             if (pssh.datalen + 20 > chunk_size) {
1125                 // pssh data length exceeds size of containing box
1126                 return ERROR_MALFORMED;
1127             }
1128 
1129             pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1130             if (pssh.data == NULL) {
1131                 return ERROR_MALFORMED;
1132             }
1133             ALOGV("allocated pssh @ %p", pssh.data);
1134             ssize_t requested = (ssize_t) pssh.datalen;
1135             if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1136                 return ERROR_IO;
1137             }
1138             mPssh.push_back(pssh);
1139 
1140             break;
1141         }
1142 
1143         case FOURCC('m', 'd', 'h', 'd'):
1144         {
1145             *offset += chunk_size;
1146 
1147             if (chunk_data_size < 4 || mLastTrack == NULL) {
1148                 return ERROR_MALFORMED;
1149             }
1150 
1151             uint8_t version;
1152             if (mDataSource->readAt(
1153                         data_offset, &version, sizeof(version))
1154                     < (ssize_t)sizeof(version)) {
1155                 return ERROR_IO;
1156             }
1157 
1158             off64_t timescale_offset;
1159 
1160             if (version == 1) {
1161                 timescale_offset = data_offset + 4 + 16;
1162             } else if (version == 0) {
1163                 timescale_offset = data_offset + 4 + 8;
1164             } else {
1165                 return ERROR_IO;
1166             }
1167 
1168             uint32_t timescale;
1169             if (mDataSource->readAt(
1170                         timescale_offset, &timescale, sizeof(timescale))
1171                     < (ssize_t)sizeof(timescale)) {
1172                 return ERROR_IO;
1173             }
1174 
1175             mLastTrack->timescale = ntohl(timescale);
1176 
1177             // 14496-12 says all ones means indeterminate, but some files seem to use
1178             // 0 instead. We treat both the same.
1179             int64_t duration = 0;
1180             if (version == 1) {
1181                 if (mDataSource->readAt(
1182                             timescale_offset + 4, &duration, sizeof(duration))
1183                         < (ssize_t)sizeof(duration)) {
1184                     return ERROR_IO;
1185                 }
1186                 if (duration != -1) {
1187                     duration = ntoh64(duration);
1188                 }
1189             } else {
1190                 uint32_t duration32;
1191                 if (mDataSource->readAt(
1192                             timescale_offset + 4, &duration32, sizeof(duration32))
1193                         < (ssize_t)sizeof(duration32)) {
1194                     return ERROR_IO;
1195                 }
1196                 if (duration32 != 0xffffffff) {
1197                     duration = ntohl(duration32);
1198                 }
1199             }
1200             if (duration != 0) {
1201                 mLastTrack->meta->setInt64(
1202                         kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1203             }
1204 
1205             uint8_t lang[2];
1206             off64_t lang_offset;
1207             if (version == 1) {
1208                 lang_offset = timescale_offset + 4 + 8;
1209             } else if (version == 0) {
1210                 lang_offset = timescale_offset + 4 + 4;
1211             } else {
1212                 return ERROR_IO;
1213             }
1214 
1215             if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1216                     < (ssize_t)sizeof(lang)) {
1217                 return ERROR_IO;
1218             }
1219 
1220             // To get the ISO-639-2/T three character language code
1221             // 1 bit pad followed by 3 5-bits characters. Each character
1222             // is packed as the difference between its ASCII value and 0x60.
1223             char lang_code[4];
1224             lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1225             lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1226             lang_code[2] = (lang[1] & 0x1f) + 0x60;
1227             lang_code[3] = '\0';
1228 
1229             mLastTrack->meta->setCString(
1230                     kKeyMediaLanguage, lang_code);
1231 
1232             break;
1233         }
1234 
1235         case FOURCC('s', 't', 's', 'd'):
1236         {
1237             if (chunk_data_size < 8) {
1238                 return ERROR_MALFORMED;
1239             }
1240 
1241             uint8_t buffer[8];
1242             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1243                 return ERROR_MALFORMED;
1244             }
1245 
1246             if (mDataSource->readAt(
1247                         data_offset, buffer, 8) < 8) {
1248                 return ERROR_IO;
1249             }
1250 
1251             if (U32_AT(buffer) != 0) {
1252                 // Should be version 0, flags 0.
1253                 return ERROR_MALFORMED;
1254             }
1255 
1256             uint32_t entry_count = U32_AT(&buffer[4]);
1257 
1258             if (entry_count > 1) {
1259                 // For 3GPP timed text, there could be multiple tx3g boxes contain
1260                 // multiple text display formats. These formats will be used to
1261                 // display the timed text.
1262                 // For encrypted files, there may also be more than one entry.
1263                 const char *mime;
1264                 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1265                 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1266                         strcasecmp(mime, "application/octet-stream")) {
1267                     // For now we only support a single type of media per track.
1268                     mLastTrack->skipTrack = true;
1269                     *offset += chunk_size;
1270                     break;
1271                 }
1272             }
1273             off64_t stop_offset = *offset + chunk_size;
1274             *offset = data_offset + 8;
1275             for (uint32_t i = 0; i < entry_count; ++i) {
1276                 status_t err = parseChunk(offset, depth + 1);
1277                 if (err != OK) {
1278                     return err;
1279                 }
1280             }
1281 
1282             if (*offset != stop_offset) {
1283                 return ERROR_MALFORMED;
1284             }
1285             break;
1286         }
1287 
1288         case FOURCC('m', 'p', '4', 'a'):
1289         case FOURCC('e', 'n', 'c', 'a'):
1290         case FOURCC('s', 'a', 'm', 'r'):
1291         case FOURCC('s', 'a', 'w', 'b'):
1292         {
1293             uint8_t buffer[8 + 20];
1294             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1295                 // Basic AudioSampleEntry size.
1296                 return ERROR_MALFORMED;
1297             }
1298 
1299             if (mDataSource->readAt(
1300                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1301                 return ERROR_IO;
1302             }
1303 
1304             uint16_t data_ref_index = U16_AT(&buffer[6]);
1305             uint32_t num_channels = U16_AT(&buffer[16]);
1306 
1307             uint16_t sample_size = U16_AT(&buffer[18]);
1308             uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1309 
1310             if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1311                 // if the chunk type is enca, we'll get the type from the sinf/frma box later
1312                 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1313                 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1314             }
1315             ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1316                    chunk, num_channels, sample_size, sample_rate);
1317             mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1318             mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1319 
1320             off64_t stop_offset = *offset + chunk_size;
1321             *offset = data_offset + sizeof(buffer);
1322             while (*offset < stop_offset) {
1323                 status_t err = parseChunk(offset, depth + 1);
1324                 if (err != OK) {
1325                     return err;
1326                 }
1327             }
1328 
1329             if (*offset != stop_offset) {
1330                 return ERROR_MALFORMED;
1331             }
1332             break;
1333         }
1334 
1335         case FOURCC('m', 'p', '4', 'v'):
1336         case FOURCC('e', 'n', 'c', 'v'):
1337         case FOURCC('s', '2', '6', '3'):
1338         case FOURCC('H', '2', '6', '3'):
1339         case FOURCC('h', '2', '6', '3'):
1340         case FOURCC('a', 'v', 'c', '1'):
1341         case FOURCC('h', 'v', 'c', '1'):
1342         case FOURCC('h', 'e', 'v', '1'):
1343         {
1344             mHasVideo = true;
1345 
1346             uint8_t buffer[78];
1347             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1348                 // Basic VideoSampleEntry size.
1349                 return ERROR_MALFORMED;
1350             }
1351 
1352             if (mDataSource->readAt(
1353                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1354                 return ERROR_IO;
1355             }
1356 
1357             uint16_t data_ref_index = U16_AT(&buffer[6]);
1358             uint16_t width = U16_AT(&buffer[6 + 18]);
1359             uint16_t height = U16_AT(&buffer[6 + 20]);
1360 
1361             // The video sample is not standard-compliant if it has invalid dimension.
1362             // Use some default width and height value, and
1363             // let the decoder figure out the actual width and height (and thus
1364             // be prepared for INFO_FOMRAT_CHANGED event).
1365             if (width == 0)  width  = 352;
1366             if (height == 0) height = 288;
1367 
1368             // printf("*** coding='%s' width=%d height=%d\n",
1369             //        chunk, width, height);
1370 
1371             if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1372                 // if the chunk type is encv, we'll get the type from the sinf/frma box later
1373                 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1374             }
1375             mLastTrack->meta->setInt32(kKeyWidth, width);
1376             mLastTrack->meta->setInt32(kKeyHeight, height);
1377 
1378             off64_t stop_offset = *offset + chunk_size;
1379             *offset = data_offset + sizeof(buffer);
1380             while (*offset < stop_offset) {
1381                 status_t err = parseChunk(offset, depth + 1);
1382                 if (err != OK) {
1383                     return err;
1384                 }
1385             }
1386 
1387             if (*offset != stop_offset) {
1388                 return ERROR_MALFORMED;
1389             }
1390             break;
1391         }
1392 
1393         case FOURCC('s', 't', 'c', 'o'):
1394         case FOURCC('c', 'o', '6', '4'):
1395         {
1396             status_t err =
1397                 mLastTrack->sampleTable->setChunkOffsetParams(
1398                         chunk_type, data_offset, chunk_data_size);
1399 
1400             *offset += chunk_size;
1401 
1402             if (err != OK) {
1403                 return err;
1404             }
1405 
1406             break;
1407         }
1408 
1409         case FOURCC('s', 't', 's', 'c'):
1410         {
1411             status_t err =
1412                 mLastTrack->sampleTable->setSampleToChunkParams(
1413                         data_offset, chunk_data_size);
1414 
1415             *offset += chunk_size;
1416 
1417             if (err != OK) {
1418                 return err;
1419             }
1420 
1421             break;
1422         }
1423 
1424         case FOURCC('s', 't', 's', 'z'):
1425         case FOURCC('s', 't', 'z', '2'):
1426         {
1427             status_t err =
1428                 mLastTrack->sampleTable->setSampleSizeParams(
1429                         chunk_type, data_offset, chunk_data_size);
1430 
1431             *offset += chunk_size;
1432 
1433             if (err != OK) {
1434                 return err;
1435             }
1436 
1437             size_t max_size;
1438             err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1439 
1440             if (err != OK) {
1441                 return err;
1442             }
1443 
1444             if (max_size != 0) {
1445                 // Assume that a given buffer only contains at most 10 chunks,
1446                 // each chunk originally prefixed with a 2 byte length will
1447                 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1448                 // and thus will grow by 2 bytes per chunk.
1449                 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1450             } else {
1451                 // No size was specified. Pick a conservatively large size.
1452                 int32_t width, height;
1453                 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1454                     !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1455                     ALOGE("No width or height, assuming worst case 1080p");
1456                     width = 1920;
1457                     height = 1080;
1458                 }
1459 
1460                 const char *mime;
1461                 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1462                 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1463                     // AVC requires compression ratio of at least 2, and uses
1464                     // macroblocks
1465                     max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1466                 } else {
1467                     // For all other formats there is no minimum compression
1468                     // ratio. Use compression ratio of 1.
1469                     max_size = width * height * 3 / 2;
1470                 }
1471                 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1472             }
1473 
1474             // NOTE: setting another piece of metadata invalidates any pointers (such as the
1475             // mimetype) previously obtained, so don't cache them.
1476             const char *mime;
1477             CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1478             // Calculate average frame rate.
1479             if (!strncasecmp("video/", mime, 6)) {
1480                 size_t nSamples = mLastTrack->sampleTable->countSamples();
1481                 int64_t durationUs;
1482                 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1483                     if (durationUs > 0) {
1484                         int32_t frameRate = (nSamples * 1000000LL +
1485                                     (durationUs >> 1)) / durationUs;
1486                         mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1487                     }
1488                 }
1489             }
1490 
1491             break;
1492         }
1493 
1494         case FOURCC('s', 't', 't', 's'):
1495         {
1496             *offset += chunk_size;
1497 
1498             status_t err =
1499                 mLastTrack->sampleTable->setTimeToSampleParams(
1500                         data_offset, chunk_data_size);
1501 
1502             if (err != OK) {
1503                 return err;
1504             }
1505 
1506             break;
1507         }
1508 
1509         case FOURCC('c', 't', 't', 's'):
1510         {
1511             *offset += chunk_size;
1512 
1513             status_t err =
1514                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1515                         data_offset, chunk_data_size);
1516 
1517             if (err != OK) {
1518                 return err;
1519             }
1520 
1521             break;
1522         }
1523 
1524         case FOURCC('s', 't', 's', 's'):
1525         {
1526             *offset += chunk_size;
1527 
1528             status_t err =
1529                 mLastTrack->sampleTable->setSyncSampleParams(
1530                         data_offset, chunk_data_size);
1531 
1532             if (err != OK) {
1533                 return err;
1534             }
1535 
1536             break;
1537         }
1538 
1539         // @xyz
1540         case FOURCC('\xA9', 'x', 'y', 'z'):
1541         {
1542             *offset += chunk_size;
1543 
1544             // Best case the total data length inside "@xyz" box
1545             // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1546             // where "\x00\x04" is the text string length with value = 4,
1547             // "\0x15\xc7" is the language code = en, and "0+0" is a
1548             // location (string) value with longitude = 0 and latitude = 0.
1549             if (chunk_data_size < 8) {
1550                 return ERROR_MALFORMED;
1551             }
1552 
1553             // Worst case the location string length would be 18,
1554             // for instance +90.0000-180.0000, without the trailing "/" and
1555             // the string length + language code.
1556             char buffer[18];
1557 
1558             // Substracting 5 from the data size is because the text string length +
1559             // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1560             off64_t location_length = chunk_data_size - 5;
1561             if (location_length >= (off64_t) sizeof(buffer)) {
1562                 return ERROR_MALFORMED;
1563             }
1564 
1565             if (mDataSource->readAt(
1566                         data_offset + 4, buffer, location_length) < location_length) {
1567                 return ERROR_IO;
1568             }
1569 
1570             buffer[location_length] = '\0';
1571             mFileMetaData->setCString(kKeyLocation, buffer);
1572             break;
1573         }
1574 
1575         case FOURCC('e', 's', 'd', 's'):
1576         {
1577             *offset += chunk_size;
1578 
1579             if (chunk_data_size < 4) {
1580                 return ERROR_MALFORMED;
1581             }
1582 
1583             uint8_t buffer[256];
1584             if (chunk_data_size > (off64_t)sizeof(buffer)) {
1585                 return ERROR_BUFFER_TOO_SMALL;
1586             }
1587 
1588             if (mDataSource->readAt(
1589                         data_offset, buffer, chunk_data_size) < chunk_data_size) {
1590                 return ERROR_IO;
1591             }
1592 
1593             if (U32_AT(buffer) != 0) {
1594                 // Should be version 0, flags 0.
1595                 return ERROR_MALFORMED;
1596             }
1597 
1598             mLastTrack->meta->setData(
1599                     kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1600 
1601             if (mPath.size() >= 2
1602                     && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1603                 // Information from the ESDS must be relied on for proper
1604                 // setup of sample rate and channel count for MPEG4 Audio.
1605                 // The generic header appears to only contain generic
1606                 // information...
1607 
1608                 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1609                         &buffer[4], chunk_data_size - 4);
1610 
1611                 if (err != OK) {
1612                     return err;
1613                 }
1614             }
1615 
1616             break;
1617         }
1618 
1619         case FOURCC('a', 'v', 'c', 'C'):
1620         {
1621             *offset += chunk_size;
1622 
1623             sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1624 
1625             if (mDataSource->readAt(
1626                         data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1627                 return ERROR_IO;
1628             }
1629 
1630             mLastTrack->meta->setData(
1631                     kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1632 
1633             break;
1634         }
1635         case FOURCC('h', 'v', 'c', 'C'):
1636         {
1637             sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1638 
1639             if (mDataSource->readAt(
1640                         data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1641                 return ERROR_IO;
1642             }
1643 
1644             mLastTrack->meta->setData(
1645                     kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1646 
1647             *offset += chunk_size;
1648             break;
1649         }
1650 
1651         case FOURCC('d', '2', '6', '3'):
1652         {
1653             *offset += chunk_size;
1654             /*
1655              * d263 contains a fixed 7 bytes part:
1656              *   vendor - 4 bytes
1657              *   version - 1 byte
1658              *   level - 1 byte
1659              *   profile - 1 byte
1660              * optionally, "d263" box itself may contain a 16-byte
1661              * bit rate box (bitr)
1662              *   average bit rate - 4 bytes
1663              *   max bit rate - 4 bytes
1664              */
1665             char buffer[23];
1666             if (chunk_data_size != 7 &&
1667                 chunk_data_size != 23) {
1668                 ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1669                 return ERROR_MALFORMED;
1670             }
1671 
1672             if (mDataSource->readAt(
1673                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
1674                 return ERROR_IO;
1675             }
1676 
1677             mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1678 
1679             break;
1680         }
1681 
1682         case FOURCC('m', 'e', 't', 'a'):
1683         {
1684             uint8_t buffer[4];
1685             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1686                 *offset += chunk_size;
1687                 return ERROR_MALFORMED;
1688             }
1689 
1690             if (mDataSource->readAt(
1691                         data_offset, buffer, 4) < 4) {
1692                 *offset += chunk_size;
1693                 return ERROR_IO;
1694             }
1695 
1696             if (U32_AT(buffer) != 0) {
1697                 // Should be version 0, flags 0.
1698 
1699                 // If it's not, let's assume this is one of those
1700                 // apparently malformed chunks that don't have flags
1701                 // and completely different semantics than what's
1702                 // in the MPEG4 specs and skip it.
1703                 *offset += chunk_size;
1704                 return OK;
1705             }
1706 
1707             off64_t stop_offset = *offset + chunk_size;
1708             *offset = data_offset + sizeof(buffer);
1709             while (*offset < stop_offset) {
1710                 status_t err = parseChunk(offset, depth + 1);
1711                 if (err != OK) {
1712                     return err;
1713                 }
1714             }
1715 
1716             if (*offset != stop_offset) {
1717                 return ERROR_MALFORMED;
1718             }
1719             break;
1720         }
1721 
1722         case FOURCC('m', 'e', 'a', 'n'):
1723         case FOURCC('n', 'a', 'm', 'e'):
1724         case FOURCC('d', 'a', 't', 'a'):
1725         {
1726             *offset += chunk_size;
1727 
1728             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1729                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1730 
1731                 if (err != OK) {
1732                     return err;
1733                 }
1734             }
1735 
1736             break;
1737         }
1738 
1739         case FOURCC('m', 'v', 'h', 'd'):
1740         {
1741             *offset += chunk_size;
1742 
1743             if (chunk_data_size < 32) {
1744                 return ERROR_MALFORMED;
1745             }
1746 
1747             uint8_t header[32];
1748             if (mDataSource->readAt(
1749                         data_offset, header, sizeof(header))
1750                     < (ssize_t)sizeof(header)) {
1751                 return ERROR_IO;
1752             }
1753 
1754             uint64_t creationTime;
1755             uint64_t duration = 0;
1756             if (header[0] == 1) {
1757                 creationTime = U64_AT(&header[4]);
1758                 mHeaderTimescale = U32_AT(&header[20]);
1759                 duration = U64_AT(&header[24]);
1760                 if (duration == 0xffffffffffffffff) {
1761                     duration = 0;
1762                 }
1763             } else if (header[0] != 0) {
1764                 return ERROR_MALFORMED;
1765             } else {
1766                 creationTime = U32_AT(&header[4]);
1767                 mHeaderTimescale = U32_AT(&header[12]);
1768                 uint32_t d32 = U32_AT(&header[16]);
1769                 if (d32 == 0xffffffff) {
1770                     d32 = 0;
1771                 }
1772                 duration = d32;
1773             }
1774             if (duration != 0) {
1775                 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1776             }
1777 
1778             String8 s;
1779             convertTimeToDate(creationTime, &s);
1780 
1781             mFileMetaData->setCString(kKeyDate, s.string());
1782 
1783             break;
1784         }
1785 
1786         case FOURCC('m', 'e', 'h', 'd'):
1787         {
1788             *offset += chunk_size;
1789 
1790             if (chunk_data_size < 8) {
1791                 return ERROR_MALFORMED;
1792             }
1793 
1794             uint8_t flags[4];
1795             if (mDataSource->readAt(
1796                         data_offset, flags, sizeof(flags))
1797                     < (ssize_t)sizeof(flags)) {
1798                 return ERROR_IO;
1799             }
1800 
1801             uint64_t duration = 0;
1802             if (flags[0] == 1) {
1803                 // 64 bit
1804                 if (chunk_data_size < 12) {
1805                     return ERROR_MALFORMED;
1806                 }
1807                 mDataSource->getUInt64(data_offset + 4, &duration);
1808                 if (duration == 0xffffffffffffffff) {
1809                     duration = 0;
1810                 }
1811             } else if (flags[0] == 0) {
1812                 // 32 bit
1813                 uint32_t d32;
1814                 mDataSource->getUInt32(data_offset + 4, &d32);
1815                 if (d32 == 0xffffffff) {
1816                     d32 = 0;
1817                 }
1818                 duration = d32;
1819             } else {
1820                 return ERROR_MALFORMED;
1821             }
1822 
1823             if (duration != 0) {
1824                 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1825             }
1826 
1827             break;
1828         }
1829 
1830         case FOURCC('m', 'd', 'a', 't'):
1831         {
1832             ALOGV("mdat chunk, drm: %d", mIsDrm);
1833             if (!mIsDrm) {
1834                 *offset += chunk_size;
1835                 break;
1836             }
1837 
1838             if (chunk_size < 8) {
1839                 return ERROR_MALFORMED;
1840             }
1841 
1842             return parseDrmSINF(offset, data_offset);
1843         }
1844 
1845         case FOURCC('h', 'd', 'l', 'r'):
1846         {
1847             *offset += chunk_size;
1848 
1849             uint32_t buffer;
1850             if (mDataSource->readAt(
1851                         data_offset + 8, &buffer, 4) < 4) {
1852                 return ERROR_IO;
1853             }
1854 
1855             uint32_t type = ntohl(buffer);
1856             // For the 3GPP file format, the handler-type within the 'hdlr' box
1857             // shall be 'text'. We also want to support 'sbtl' handler type
1858             // for a practical reason as various MPEG4 containers use it.
1859             if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1860                 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1861             }
1862 
1863             break;
1864         }
1865 
1866         case FOURCC('t', 'r', 'e', 'x'):
1867         {
1868             *offset += chunk_size;
1869 
1870             if (chunk_data_size < 24) {
1871                 return ERROR_IO;
1872             }
1873             uint32_t duration;
1874             Trex trex;
1875             if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
1876                 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
1877                 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
1878                 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
1879                 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
1880                 return ERROR_IO;
1881             }
1882             mTrex.add(trex);
1883             break;
1884         }
1885 
1886         case FOURCC('t', 'x', '3', 'g'):
1887         {
1888             uint32_t type;
1889             const void *data;
1890             size_t size = 0;
1891             if (!mLastTrack->meta->findData(
1892                     kKeyTextFormatData, &type, &data, &size)) {
1893                 size = 0;
1894             }
1895 
1896             uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
1897             if (buffer == NULL) {
1898                 return ERROR_MALFORMED;
1899             }
1900 
1901             if (size > 0) {
1902                 memcpy(buffer, data, size);
1903             }
1904 
1905             if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1906                     < chunk_size) {
1907                 delete[] buffer;
1908                 buffer = NULL;
1909 
1910                 // advance read pointer so we don't end up reading this again
1911                 *offset += chunk_size;
1912                 return ERROR_IO;
1913             }
1914 
1915             mLastTrack->meta->setData(
1916                     kKeyTextFormatData, 0, buffer, size + chunk_size);
1917 
1918             delete[] buffer;
1919 
1920             *offset += chunk_size;
1921             break;
1922         }
1923 
1924         case FOURCC('c', 'o', 'v', 'r'):
1925         {
1926             *offset += chunk_size;
1927 
1928             if (mFileMetaData != NULL) {
1929                 ALOGV("chunk_data_size = %lld and data_offset = %lld",
1930                         chunk_data_size, data_offset);
1931                 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1932                 if (mDataSource->readAt(
1933                     data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1934                     return ERROR_IO;
1935                 }
1936                 const int kSkipBytesOfDataBox = 16;
1937                 mFileMetaData->setData(
1938                     kKeyAlbumArt, MetaData::TYPE_NONE,
1939                     buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1940             }
1941 
1942             break;
1943         }
1944 
1945         case FOURCC('t', 'i', 't', 'l'):
1946         case FOURCC('p', 'e', 'r', 'f'):
1947         case FOURCC('a', 'u', 't', 'h'):
1948         case FOURCC('g', 'n', 'r', 'e'):
1949         case FOURCC('a', 'l', 'b', 'm'):
1950         case FOURCC('y', 'r', 'r', 'c'):
1951         {
1952             *offset += chunk_size;
1953 
1954             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1955 
1956             if (err != OK) {
1957                 return err;
1958             }
1959 
1960             break;
1961         }
1962 
1963         case FOURCC('I', 'D', '3', '2'):
1964         {
1965             *offset += chunk_size;
1966 
1967             if (chunk_data_size < 6) {
1968                 return ERROR_MALFORMED;
1969             }
1970 
1971             parseID3v2MetaData(data_offset + 6);
1972 
1973             break;
1974         }
1975 
1976         case FOURCC('-', '-', '-', '-'):
1977         {
1978             mLastCommentMean.clear();
1979             mLastCommentName.clear();
1980             mLastCommentData.clear();
1981             *offset += chunk_size;
1982             break;
1983         }
1984 
1985         case FOURCC('s', 'i', 'd', 'x'):
1986         {
1987             parseSegmentIndex(data_offset, chunk_data_size);
1988             *offset += chunk_size;
1989             return UNKNOWN_ERROR; // stop parsing after sidx
1990         }
1991 
1992         default:
1993         {
1994             *offset += chunk_size;
1995             break;
1996         }
1997     }
1998 
1999     return OK;
2000 }
2001 
parseSegmentIndex(off64_t offset,size_t size)2002 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2003   ALOGV("MPEG4Extractor::parseSegmentIndex");
2004 
2005     if (size < 12) {
2006       return -EINVAL;
2007     }
2008 
2009     uint32_t flags;
2010     if (!mDataSource->getUInt32(offset, &flags)) {
2011         return ERROR_MALFORMED;
2012     }
2013 
2014     uint32_t version = flags >> 24;
2015     flags &= 0xffffff;
2016 
2017     ALOGV("sidx version %d", version);
2018 
2019     uint32_t referenceId;
2020     if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2021         return ERROR_MALFORMED;
2022     }
2023 
2024     uint32_t timeScale;
2025     if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2026         return ERROR_MALFORMED;
2027     }
2028     ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2029 
2030     uint64_t earliestPresentationTime;
2031     uint64_t firstOffset;
2032 
2033     offset += 12;
2034     size -= 12;
2035 
2036     if (version == 0) {
2037         if (size < 8) {
2038             return -EINVAL;
2039         }
2040         uint32_t tmp;
2041         if (!mDataSource->getUInt32(offset, &tmp)) {
2042             return ERROR_MALFORMED;
2043         }
2044         earliestPresentationTime = tmp;
2045         if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2046             return ERROR_MALFORMED;
2047         }
2048         firstOffset = tmp;
2049         offset += 8;
2050         size -= 8;
2051     } else {
2052         if (size < 16) {
2053             return -EINVAL;
2054         }
2055         if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2056             return ERROR_MALFORMED;
2057         }
2058         if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2059             return ERROR_MALFORMED;
2060         }
2061         offset += 16;
2062         size -= 16;
2063     }
2064     ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2065 
2066     if (size < 4) {
2067         return -EINVAL;
2068     }
2069 
2070     uint16_t referenceCount;
2071     if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2072         return ERROR_MALFORMED;
2073     }
2074     offset += 4;
2075     size -= 4;
2076     ALOGV("refcount: %d", referenceCount);
2077 
2078     if (size < referenceCount * 12) {
2079         return -EINVAL;
2080     }
2081 
2082     uint64_t total_duration = 0;
2083     for (unsigned int i = 0; i < referenceCount; i++) {
2084         uint32_t d1, d2, d3;
2085 
2086         if (!mDataSource->getUInt32(offset, &d1) ||     // size
2087             !mDataSource->getUInt32(offset + 4, &d2) || // duration
2088             !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2089             return ERROR_MALFORMED;
2090         }
2091 
2092         if (d1 & 0x80000000) {
2093             ALOGW("sub-sidx boxes not supported yet");
2094         }
2095         bool sap = d3 & 0x80000000;
2096         uint32_t saptype = (d3 >> 28) & 7;
2097         if (!sap || (saptype != 1 && saptype != 2)) {
2098             // type 1 and 2 are sync samples
2099             ALOGW("not a stream access point, or unsupported type: %08x", d3);
2100         }
2101         total_duration += d2;
2102         offset += 12;
2103         ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2104         SidxEntry se;
2105         se.mSize = d1 & 0x7fffffff;
2106         se.mDurationUs = 1000000LL * d2 / timeScale;
2107         mSidxEntries.add(se);
2108     }
2109 
2110     uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2111 
2112     int64_t metaDuration;
2113     if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2114         mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2115     }
2116     return OK;
2117 }
2118 
2119 
2120 
parseTrackHeader(off64_t data_offset,off64_t data_size)2121 status_t MPEG4Extractor::parseTrackHeader(
2122         off64_t data_offset, off64_t data_size) {
2123     if (data_size < 4) {
2124         return ERROR_MALFORMED;
2125     }
2126 
2127     uint8_t version;
2128     if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2129         return ERROR_IO;
2130     }
2131 
2132     size_t dynSize = (version == 1) ? 36 : 24;
2133 
2134     uint8_t buffer[36 + 60];
2135 
2136     if (data_size != (off64_t)dynSize + 60) {
2137         return ERROR_MALFORMED;
2138     }
2139 
2140     if (mDataSource->readAt(
2141                 data_offset, buffer, data_size) < (ssize_t)data_size) {
2142         return ERROR_IO;
2143     }
2144 
2145     uint64_t ctime, mtime, duration;
2146     int32_t id;
2147 
2148     if (version == 1) {
2149         ctime = U64_AT(&buffer[4]);
2150         mtime = U64_AT(&buffer[12]);
2151         id = U32_AT(&buffer[20]);
2152         duration = U64_AT(&buffer[28]);
2153     } else if (version == 0) {
2154         ctime = U32_AT(&buffer[4]);
2155         mtime = U32_AT(&buffer[8]);
2156         id = U32_AT(&buffer[12]);
2157         duration = U32_AT(&buffer[20]);
2158     } else {
2159         return ERROR_UNSUPPORTED;
2160     }
2161 
2162     mLastTrack->meta->setInt32(kKeyTrackID, id);
2163 
2164     size_t matrixOffset = dynSize + 16;
2165     int32_t a00 = U32_AT(&buffer[matrixOffset]);
2166     int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2167     int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2168     int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2169     int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2170     int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2171 
2172 #if 0
2173     ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2174          a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2175     ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2176          a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2177 #endif
2178 
2179     uint32_t rotationDegrees;
2180 
2181     static const int32_t kFixedOne = 0x10000;
2182     if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2183         // Identity, no rotation
2184         rotationDegrees = 0;
2185     } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2186         rotationDegrees = 90;
2187     } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2188         rotationDegrees = 270;
2189     } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2190         rotationDegrees = 180;
2191     } else {
2192         ALOGW("We only support 0,90,180,270 degree rotation matrices");
2193         rotationDegrees = 0;
2194     }
2195 
2196     if (rotationDegrees != 0) {
2197         mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2198     }
2199 
2200     // Handle presentation display size, which could be different
2201     // from the image size indicated by kKeyWidth and kKeyHeight.
2202     uint32_t width = U32_AT(&buffer[dynSize + 52]);
2203     uint32_t height = U32_AT(&buffer[dynSize + 56]);
2204     mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2205     mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2206 
2207     return OK;
2208 }
2209 
parseITunesMetaData(off64_t offset,size_t size)2210 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2211     if (size < 4) {
2212         return ERROR_MALFORMED;
2213     }
2214 
2215     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2216     if (buffer == NULL) {
2217         return ERROR_MALFORMED;
2218     }
2219     if (mDataSource->readAt(
2220                 offset, buffer, size) != (ssize_t)size) {
2221         delete[] buffer;
2222         buffer = NULL;
2223 
2224         return ERROR_IO;
2225     }
2226 
2227     uint32_t flags = U32_AT(buffer);
2228 
2229     uint32_t metadataKey = 0;
2230     char chunk[5];
2231     MakeFourCCString(mPath[4], chunk);
2232     ALOGV("meta: %s @ %lld", chunk, offset);
2233     switch (mPath[4]) {
2234         case FOURCC(0xa9, 'a', 'l', 'b'):
2235         {
2236             metadataKey = kKeyAlbum;
2237             break;
2238         }
2239         case FOURCC(0xa9, 'A', 'R', 'T'):
2240         {
2241             metadataKey = kKeyArtist;
2242             break;
2243         }
2244         case FOURCC('a', 'A', 'R', 'T'):
2245         {
2246             metadataKey = kKeyAlbumArtist;
2247             break;
2248         }
2249         case FOURCC(0xa9, 'd', 'a', 'y'):
2250         {
2251             metadataKey = kKeyYear;
2252             break;
2253         }
2254         case FOURCC(0xa9, 'n', 'a', 'm'):
2255         {
2256             metadataKey = kKeyTitle;
2257             break;
2258         }
2259         case FOURCC(0xa9, 'w', 'r', 't'):
2260         {
2261             metadataKey = kKeyWriter;
2262             break;
2263         }
2264         case FOURCC('c', 'o', 'v', 'r'):
2265         {
2266             metadataKey = kKeyAlbumArt;
2267             break;
2268         }
2269         case FOURCC('g', 'n', 'r', 'e'):
2270         {
2271             metadataKey = kKeyGenre;
2272             break;
2273         }
2274         case FOURCC(0xa9, 'g', 'e', 'n'):
2275         {
2276             metadataKey = kKeyGenre;
2277             break;
2278         }
2279         case FOURCC('c', 'p', 'i', 'l'):
2280         {
2281             if (size == 9 && flags == 21) {
2282                 char tmp[16];
2283                 sprintf(tmp, "%d",
2284                         (int)buffer[size - 1]);
2285 
2286                 mFileMetaData->setCString(kKeyCompilation, tmp);
2287             }
2288             break;
2289         }
2290         case FOURCC('t', 'r', 'k', 'n'):
2291         {
2292             if (size == 16 && flags == 0) {
2293                 char tmp[16];
2294                 uint16_t* pTrack = (uint16_t*)&buffer[10];
2295                 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2296                 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2297 
2298                 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2299             }
2300             break;
2301         }
2302         case FOURCC('d', 'i', 's', 'k'):
2303         {
2304             if ((size == 14 || size == 16) && flags == 0) {
2305                 char tmp[16];
2306                 uint16_t* pDisc = (uint16_t*)&buffer[10];
2307                 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2308                 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2309 
2310                 mFileMetaData->setCString(kKeyDiscNumber, tmp);
2311             }
2312             break;
2313         }
2314         case FOURCC('-', '-', '-', '-'):
2315         {
2316             buffer[size] = '\0';
2317             switch (mPath[5]) {
2318                 case FOURCC('m', 'e', 'a', 'n'):
2319                     mLastCommentMean.setTo((const char *)buffer + 4);
2320                     break;
2321                 case FOURCC('n', 'a', 'm', 'e'):
2322                     mLastCommentName.setTo((const char *)buffer + 4);
2323                     break;
2324                 case FOURCC('d', 'a', 't', 'a'):
2325                     mLastCommentData.setTo((const char *)buffer + 8);
2326                     break;
2327             }
2328 
2329             // Once we have a set of mean/name/data info, go ahead and process
2330             // it to see if its something we are interested in.  Whether or not
2331             // were are interested in the specific tag, make sure to clear out
2332             // the set so we can be ready to process another tuple should one
2333             // show up later in the file.
2334             if ((mLastCommentMean.length() != 0) &&
2335                 (mLastCommentName.length() != 0) &&
2336                 (mLastCommentData.length() != 0)) {
2337 
2338                 if (mLastCommentMean == "com.apple.iTunes"
2339                         && mLastCommentName == "iTunSMPB") {
2340                     int32_t delay, padding;
2341                     if (sscanf(mLastCommentData,
2342                                " %*x %x %x %*x", &delay, &padding) == 2) {
2343                         mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2344                         mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2345                     }
2346                 }
2347 
2348                 mLastCommentMean.clear();
2349                 mLastCommentName.clear();
2350                 mLastCommentData.clear();
2351             }
2352             break;
2353         }
2354 
2355         default:
2356             break;
2357     }
2358 
2359     if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2360         if (metadataKey == kKeyAlbumArt) {
2361             mFileMetaData->setData(
2362                     kKeyAlbumArt, MetaData::TYPE_NONE,
2363                     buffer + 8, size - 8);
2364         } else if (metadataKey == kKeyGenre) {
2365             if (flags == 0) {
2366                 // uint8_t genre code, iTunes genre codes are
2367                 // the standard id3 codes, except they start
2368                 // at 1 instead of 0 (e.g. Pop is 14, not 13)
2369                 // We use standard id3 numbering, so subtract 1.
2370                 int genrecode = (int)buffer[size - 1];
2371                 genrecode--;
2372                 if (genrecode < 0) {
2373                     genrecode = 255; // reserved for 'unknown genre'
2374                 }
2375                 char genre[10];
2376                 sprintf(genre, "%d", genrecode);
2377 
2378                 mFileMetaData->setCString(metadataKey, genre);
2379             } else if (flags == 1) {
2380                 // custom genre string
2381                 buffer[size] = '\0';
2382 
2383                 mFileMetaData->setCString(
2384                         metadataKey, (const char *)buffer + 8);
2385             }
2386         } else {
2387             buffer[size] = '\0';
2388 
2389             mFileMetaData->setCString(
2390                     metadataKey, (const char *)buffer + 8);
2391         }
2392     }
2393 
2394     delete[] buffer;
2395     buffer = NULL;
2396 
2397     return OK;
2398 }
2399 
parse3GPPMetaData(off64_t offset,size_t size,int depth)2400 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2401     if (size < 4) {
2402         return ERROR_MALFORMED;
2403     }
2404 
2405     uint8_t *buffer = new (std::nothrow) uint8_t[size];
2406     if (buffer == NULL) {
2407         return ERROR_MALFORMED;
2408     }
2409     if (mDataSource->readAt(
2410                 offset, buffer, size) != (ssize_t)size) {
2411         delete[] buffer;
2412         buffer = NULL;
2413 
2414         return ERROR_IO;
2415     }
2416 
2417     uint32_t metadataKey = 0;
2418     switch (mPath[depth]) {
2419         case FOURCC('t', 'i', 't', 'l'):
2420         {
2421             metadataKey = kKeyTitle;
2422             break;
2423         }
2424         case FOURCC('p', 'e', 'r', 'f'):
2425         {
2426             metadataKey = kKeyArtist;
2427             break;
2428         }
2429         case FOURCC('a', 'u', 't', 'h'):
2430         {
2431             metadataKey = kKeyWriter;
2432             break;
2433         }
2434         case FOURCC('g', 'n', 'r', 'e'):
2435         {
2436             metadataKey = kKeyGenre;
2437             break;
2438         }
2439         case FOURCC('a', 'l', 'b', 'm'):
2440         {
2441             if (buffer[size - 1] != '\0') {
2442               char tmp[4];
2443               sprintf(tmp, "%u", buffer[size - 1]);
2444 
2445               mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2446             }
2447 
2448             metadataKey = kKeyAlbum;
2449             break;
2450         }
2451         case FOURCC('y', 'r', 'r', 'c'):
2452         {
2453             char tmp[5];
2454             uint16_t year = U16_AT(&buffer[4]);
2455 
2456             if (year < 10000) {
2457                 sprintf(tmp, "%u", year);
2458 
2459                 mFileMetaData->setCString(kKeyYear, tmp);
2460             }
2461             break;
2462         }
2463 
2464         default:
2465             break;
2466     }
2467 
2468     if (metadataKey > 0) {
2469         bool isUTF8 = true; // Common case
2470         char16_t *framedata = NULL;
2471         int len16 = 0; // Number of UTF-16 characters
2472 
2473         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2474         if (size - 6 >= 4) {
2475             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2476             framedata = (char16_t *)(buffer + 6);
2477             if (0xfffe == *framedata) {
2478                 // endianness marker (BOM) doesn't match host endianness
2479                 for (int i = 0; i < len16; i++) {
2480                     framedata[i] = bswap_16(framedata[i]);
2481                 }
2482                 // BOM is now swapped to 0xfeff, we will execute next block too
2483             }
2484 
2485             if (0xfeff == *framedata) {
2486                 // Remove the BOM
2487                 framedata++;
2488                 len16--;
2489                 isUTF8 = false;
2490             }
2491             // else normal non-zero-length UTF-8 string
2492             // we can't handle UTF-16 without BOM as there is no other
2493             // indication of encoding.
2494         }
2495 
2496         if (isUTF8) {
2497             mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2498         } else {
2499             // Convert from UTF-16 string to UTF-8 string.
2500             String8 tmpUTF8str(framedata, len16);
2501             mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2502         }
2503     }
2504 
2505     delete[] buffer;
2506     buffer = NULL;
2507 
2508     return OK;
2509 }
2510 
parseID3v2MetaData(off64_t offset)2511 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2512     ID3 id3(mDataSource, true /* ignorev1 */, offset);
2513 
2514     if (id3.isValid()) {
2515         struct Map {
2516             int key;
2517             const char *tag1;
2518             const char *tag2;
2519         };
2520         static const Map kMap[] = {
2521             { kKeyAlbum, "TALB", "TAL" },
2522             { kKeyArtist, "TPE1", "TP1" },
2523             { kKeyAlbumArtist, "TPE2", "TP2" },
2524             { kKeyComposer, "TCOM", "TCM" },
2525             { kKeyGenre, "TCON", "TCO" },
2526             { kKeyTitle, "TIT2", "TT2" },
2527             { kKeyYear, "TYE", "TYER" },
2528             { kKeyAuthor, "TXT", "TEXT" },
2529             { kKeyCDTrackNumber, "TRK", "TRCK" },
2530             { kKeyDiscNumber, "TPA", "TPOS" },
2531             { kKeyCompilation, "TCP", "TCMP" },
2532         };
2533         static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2534 
2535         for (size_t i = 0; i < kNumMapEntries; ++i) {
2536             if (!mFileMetaData->hasData(kMap[i].key)) {
2537                 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2538                 if (it->done()) {
2539                     delete it;
2540                     it = new ID3::Iterator(id3, kMap[i].tag2);
2541                 }
2542 
2543                 if (it->done()) {
2544                     delete it;
2545                     continue;
2546                 }
2547 
2548                 String8 s;
2549                 it->getString(&s);
2550                 delete it;
2551 
2552                 mFileMetaData->setCString(kMap[i].key, s);
2553             }
2554         }
2555 
2556         size_t dataSize;
2557         String8 mime;
2558         const void *data = id3.getAlbumArt(&dataSize, &mime);
2559 
2560         if (data) {
2561             mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2562             mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2563         }
2564     }
2565 }
2566 
getTrack(size_t index)2567 sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2568     status_t err;
2569     if ((err = readMetaData()) != OK) {
2570         return NULL;
2571     }
2572 
2573     Track *track = mFirstTrack;
2574     while (index > 0) {
2575         if (track == NULL) {
2576             return NULL;
2577         }
2578 
2579         track = track->next;
2580         --index;
2581     }
2582 
2583     if (track == NULL) {
2584         return NULL;
2585     }
2586 
2587 
2588     Trex *trex = NULL;
2589     int32_t trackId;
2590     if (track->meta->findInt32(kKeyTrackID, &trackId)) {
2591         for (size_t i = 0; i < mTrex.size(); i++) {
2592             Trex *t = &mTrex.editItemAt(index);
2593             if (t->track_ID == (uint32_t) trackId) {
2594                 trex = t;
2595                 break;
2596             }
2597         }
2598     }
2599 
2600     ALOGV("getTrack called, pssh: %zu", mPssh.size());
2601 
2602     return new MPEG4Source(this,
2603             track->meta, mDataSource, track->timescale, track->sampleTable,
2604             mSidxEntries, trex, mMoofOffset);
2605 }
2606 
2607 // static
verifyTrack(Track * track)2608 status_t MPEG4Extractor::verifyTrack(Track *track) {
2609     const char *mime;
2610     CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2611 
2612     uint32_t type;
2613     const void *data;
2614     size_t size;
2615     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2616         if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2617                 || type != kTypeAVCC) {
2618             return ERROR_MALFORMED;
2619         }
2620     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2621         if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2622                     || type != kTypeHVCC) {
2623             return ERROR_MALFORMED;
2624         }
2625     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2626             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2627         if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2628                 || type != kTypeESDS) {
2629             return ERROR_MALFORMED;
2630         }
2631     }
2632 
2633     if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2634         // Make sure we have all the metadata we need.
2635         ALOGE("stbl atom missing/invalid.");
2636         return ERROR_MALFORMED;
2637     }
2638 
2639     return OK;
2640 }
2641 
// MPEG-4 audio object type (AOT) codes that the AudioSpecificConfig parser in
// this file compares against. The numeric values are the codes read from the
// bitstream, so they must not be renumbered.
//
// Codes that are known but intentionally not given a named constant here:
//   -1 AOT_NONE             0 AOT_NULL_OBJECT       1 AOT_AAC_MAIN (Main profile)
//    3 AOT_AAC_SSR          4 AOT_AAC_LTP           6 AOT_AAC_SCAL
//    7 AOT_TWIN_VQ          8 AOT_CELP              9 AOT_HVXC
//   10 AOT_RSVD_10 (reserved)                      11 AOT_RSVD_11 (reserved)
//   12 AOT_TTSI (TTSI Object)                      13 AOT_MAIN_SYNTH (Main Synthetic object)
//   14 AOT_WAV_TAB_SYNTH (Wavetable Synthesis object)
//   15 AOT_GEN_MIDI (General MIDI object)
//   16 AOT_ALG_SYNTH_AUD_FX (Algorithmic Synthesis and Audio FX object)
//   18 AOT_RSVD_18 (reserved)                      19 AOT_ER_AAC_LTP (ER AAC LTP object)
//   21 AOT_ER_TWIN_VQ (ER TwinVQ object)           24 AOT_ER_CELP (ER CELP object)
//   25 AOT_ER_HVXC (ER HVXC object)                26 AOT_ER_HILN (ER HILN object)
//   27 AOT_ER_PARA (ER Parametric object)          28 AOT_RSVD_28 (might become SSC)
//   30 AOT_MPEGS (MPEG Surround)
//   32 AOT_MP3ONMP4_L1      33 AOT_MP3ONMP4_L2     34 AOT_MP3ONMP4_L3 (MPEG-Layer1/2/3 in mp4)
//   35 AOT_RSVD_35 (might become DST)              36 AOT_RSVD_36 (might become ALS)
//   37 AOT_AAC_SLS (AAC + SLS)                     38 AOT_SLS
//   39 AOT_ER_AAC_ELD (AAC Enhanced Low Delay)
//   42 AOT_USAC             43 AOT_SAOC            44 AOT_LD_MPEGS (Low Delay MPEG Surround)
//   50 AOT_RSVD50 (interim AOT for Rsvd50)
typedef enum {
    AOT_AAC_LC      = 2,   // Low Complexity object
    AOT_SBR         = 5,
    AOT_ER_AAC_LC   = 17,  // Error Resilient (ER) AAC Low Complexity
    AOT_ER_AAC_SCAL = 20,  // Error Resilient (ER) AAC Scalable object
    AOT_ER_BSAC     = 22,  // Error Resilient (ER) BSAC object
    AOT_ER_AAC_LD   = 23,  // Error Resilient (ER) AAC LowDelay object
    AOT_PS          = 29,  // PS, Parametric Stereo (includes SBR)

    AOT_ESCAPE      = 31,  // Signals that the AOT uses more than 5 bits
} AUDIO_OBJECT_TYPE;
2693 
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)2694 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2695         const void *esds_data, size_t esds_size) {
2696     ESDS esds(esds_data, esds_size);
2697 
2698     uint8_t objectTypeIndication;
2699     if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2700         return ERROR_MALFORMED;
2701     }
2702 
2703     if (objectTypeIndication == 0xe1) {
2704         // This isn't MPEG4 audio at all, it's QCELP 14k...
2705         mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2706         return OK;
2707     }
2708 
2709     if (objectTypeIndication  == 0x6b) {
2710         // The media subtype is MP3 audio
2711         // Our software MP3 audio decoder may not be able to handle
2712         // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2713         ALOGE("MP3 track in MP4/3GPP file is not supported");
2714         return ERROR_UNSUPPORTED;
2715     }
2716 
2717     const uint8_t *csd;
2718     size_t csd_size;
2719     if (esds.getCodecSpecificInfo(
2720                 (const void **)&csd, &csd_size) != OK) {
2721         return ERROR_MALFORMED;
2722     }
2723 
2724 #if 0
2725     printf("ESD of size %d\n", csd_size);
2726     hexdump(csd, csd_size);
2727 #endif
2728 
2729     if (csd_size == 0) {
2730         // There's no further information, i.e. no codec specific data
2731         // Let's assume that the information provided in the mpeg4 headers
2732         // is accurate and hope for the best.
2733 
2734         return OK;
2735     }
2736 
2737     if (csd_size < 2) {
2738         return ERROR_MALFORMED;
2739     }
2740 
2741     static uint32_t kSamplingRate[] = {
2742         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2743         16000, 12000, 11025, 8000, 7350
2744     };
2745 
2746     ABitReader br(csd, csd_size);
2747     uint32_t objectType = br.getBits(5);
2748 
2749     if (objectType == 31) {  // AAC-ELD => additional 6 bits
2750         objectType = 32 + br.getBits(6);
2751     }
2752 
2753     //keep AOT type
2754     mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2755 
2756     uint32_t freqIndex = br.getBits(4);
2757 
2758     int32_t sampleRate = 0;
2759     int32_t numChannels = 0;
2760     if (freqIndex == 15) {
2761         if (csd_size < 5) {
2762             return ERROR_MALFORMED;
2763         }
2764         sampleRate = br.getBits(24);
2765         numChannels = br.getBits(4);
2766     } else {
2767         numChannels = br.getBits(4);
2768 
2769         if (freqIndex == 13 || freqIndex == 14) {
2770             return ERROR_MALFORMED;
2771         }
2772 
2773         sampleRate = kSamplingRate[freqIndex];
2774     }
2775 
2776     if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2777         uint32_t extFreqIndex = br.getBits(4);
2778         int32_t extSampleRate;
2779         if (extFreqIndex == 15) {
2780             if (csd_size < 8) {
2781                 return ERROR_MALFORMED;
2782             }
2783             extSampleRate = br.getBits(24);
2784         } else {
2785             if (extFreqIndex == 13 || extFreqIndex == 14) {
2786                 return ERROR_MALFORMED;
2787             }
2788             extSampleRate = kSamplingRate[extFreqIndex];
2789         }
2790         //TODO: save the extension sampling rate value in meta data =>
2791         //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2792     }
2793 
2794     switch (numChannels) {
2795         // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2796         case 0:
2797         case 1:// FC
2798         case 2:// FL FR
2799         case 3:// FC, FL FR
2800         case 4:// FC, FL FR, RC
2801         case 5:// FC, FL FR, SL SR
2802         case 6:// FC, FL FR, SL SR, LFE
2803             //numChannels already contains the right value
2804             break;
2805         case 11:// FC, FL FR, SL SR, RC, LFE
2806             numChannels = 7;
2807             break;
2808         case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2809         case 12:// FC, FL  FR,  SL SR, RL RR, LFE
2810         case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
2811             numChannels = 8;
2812             break;
2813         default:
2814             return ERROR_UNSUPPORTED;
2815     }
2816 
2817     {
2818         if (objectType == AOT_SBR || objectType == AOT_PS) {
2819             objectType = br.getBits(5);
2820 
2821             if (objectType == AOT_ESCAPE) {
2822                 objectType = 32 + br.getBits(6);
2823             }
2824         }
2825         if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2826                 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2827                 objectType == AOT_ER_BSAC) {
2828             const int32_t frameLengthFlag = br.getBits(1);
2829 
2830             const int32_t dependsOnCoreCoder = br.getBits(1);
2831 
2832             if (dependsOnCoreCoder ) {
2833                 const int32_t coreCoderDelay = br.getBits(14);
2834             }
2835 
2836             int32_t extensionFlag = -1;
2837             if (br.numBitsLeft() > 0) {
2838                 extensionFlag = br.getBits(1);
2839             } else {
2840                 switch (objectType) {
2841                 // 14496-3 4.5.1.1 extensionFlag
2842                 case AOT_AAC_LC:
2843                     extensionFlag = 0;
2844                     break;
2845                 case AOT_ER_AAC_LC:
2846                 case AOT_ER_AAC_SCAL:
2847                 case AOT_ER_BSAC:
2848                 case AOT_ER_AAC_LD:
2849                     extensionFlag = 1;
2850                     break;
2851                 default:
2852                     TRESPASS();
2853                     break;
2854                 }
2855                 ALOGW("csd missing extension flag; assuming %d for object type %u.",
2856                         extensionFlag, objectType);
2857             }
2858 
2859             if (numChannels == 0) {
2860                 int32_t channelsEffectiveNum = 0;
2861                 int32_t channelsNum = 0;
2862                 const int32_t ElementInstanceTag = br.getBits(4);
2863                 const int32_t Profile = br.getBits(2);
2864                 const int32_t SamplingFrequencyIndex = br.getBits(4);
2865                 const int32_t NumFrontChannelElements = br.getBits(4);
2866                 const int32_t NumSideChannelElements = br.getBits(4);
2867                 const int32_t NumBackChannelElements = br.getBits(4);
2868                 const int32_t NumLfeChannelElements = br.getBits(2);
2869                 const int32_t NumAssocDataElements = br.getBits(3);
2870                 const int32_t NumValidCcElements = br.getBits(4);
2871 
2872                 const int32_t MonoMixdownPresent = br.getBits(1);
2873                 if (MonoMixdownPresent != 0) {
2874                     const int32_t MonoMixdownElementNumber = br.getBits(4);
2875                 }
2876 
2877                 const int32_t StereoMixdownPresent = br.getBits(1);
2878                 if (StereoMixdownPresent != 0) {
2879                     const int32_t StereoMixdownElementNumber = br.getBits(4);
2880                 }
2881 
2882                 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2883                 if (MatrixMixdownIndexPresent != 0) {
2884                     const int32_t MatrixMixdownIndex = br.getBits(2);
2885                     const int32_t PseudoSurroundEnable = br.getBits(1);
2886                 }
2887 
2888                 int i;
2889                 for (i=0; i < NumFrontChannelElements; i++) {
2890                     const int32_t FrontElementIsCpe = br.getBits(1);
2891                     const int32_t FrontElementTagSelect = br.getBits(4);
2892                     channelsNum += FrontElementIsCpe ? 2 : 1;
2893                 }
2894 
2895                 for (i=0; i < NumSideChannelElements; i++) {
2896                     const int32_t SideElementIsCpe = br.getBits(1);
2897                     const int32_t SideElementTagSelect = br.getBits(4);
2898                     channelsNum += SideElementIsCpe ? 2 : 1;
2899                 }
2900 
2901                 for (i=0; i < NumBackChannelElements; i++) {
2902                     const int32_t BackElementIsCpe = br.getBits(1);
2903                     const int32_t BackElementTagSelect = br.getBits(4);
2904                     channelsNum += BackElementIsCpe ? 2 : 1;
2905                 }
2906                 channelsEffectiveNum = channelsNum;
2907 
2908                 for (i=0; i < NumLfeChannelElements; i++) {
2909                     const int32_t LfeElementTagSelect = br.getBits(4);
2910                     channelsNum += 1;
2911                 }
2912                 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2913                 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2914                 numChannels = channelsNum;
2915             }
2916         }
2917     }
2918 
2919     if (numChannels == 0) {
2920         return ERROR_UNSUPPORTED;
2921     }
2922 
2923     int32_t prevSampleRate;
2924     CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2925 
2926     if (prevSampleRate != sampleRate) {
2927         ALOGV("mpeg4 audio sample rate different from previous setting. "
2928              "was: %d, now: %d", prevSampleRate, sampleRate);
2929     }
2930 
2931     mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2932 
2933     int32_t prevChannelCount;
2934     CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2935 
2936     if (prevChannelCount != numChannels) {
2937         ALOGV("mpeg4 audio channel count different from previous setting. "
2938              "was: %d, now: %d", prevChannelCount, numChannels);
2939     }
2940 
2941     mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2942 
2943     return OK;
2944 }
2945 
2946 ////////////////////////////////////////////////////////////////////////////////
2947 
MPEG4Source(const sp<MPEG4Extractor> & owner,const sp<MetaData> & format,const sp<DataSource> & dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset)2948 MPEG4Source::MPEG4Source(
2949         const sp<MPEG4Extractor> &owner,
2950         const sp<MetaData> &format,
2951         const sp<DataSource> &dataSource,
2952         int32_t timeScale,
2953         const sp<SampleTable> &sampleTable,
2954         Vector<SidxEntry> &sidx,
2955         const Trex *trex,
2956         off64_t firstMoofOffset)
2957     : mOwner(owner),
2958       mFormat(format),
2959       mDataSource(dataSource),
2960       mTimescale(timeScale),
2961       mSampleTable(sampleTable),
2962       mCurrentSampleIndex(0),
2963       mCurrentFragmentIndex(0),
2964       mSegments(sidx),
2965       mTrex(trex),
2966       mFirstMoofOffset(firstMoofOffset),
2967       mCurrentMoofOffset(firstMoofOffset),
2968       mCurrentTime(0),
2969       mCurrentSampleInfoAllocSize(0),
2970       mCurrentSampleInfoSizes(NULL),
2971       mCurrentSampleInfoOffsetsAllocSize(0),
2972       mCurrentSampleInfoOffsets(NULL),
2973       mIsAVC(false),
2974       mIsHEVC(false),
2975       mNALLengthSize(0),
2976       mStarted(false),
2977       mGroup(NULL),
2978       mBuffer(NULL),
2979       mWantsNALFragments(false),
2980       mSrcBuffer(NULL) {
2981 
2982     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
2983 
2984     mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2985     mDefaultIVSize = 0;
2986     mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2987     uint32_t keytype;
2988     const void *key;
2989     size_t keysize;
2990     if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2991         CHECK(keysize <= 16);
2992         memset(mCryptoKey, 0, 16);
2993         memcpy(mCryptoKey, key, keysize);
2994     }
2995 
2996     const char *mime;
2997     bool success = mFormat->findCString(kKeyMIMEType, &mime);
2998     CHECK(success);
2999 
3000     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3001     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3002 
3003     if (mIsAVC) {
3004         uint32_t type;
3005         const void *data;
3006         size_t size;
3007         CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3008 
3009         const uint8_t *ptr = (const uint8_t *)data;
3010 
3011         CHECK(size >= 7);
3012         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3013 
3014         // The number of bytes used to encode the length of a NAL unit.
3015         mNALLengthSize = 1 + (ptr[4] & 3);
3016     } else if (mIsHEVC) {
3017         uint32_t type;
3018         const void *data;
3019         size_t size;
3020         CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3021 
3022         const uint8_t *ptr = (const uint8_t *)data;
3023 
3024         CHECK(size >= 7);
3025         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3026 
3027         mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3028     }
3029 
3030     CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3031 
3032     if (mFirstMoofOffset != 0) {
3033         off64_t offset = mFirstMoofOffset;
3034         parseChunk(&offset);
3035     }
3036 }
3037 
~MPEG4Source()3038 MPEG4Source::~MPEG4Source() {
3039     if (mStarted) {
3040         stop();
3041     }
3042     free(mCurrentSampleInfoSizes);
3043     free(mCurrentSampleInfoOffsets);
3044 }
3045 
start(MetaData * params)3046 status_t MPEG4Source::start(MetaData *params) {
3047     Mutex::Autolock autoLock(mLock);
3048 
3049     CHECK(!mStarted);
3050 
3051     int32_t val;
3052     if (params && params->findInt32(kKeyWantsNALFragments, &val)
3053         && val != 0) {
3054         mWantsNALFragments = true;
3055     } else {
3056         mWantsNALFragments = false;
3057     }
3058 
3059     mGroup = new MediaBufferGroup;
3060 
3061     int32_t max_size;
3062     CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
3063 
3064     mGroup->add_buffer(new MediaBuffer(max_size));
3065 
3066     mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3067     if (mSrcBuffer == NULL) {
3068         // file probably specified a bad max size
3069         return ERROR_MALFORMED;
3070     }
3071 
3072     mStarted = true;
3073 
3074     return OK;
3075 }
3076 
stop()3077 status_t MPEG4Source::stop() {
3078     Mutex::Autolock autoLock(mLock);
3079 
3080     CHECK(mStarted);
3081 
3082     if (mBuffer != NULL) {
3083         mBuffer->release();
3084         mBuffer = NULL;
3085     }
3086 
3087     delete[] mSrcBuffer;
3088     mSrcBuffer = NULL;
3089 
3090     delete mGroup;
3091     mGroup = NULL;
3092 
3093     mStarted = false;
3094     mCurrentSampleIndex = 0;
3095 
3096     return OK;
3097 }
3098 
parseChunk(off64_t * offset)3099 status_t MPEG4Source::parseChunk(off64_t *offset) {
3100     uint32_t hdr[2];
3101     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3102         return ERROR_IO;
3103     }
3104     uint64_t chunk_size = ntohl(hdr[0]);
3105     uint32_t chunk_type = ntohl(hdr[1]);
3106     off64_t data_offset = *offset + 8;
3107 
3108     if (chunk_size == 1) {
3109         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3110             return ERROR_IO;
3111         }
3112         chunk_size = ntoh64(chunk_size);
3113         data_offset += 8;
3114 
3115         if (chunk_size < 16) {
3116             // The smallest valid chunk is 16 bytes long in this case.
3117             return ERROR_MALFORMED;
3118         }
3119     } else if (chunk_size < 8) {
3120         // The smallest valid chunk is 8 bytes long.
3121         return ERROR_MALFORMED;
3122     }
3123 
3124     char chunk[5];
3125     MakeFourCCString(chunk_type, chunk);
3126     ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
3127 
3128     off64_t chunk_data_size = *offset + chunk_size - data_offset;
3129 
3130     switch(chunk_type) {
3131 
3132         case FOURCC('t', 'r', 'a', 'f'):
3133         case FOURCC('m', 'o', 'o', 'f'): {
3134             off64_t stop_offset = *offset + chunk_size;
3135             *offset = data_offset;
3136             while (*offset < stop_offset) {
3137                 status_t err = parseChunk(offset);
3138                 if (err != OK) {
3139                     return err;
3140                 }
3141             }
3142             if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3143                 // *offset points to the box following this moof. Find the next moof from there.
3144 
3145                 while (true) {
3146                     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3147                         return ERROR_END_OF_STREAM;
3148                     }
3149                     chunk_size = ntohl(hdr[0]);
3150                     chunk_type = ntohl(hdr[1]);
3151                     if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3152                         mNextMoofOffset = *offset;
3153                         break;
3154                     }
3155                     *offset += chunk_size;
3156                 }
3157             }
3158             break;
3159         }
3160 
3161         case FOURCC('t', 'f', 'h', 'd'): {
3162                 status_t err;
3163                 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3164                     return err;
3165                 }
3166                 *offset += chunk_size;
3167                 break;
3168         }
3169 
3170         case FOURCC('t', 'r', 'u', 'n'): {
3171                 status_t err;
3172                 if (mLastParsedTrackId == mTrackId) {
3173                     if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3174                         return err;
3175                     }
3176                 }
3177 
3178                 *offset += chunk_size;
3179                 break;
3180         }
3181 
3182         case FOURCC('s', 'a', 'i', 'z'): {
3183             status_t err;
3184             if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3185                 return err;
3186             }
3187             *offset += chunk_size;
3188             break;
3189         }
3190         case FOURCC('s', 'a', 'i', 'o'): {
3191             status_t err;
3192             if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3193                 return err;
3194             }
3195             *offset += chunk_size;
3196             break;
3197         }
3198 
3199         case FOURCC('m', 'd', 'a', 't'): {
3200             // parse DRM info if present
3201             ALOGV("MPEG4Source::parseChunk mdat");
3202             // if saiz/saoi was previously observed, do something with the sampleinfos
3203             *offset += chunk_size;
3204             break;
3205         }
3206 
3207         default: {
3208             *offset += chunk_size;
3209             break;
3210         }
3211     }
3212     return OK;
3213 }
3214 
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)3215 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3216         off64_t offset, off64_t /* size */) {
3217     ALOGV("parseSampleAuxiliaryInformationSizes");
3218     // 14496-12 8.7.12
3219     uint8_t version;
3220     if (mDataSource->readAt(
3221             offset, &version, sizeof(version))
3222             < (ssize_t)sizeof(version)) {
3223         return ERROR_IO;
3224     }
3225 
3226     if (version != 0) {
3227         return ERROR_UNSUPPORTED;
3228     }
3229     offset++;
3230 
3231     uint32_t flags;
3232     if (!mDataSource->getUInt24(offset, &flags)) {
3233         return ERROR_IO;
3234     }
3235     offset += 3;
3236 
3237     if (flags & 1) {
3238         uint32_t tmp;
3239         if (!mDataSource->getUInt32(offset, &tmp)) {
3240             return ERROR_MALFORMED;
3241         }
3242         mCurrentAuxInfoType = tmp;
3243         offset += 4;
3244         if (!mDataSource->getUInt32(offset, &tmp)) {
3245             return ERROR_MALFORMED;
3246         }
3247         mCurrentAuxInfoTypeParameter = tmp;
3248         offset += 4;
3249     }
3250 
3251     uint8_t defsize;
3252     if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3253         return ERROR_MALFORMED;
3254     }
3255     mCurrentDefaultSampleInfoSize = defsize;
3256     offset++;
3257 
3258     uint32_t smplcnt;
3259     if (!mDataSource->getUInt32(offset, &smplcnt)) {
3260         return ERROR_MALFORMED;
3261     }
3262     mCurrentSampleInfoCount = smplcnt;
3263     offset += 4;
3264 
3265     if (mCurrentDefaultSampleInfoSize != 0) {
3266         ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3267         return OK;
3268     }
3269     if (smplcnt > mCurrentSampleInfoAllocSize) {
3270         mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3271         mCurrentSampleInfoAllocSize = smplcnt;
3272     }
3273 
3274     mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3275     return OK;
3276 }
3277 
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)3278 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3279         off64_t offset, off64_t /* size */) {
3280     ALOGV("parseSampleAuxiliaryInformationOffsets");
3281     // 14496-12 8.7.13
3282     uint8_t version;
3283     if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3284         return ERROR_IO;
3285     }
3286     offset++;
3287 
3288     uint32_t flags;
3289     if (!mDataSource->getUInt24(offset, &flags)) {
3290         return ERROR_IO;
3291     }
3292     offset += 3;
3293 
3294     uint32_t entrycount;
3295     if (!mDataSource->getUInt32(offset, &entrycount)) {
3296         return ERROR_IO;
3297     }
3298     offset += 4;
3299 
3300     if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3301         mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3302         mCurrentSampleInfoOffsetsAllocSize = entrycount;
3303     }
3304     mCurrentSampleInfoOffsetCount = entrycount;
3305 
3306     for (size_t i = 0; i < entrycount; i++) {
3307         if (version == 0) {
3308             uint32_t tmp;
3309             if (!mDataSource->getUInt32(offset, &tmp)) {
3310                 return ERROR_IO;
3311             }
3312             mCurrentSampleInfoOffsets[i] = tmp;
3313             offset += 4;
3314         } else {
3315             uint64_t tmp;
3316             if (!mDataSource->getUInt64(offset, &tmp)) {
3317                 return ERROR_IO;
3318             }
3319             mCurrentSampleInfoOffsets[i] = tmp;
3320             offset += 8;
3321         }
3322     }
3323 
3324     // parse clear/encrypted data
3325 
3326     off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3327 
3328     drmoffset += mCurrentMoofOffset;
3329     int ivlength;
3330     CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3331 
3332     // read CencSampleAuxiliaryDataFormats
3333     for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3334         Sample *smpl = &mCurrentSamples.editItemAt(i);
3335 
3336         memset(smpl->iv, 0, 16);
3337         if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3338             return ERROR_IO;
3339         }
3340 
3341         drmoffset += ivlength;
3342 
3343         int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3344         if (smplinfosize == 0) {
3345             smplinfosize = mCurrentSampleInfoSizes[i];
3346         }
3347         if (smplinfosize > ivlength) {
3348             uint16_t numsubsamples;
3349             if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3350                 return ERROR_IO;
3351             }
3352             drmoffset += 2;
3353             for (size_t j = 0; j < numsubsamples; j++) {
3354                 uint16_t numclear;
3355                 uint32_t numencrypted;
3356                 if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3357                     return ERROR_IO;
3358                 }
3359                 drmoffset += 2;
3360                 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3361                     return ERROR_IO;
3362                 }
3363                 drmoffset += 4;
3364                 smpl->clearsizes.add(numclear);
3365                 smpl->encryptedsizes.add(numencrypted);
3366             }
3367         } else {
3368             smpl->clearsizes.add(0);
3369             smpl->encryptedsizes.add(smpl->size);
3370         }
3371     }
3372 
3373 
3374     return OK;
3375 }
3376 
parseTrackFragmentHeader(off64_t offset,off64_t size)3377 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3378 
3379     if (size < 8) {
3380         return -EINVAL;
3381     }
3382 
3383     uint32_t flags;
3384     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3385         return ERROR_MALFORMED;
3386     }
3387 
3388     if (flags & 0xff000000) {
3389         return -EINVAL;
3390     }
3391 
3392     if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3393         return ERROR_MALFORMED;
3394     }
3395 
3396     if (mLastParsedTrackId != mTrackId) {
3397         // this is not the right track, skip it
3398         return OK;
3399     }
3400 
3401     mTrackFragmentHeaderInfo.mFlags = flags;
3402     mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3403     offset += 8;
3404     size -= 8;
3405 
3406     ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3407 
3408     if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3409         if (size < 8) {
3410             return -EINVAL;
3411         }
3412 
3413         if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3414             return ERROR_MALFORMED;
3415         }
3416         offset += 8;
3417         size -= 8;
3418     }
3419 
3420     if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3421         if (size < 4) {
3422             return -EINVAL;
3423         }
3424 
3425         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3426             return ERROR_MALFORMED;
3427         }
3428         offset += 4;
3429         size -= 4;
3430     }
3431 
3432     if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3433         if (size < 4) {
3434             return -EINVAL;
3435         }
3436 
3437         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3438             return ERROR_MALFORMED;
3439         }
3440         offset += 4;
3441         size -= 4;
3442     }
3443 
3444     if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3445         if (size < 4) {
3446             return -EINVAL;
3447         }
3448 
3449         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3450             return ERROR_MALFORMED;
3451         }
3452         offset += 4;
3453         size -= 4;
3454     }
3455 
3456     if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3457         if (size < 4) {
3458             return -EINVAL;
3459         }
3460 
3461         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3462             return ERROR_MALFORMED;
3463         }
3464         offset += 4;
3465         size -= 4;
3466     }
3467 
3468     if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3469         mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3470     }
3471 
3472     mTrackFragmentHeaderInfo.mDataOffset = 0;
3473     return OK;
3474 }
3475 
parseTrackFragmentRun(off64_t offset,off64_t size)3476 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3477 
3478     ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3479     if (size < 8) {
3480         return -EINVAL;
3481     }
3482 
3483     enum {
3484         kDataOffsetPresent                  = 0x01,
3485         kFirstSampleFlagsPresent            = 0x04,
3486         kSampleDurationPresent              = 0x100,
3487         kSampleSizePresent                  = 0x200,
3488         kSampleFlagsPresent                 = 0x400,
3489         kSampleCompositionTimeOffsetPresent = 0x800,
3490     };
3491 
3492     uint32_t flags;
3493     if (!mDataSource->getUInt32(offset, &flags)) {
3494         return ERROR_MALFORMED;
3495     }
3496     ALOGV("fragment run flags: %08x", flags);
3497 
3498     if (flags & 0xff000000) {
3499         return -EINVAL;
3500     }
3501 
3502     if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3503         // These two shall not be used together.
3504         return -EINVAL;
3505     }
3506 
3507     uint32_t sampleCount;
3508     if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3509         return ERROR_MALFORMED;
3510     }
3511     offset += 8;
3512     size -= 8;
3513 
3514     uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3515 
3516     uint32_t firstSampleFlags = 0;
3517 
3518     if (flags & kDataOffsetPresent) {
3519         if (size < 4) {
3520             return -EINVAL;
3521         }
3522 
3523         int32_t dataOffsetDelta;
3524         if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3525             return ERROR_MALFORMED;
3526         }
3527 
3528         dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3529 
3530         offset += 4;
3531         size -= 4;
3532     }
3533 
3534     if (flags & kFirstSampleFlagsPresent) {
3535         if (size < 4) {
3536             return -EINVAL;
3537         }
3538 
3539         if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3540             return ERROR_MALFORMED;
3541         }
3542         offset += 4;
3543         size -= 4;
3544     }
3545 
3546     uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3547              sampleCtsOffset = 0;
3548 
3549     size_t bytesPerSample = 0;
3550     if (flags & kSampleDurationPresent) {
3551         bytesPerSample += 4;
3552     } else if (mTrackFragmentHeaderInfo.mFlags
3553             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3554         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3555     } else if (mTrex) {
3556         sampleDuration = mTrex->default_sample_duration;
3557     }
3558 
3559     if (flags & kSampleSizePresent) {
3560         bytesPerSample += 4;
3561     } else if (mTrackFragmentHeaderInfo.mFlags
3562             & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3563         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3564     } else {
3565         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3566     }
3567 
3568     if (flags & kSampleFlagsPresent) {
3569         bytesPerSample += 4;
3570     } else if (mTrackFragmentHeaderInfo.mFlags
3571             & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3572         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3573     } else {
3574         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3575     }
3576 
3577     if (flags & kSampleCompositionTimeOffsetPresent) {
3578         bytesPerSample += 4;
3579     } else {
3580         sampleCtsOffset = 0;
3581     }
3582 
3583     if (size < (off64_t)sampleCount * bytesPerSample) {
3584         return -EINVAL;
3585     }
3586 
3587     Sample tmp;
3588     for (uint32_t i = 0; i < sampleCount; ++i) {
3589         if (flags & kSampleDurationPresent) {
3590             if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3591                 return ERROR_MALFORMED;
3592             }
3593             offset += 4;
3594         }
3595 
3596         if (flags & kSampleSizePresent) {
3597             if (!mDataSource->getUInt32(offset, &sampleSize)) {
3598                 return ERROR_MALFORMED;
3599             }
3600             offset += 4;
3601         }
3602 
3603         if (flags & kSampleFlagsPresent) {
3604             if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3605                 return ERROR_MALFORMED;
3606             }
3607             offset += 4;
3608         }
3609 
3610         if (flags & kSampleCompositionTimeOffsetPresent) {
3611             if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3612                 return ERROR_MALFORMED;
3613             }
3614             offset += 4;
3615         }
3616 
3617         ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
3618               " flags 0x%08x", i + 1,
3619                 dataOffset, sampleSize, sampleDuration,
3620                 (flags & kFirstSampleFlagsPresent) && i == 0
3621                     ? firstSampleFlags : sampleFlags);
3622         tmp.offset = dataOffset;
3623         tmp.size = sampleSize;
3624         tmp.duration = sampleDuration;
3625         tmp.compositionOffset = sampleCtsOffset;
3626         mCurrentSamples.add(tmp);
3627 
3628         dataOffset += sampleSize;
3629     }
3630 
3631     mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3632 
3633     return OK;
3634 }
3635 
getFormat()3636 sp<MetaData> MPEG4Source::getFormat() {
3637     Mutex::Autolock autoLock(mLock);
3638 
3639     return mFormat;
3640 }
3641 
parseNALSize(const uint8_t * data) const3642 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3643     switch (mNALLengthSize) {
3644         case 1:
3645             return *data;
3646         case 2:
3647             return U16_AT(data);
3648         case 3:
3649             return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3650         case 4:
3651             return U32_AT(data);
3652     }
3653 
3654     // This cannot happen, mNALLengthSize springs to life by adding 1 to
3655     // a 2-bit integer.
3656     CHECK(!"Should not be here.");
3657 
3658     return 0;
3659 }
3660 
read(MediaBuffer ** out,const ReadOptions * options)3661 status_t MPEG4Source::read(
3662         MediaBuffer **out, const ReadOptions *options) {
3663     Mutex::Autolock autoLock(mLock);
3664 
3665     CHECK(mStarted);
3666 
3667     if (mFirstMoofOffset > 0) {
3668         return fragmentedRead(out, options);
3669     }
3670 
3671     *out = NULL;
3672 
3673     int64_t targetSampleTimeUs = -1;
3674 
3675     int64_t seekTimeUs;
3676     ReadOptions::SeekMode mode;
3677     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3678         uint32_t findFlags = 0;
3679         switch (mode) {
3680             case ReadOptions::SEEK_PREVIOUS_SYNC:
3681                 findFlags = SampleTable::kFlagBefore;
3682                 break;
3683             case ReadOptions::SEEK_NEXT_SYNC:
3684                 findFlags = SampleTable::kFlagAfter;
3685                 break;
3686             case ReadOptions::SEEK_CLOSEST_SYNC:
3687             case ReadOptions::SEEK_CLOSEST:
3688                 findFlags = SampleTable::kFlagClosest;
3689                 break;
3690             default:
3691                 CHECK(!"Should not be here.");
3692                 break;
3693         }
3694 
3695         uint32_t sampleIndex;
3696         status_t err = mSampleTable->findSampleAtTime(
3697                 seekTimeUs, 1000000, mTimescale,
3698                 &sampleIndex, findFlags);
3699 
3700         if (mode == ReadOptions::SEEK_CLOSEST) {
3701             // We found the closest sample already, now we want the sync
3702             // sample preceding it (or the sample itself of course), even
3703             // if the subsequent sync sample is closer.
3704             findFlags = SampleTable::kFlagBefore;
3705         }
3706 
3707         uint32_t syncSampleIndex;
3708         if (err == OK) {
3709             err = mSampleTable->findSyncSampleNear(
3710                     sampleIndex, &syncSampleIndex, findFlags);
3711         }
3712 
3713         uint32_t sampleTime;
3714         if (err == OK) {
3715             err = mSampleTable->getMetaDataForSample(
3716                     sampleIndex, NULL, NULL, &sampleTime);
3717         }
3718 
3719         if (err != OK) {
3720             if (err == ERROR_OUT_OF_RANGE) {
3721                 // An attempt to seek past the end of the stream would
3722                 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3723                 // this all the way to the MediaPlayer would cause abnormal
3724                 // termination. Legacy behaviour appears to be to behave as if
3725                 // we had seeked to the end of stream, ending normally.
3726                 err = ERROR_END_OF_STREAM;
3727             }
3728             ALOGV("end of stream");
3729             return err;
3730         }
3731 
3732         if (mode == ReadOptions::SEEK_CLOSEST) {
3733             targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3734         }
3735 
3736 #if 0
3737         uint32_t syncSampleTime;
3738         CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3739                     syncSampleIndex, NULL, NULL, &syncSampleTime));
3740 
3741         ALOGI("seek to time %lld us => sample at time %lld us, "
3742              "sync sample at time %lld us",
3743              seekTimeUs,
3744              sampleTime * 1000000ll / mTimescale,
3745              syncSampleTime * 1000000ll / mTimescale);
3746 #endif
3747 
3748         mCurrentSampleIndex = syncSampleIndex;
3749         if (mBuffer != NULL) {
3750             mBuffer->release();
3751             mBuffer = NULL;
3752         }
3753 
3754         // fall through
3755     }
3756 
3757     off64_t offset;
3758     size_t size;
3759     uint32_t cts, stts;
3760     bool isSyncSample;
3761     bool newBuffer = false;
3762     if (mBuffer == NULL) {
3763         newBuffer = true;
3764 
3765         status_t err =
3766             mSampleTable->getMetaDataForSample(
3767                     mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3768 
3769         if (err != OK) {
3770             return err;
3771         }
3772 
3773         err = mGroup->acquire_buffer(&mBuffer);
3774 
3775         if (err != OK) {
3776             CHECK(mBuffer == NULL);
3777             return err;
3778         }
3779     }
3780 
3781     if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
3782         if (newBuffer) {
3783             ssize_t num_bytes_read =
3784                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3785 
3786             if (num_bytes_read < (ssize_t)size) {
3787                 mBuffer->release();
3788                 mBuffer = NULL;
3789 
3790                 return ERROR_IO;
3791             }
3792 
3793             CHECK(mBuffer != NULL);
3794             mBuffer->set_range(0, size);
3795             mBuffer->meta_data()->clear();
3796             mBuffer->meta_data()->setInt64(
3797                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3798             mBuffer->meta_data()->setInt64(
3799                     kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3800 
3801             if (targetSampleTimeUs >= 0) {
3802                 mBuffer->meta_data()->setInt64(
3803                         kKeyTargetTime, targetSampleTimeUs);
3804             }
3805 
3806             if (isSyncSample) {
3807                 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3808             }
3809 
3810             ++mCurrentSampleIndex;
3811         }
3812 
3813         if (!mIsAVC && !mIsHEVC) {
3814             *out = mBuffer;
3815             mBuffer = NULL;
3816 
3817             return OK;
3818         }
3819 
3820         // Each NAL unit is split up into its constituent fragments and
3821         // each one of them returned in its own buffer.
3822 
3823         CHECK(mBuffer->range_length() >= mNALLengthSize);
3824 
3825         const uint8_t *src =
3826             (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3827 
3828         size_t nal_size = parseNALSize(src);
3829         if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3830             ALOGE("incomplete NAL unit.");
3831 
3832             mBuffer->release();
3833             mBuffer = NULL;
3834 
3835             return ERROR_MALFORMED;
3836         }
3837 
3838         MediaBuffer *clone = mBuffer->clone();
3839         CHECK(clone != NULL);
3840         clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3841 
3842         CHECK(mBuffer != NULL);
3843         mBuffer->set_range(
3844                 mBuffer->range_offset() + mNALLengthSize + nal_size,
3845                 mBuffer->range_length() - mNALLengthSize - nal_size);
3846 
3847         if (mBuffer->range_length() == 0) {
3848             mBuffer->release();
3849             mBuffer = NULL;
3850         }
3851 
3852         *out = clone;
3853 
3854         return OK;
3855     } else {
3856         // Whole NAL units are returned but each fragment is prefixed by
3857         // the start code (0x00 00 00 01).
3858         ssize_t num_bytes_read = 0;
3859         int32_t drm = 0;
3860         bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3861         if (usesDRM) {
3862             num_bytes_read =
3863                 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3864         } else {
3865             num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3866         }
3867 
3868         if (num_bytes_read < (ssize_t)size) {
3869             mBuffer->release();
3870             mBuffer = NULL;
3871 
3872             return ERROR_IO;
3873         }
3874 
3875         if (usesDRM) {
3876             CHECK(mBuffer != NULL);
3877             mBuffer->set_range(0, size);
3878 
3879         } else {
3880             uint8_t *dstData = (uint8_t *)mBuffer->data();
3881             size_t srcOffset = 0;
3882             size_t dstOffset = 0;
3883 
3884             while (srcOffset < size) {
3885                 bool isMalFormed = (srcOffset + mNALLengthSize > size);
3886                 size_t nalLength = 0;
3887                 if (!isMalFormed) {
3888                     nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3889                     srcOffset += mNALLengthSize;
3890                     isMalFormed = srcOffset + nalLength > size;
3891                 }
3892 
3893                 if (isMalFormed) {
3894                     ALOGE("Video is malformed");
3895                     mBuffer->release();
3896                     mBuffer = NULL;
3897                     return ERROR_MALFORMED;
3898                 }
3899 
3900                 if (nalLength == 0) {
3901                     continue;
3902                 }
3903 
3904                 CHECK(dstOffset + 4 <= mBuffer->size());
3905 
3906                 dstData[dstOffset++] = 0;
3907                 dstData[dstOffset++] = 0;
3908                 dstData[dstOffset++] = 0;
3909                 dstData[dstOffset++] = 1;
3910                 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3911                 srcOffset += nalLength;
3912                 dstOffset += nalLength;
3913             }
3914             CHECK_EQ(srcOffset, size);
3915             CHECK(mBuffer != NULL);
3916             mBuffer->set_range(0, dstOffset);
3917         }
3918 
3919         mBuffer->meta_data()->clear();
3920         mBuffer->meta_data()->setInt64(
3921                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3922         mBuffer->meta_data()->setInt64(
3923                 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3924 
3925         if (targetSampleTimeUs >= 0) {
3926             mBuffer->meta_data()->setInt64(
3927                     kKeyTargetTime, targetSampleTimeUs);
3928         }
3929 
3930         if (isSyncSample) {
3931             mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3932         }
3933 
3934         ++mCurrentSampleIndex;
3935 
3936         *out = mBuffer;
3937         mBuffer = NULL;
3938 
3939         return OK;
3940     }
3941 }
3942 
fragmentedRead(MediaBuffer ** out,const ReadOptions * options)3943 status_t MPEG4Source::fragmentedRead(
3944         MediaBuffer **out, const ReadOptions *options) {
3945 
3946     ALOGV("MPEG4Source::fragmentedRead");
3947 
3948     CHECK(mStarted);
3949 
3950     *out = NULL;
3951 
3952     int64_t targetSampleTimeUs = -1;
3953 
3954     int64_t seekTimeUs;
3955     ReadOptions::SeekMode mode;
3956     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3957 
3958         int numSidxEntries = mSegments.size();
3959         if (numSidxEntries != 0) {
3960             int64_t totalTime = 0;
3961             off64_t totalOffset = mFirstMoofOffset;
3962             for (int i = 0; i < numSidxEntries; i++) {
3963                 const SidxEntry *se = &mSegments[i];
3964                 if (totalTime + se->mDurationUs > seekTimeUs) {
3965                     // The requested time is somewhere in this segment
3966                     if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
3967                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3968                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3969                         // requested next sync, or closest sync and it was closer to the end of
3970                         // this segment
3971                         totalTime += se->mDurationUs;
3972                         totalOffset += se->mSize;
3973                     }
3974                     break;
3975                 }
3976                 totalTime += se->mDurationUs;
3977                 totalOffset += se->mSize;
3978             }
3979             mCurrentMoofOffset = totalOffset;
3980             mCurrentSamples.clear();
3981             mCurrentSampleIndex = 0;
3982             parseChunk(&totalOffset);
3983             mCurrentTime = totalTime * mTimescale / 1000000ll;
3984         } else {
3985             // without sidx boxes, we can only seek to 0
3986             mCurrentMoofOffset = mFirstMoofOffset;
3987             mCurrentSamples.clear();
3988             mCurrentSampleIndex = 0;
3989             off64_t tmp = mCurrentMoofOffset;
3990             parseChunk(&tmp);
3991             mCurrentTime = 0;
3992         }
3993 
3994         if (mBuffer != NULL) {
3995             mBuffer->release();
3996             mBuffer = NULL;
3997         }
3998 
3999         // fall through
4000     }
4001 
4002     off64_t offset = 0;
4003     size_t size = 0;
4004     uint32_t cts = 0;
4005     bool isSyncSample = false;
4006     bool newBuffer = false;
4007     if (mBuffer == NULL) {
4008         newBuffer = true;
4009 
4010         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4011             // move to next fragment if there is one
4012             if (mNextMoofOffset <= mCurrentMoofOffset) {
4013                 return ERROR_END_OF_STREAM;
4014             }
4015             off64_t nextMoof = mNextMoofOffset;
4016             mCurrentMoofOffset = nextMoof;
4017             mCurrentSamples.clear();
4018             mCurrentSampleIndex = 0;
4019             parseChunk(&nextMoof);
4020             if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4021                 return ERROR_END_OF_STREAM;
4022             }
4023         }
4024 
4025         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4026         offset = smpl->offset;
4027         size = smpl->size;
4028         cts = mCurrentTime + smpl->compositionOffset;
4029         mCurrentTime += smpl->duration;
4030         isSyncSample = (mCurrentSampleIndex == 0); // XXX
4031 
4032         status_t err = mGroup->acquire_buffer(&mBuffer);
4033 
4034         if (err != OK) {
4035             CHECK(mBuffer == NULL);
4036             ALOGV("acquire_buffer returned %d", err);
4037             return err;
4038         }
4039     }
4040 
4041     const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4042     const sp<MetaData> bufmeta = mBuffer->meta_data();
4043     bufmeta->clear();
4044     if (smpl->encryptedsizes.size()) {
4045         // store clear/encrypted lengths in metadata
4046         bufmeta->setData(kKeyPlainSizes, 0,
4047                 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4048         bufmeta->setData(kKeyEncryptedSizes, 0,
4049                 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4050         bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4051         bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4052         bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4053         bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4054     }
4055 
4056     if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4057         if (newBuffer) {
4058             ssize_t num_bytes_read =
4059                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4060 
4061             if (num_bytes_read < (ssize_t)size) {
4062                 mBuffer->release();
4063                 mBuffer = NULL;
4064 
4065                 ALOGV("i/o error");
4066                 return ERROR_IO;
4067             }
4068 
4069             CHECK(mBuffer != NULL);
4070             mBuffer->set_range(0, size);
4071             mBuffer->meta_data()->setInt64(
4072                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4073             mBuffer->meta_data()->setInt64(
4074                     kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4075 
4076             if (targetSampleTimeUs >= 0) {
4077                 mBuffer->meta_data()->setInt64(
4078                         kKeyTargetTime, targetSampleTimeUs);
4079             }
4080 
4081             if (isSyncSample) {
4082                 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4083             }
4084 
4085             ++mCurrentSampleIndex;
4086         }
4087 
4088         if (!mIsAVC && !mIsHEVC) {
4089             *out = mBuffer;
4090             mBuffer = NULL;
4091 
4092             return OK;
4093         }
4094 
4095         // Each NAL unit is split up into its constituent fragments and
4096         // each one of them returned in its own buffer.
4097 
4098         CHECK(mBuffer->range_length() >= mNALLengthSize);
4099 
4100         const uint8_t *src =
4101             (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4102 
4103         size_t nal_size = parseNALSize(src);
4104         if (mBuffer->range_length() < mNALLengthSize + nal_size) {
4105             ALOGE("incomplete NAL unit.");
4106 
4107             mBuffer->release();
4108             mBuffer = NULL;
4109 
4110             return ERROR_MALFORMED;
4111         }
4112 
4113         MediaBuffer *clone = mBuffer->clone();
4114         CHECK(clone != NULL);
4115         clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4116 
4117         CHECK(mBuffer != NULL);
4118         mBuffer->set_range(
4119                 mBuffer->range_offset() + mNALLengthSize + nal_size,
4120                 mBuffer->range_length() - mNALLengthSize - nal_size);
4121 
4122         if (mBuffer->range_length() == 0) {
4123             mBuffer->release();
4124             mBuffer = NULL;
4125         }
4126 
4127         *out = clone;
4128 
4129         return OK;
4130     } else {
4131         ALOGV("whole NAL");
4132         // Whole NAL units are returned but each fragment is prefixed by
4133         // the start code (0x00 00 00 01).
4134         ssize_t num_bytes_read = 0;
4135         int32_t drm = 0;
4136         bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4137         if (usesDRM) {
4138             num_bytes_read =
4139                 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4140         } else {
4141             num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4142         }
4143 
4144         if (num_bytes_read < (ssize_t)size) {
4145             mBuffer->release();
4146             mBuffer = NULL;
4147 
4148             ALOGV("i/o error");
4149             return ERROR_IO;
4150         }
4151 
4152         if (usesDRM) {
4153             CHECK(mBuffer != NULL);
4154             mBuffer->set_range(0, size);
4155 
4156         } else {
4157             uint8_t *dstData = (uint8_t *)mBuffer->data();
4158             size_t srcOffset = 0;
4159             size_t dstOffset = 0;
4160 
4161             while (srcOffset < size) {
4162                 bool isMalFormed = (srcOffset + mNALLengthSize > size);
4163                 size_t nalLength = 0;
4164                 if (!isMalFormed) {
4165                     nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4166                     srcOffset += mNALLengthSize;
4167                     isMalFormed = srcOffset + nalLength > size;
4168                 }
4169 
4170                 if (isMalFormed) {
4171                     ALOGE("Video is malformed");
4172                     mBuffer->release();
4173                     mBuffer = NULL;
4174                     return ERROR_MALFORMED;
4175                 }
4176 
4177                 if (nalLength == 0) {
4178                     continue;
4179                 }
4180 
4181                 CHECK(dstOffset + 4 <= mBuffer->size());
4182 
4183                 dstData[dstOffset++] = 0;
4184                 dstData[dstOffset++] = 0;
4185                 dstData[dstOffset++] = 0;
4186                 dstData[dstOffset++] = 1;
4187                 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4188                 srcOffset += nalLength;
4189                 dstOffset += nalLength;
4190             }
4191             CHECK_EQ(srcOffset, size);
4192             CHECK(mBuffer != NULL);
4193             mBuffer->set_range(0, dstOffset);
4194         }
4195 
4196         mBuffer->meta_data()->setInt64(
4197                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4198         mBuffer->meta_data()->setInt64(
4199                 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4200 
4201         if (targetSampleTimeUs >= 0) {
4202             mBuffer->meta_data()->setInt64(
4203                     kKeyTargetTime, targetSampleTimeUs);
4204         }
4205 
4206         if (isSyncSample) {
4207             mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4208         }
4209 
4210         ++mCurrentSampleIndex;
4211 
4212         *out = mBuffer;
4213         mBuffer = NULL;
4214 
4215         return OK;
4216     }
4217 }
4218 
findTrackByMimePrefix(const char * mimePrefix)4219 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4220         const char *mimePrefix) {
4221     for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4222         const char *mime;
4223         if (track->meta != NULL
4224                 && track->meta->findCString(kKeyMIMEType, &mime)
4225                 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4226             return track;
4227         }
4228     }
4229 
4230     return NULL;
4231 }
4232 
LegacySniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence)4233 static bool LegacySniffMPEG4(
4234         const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4235     uint8_t header[8];
4236 
4237     ssize_t n = source->readAt(4, header, sizeof(header));
4238     if (n < (ssize_t)sizeof(header)) {
4239         return false;
4240     }
4241 
4242     if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4243         || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4244         || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4245         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4246         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4247         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4248         *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4249         *confidence = 0.4;
4250 
4251         return true;
4252     }
4253 
4254     return false;
4255 }
4256 
isCompatibleBrand(uint32_t fourcc)4257 static bool isCompatibleBrand(uint32_t fourcc) {
4258     static const uint32_t kCompatibleBrands[] = {
4259         FOURCC('i', 's', 'o', 'm'),
4260         FOURCC('i', 's', 'o', '2'),
4261         FOURCC('a', 'v', 'c', '1'),
4262         FOURCC('h', 'v', 'c', '1'),
4263         FOURCC('h', 'e', 'v', '1'),
4264         FOURCC('3', 'g', 'p', '4'),
4265         FOURCC('m', 'p', '4', '1'),
4266         FOURCC('m', 'p', '4', '2'),
4267 
4268         // Won't promise that the following file types can be played.
4269         // Just give these file types a chance.
4270         FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4271         FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4272 
4273         FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4274         FOURCC('3', 'g', '2', 'b'),
4275     };
4276 
4277     for (size_t i = 0;
4278          i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4279          ++i) {
4280         if (kCompatibleBrands[i] == fourcc) {
4281             return true;
4282         }
4283     }
4284 
4285     return false;
4286 }
4287 
4288 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
4289 // compatible brand is present.
4290 // Also try to identify where this file's metadata ends
4291 // (end of the 'moov' atom) and report it to the caller as part of
4292 // the metadata.
BetterSniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4293 static bool BetterSniffMPEG4(
4294         const sp<DataSource> &source, String8 *mimeType, float *confidence,
4295         sp<AMessage> *meta) {
4296     // We scan up to 128 bytes to identify this file as an MP4.
4297     static const off64_t kMaxScanOffset = 128ll;
4298 
4299     off64_t offset = 0ll;
4300     bool foundGoodFileType = false;
4301     off64_t moovAtomEndOffset = -1ll;
4302     bool done = false;
4303 
4304     while (!done && offset < kMaxScanOffset) {
4305         uint32_t hdr[2];
4306         if (source->readAt(offset, hdr, 8) < 8) {
4307             return false;
4308         }
4309 
4310         uint64_t chunkSize = ntohl(hdr[0]);
4311         uint32_t chunkType = ntohl(hdr[1]);
4312         off64_t chunkDataOffset = offset + 8;
4313 
4314         if (chunkSize == 1) {
4315             if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4316                 return false;
4317             }
4318 
4319             chunkSize = ntoh64(chunkSize);
4320             chunkDataOffset += 8;
4321 
4322             if (chunkSize < 16) {
4323                 // The smallest valid chunk is 16 bytes long in this case.
4324                 return false;
4325             }
4326         } else if (chunkSize < 8) {
4327             // The smallest valid chunk is 8 bytes long.
4328             return false;
4329         }
4330 
4331         off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4332 
4333         char chunkstring[5];
4334         MakeFourCCString(chunkType, chunkstring);
4335         ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset);
4336         switch (chunkType) {
4337             case FOURCC('f', 't', 'y', 'p'):
4338             {
4339                 if (chunkDataSize < 8) {
4340                     return false;
4341                 }
4342 
4343                 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4344                 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4345                     if (i == 1) {
4346                         // Skip this index, it refers to the minorVersion,
4347                         // not a brand.
4348                         continue;
4349                     }
4350 
4351                     uint32_t brand;
4352                     if (source->readAt(
4353                                 chunkDataOffset + 4 * i, &brand, 4) < 4) {
4354                         return false;
4355                     }
4356 
4357                     brand = ntohl(brand);
4358 
4359                     if (isCompatibleBrand(brand)) {
4360                         foundGoodFileType = true;
4361                         break;
4362                     }
4363                 }
4364 
4365                 if (!foundGoodFileType) {
4366                     return false;
4367                 }
4368 
4369                 break;
4370             }
4371 
4372             case FOURCC('m', 'o', 'o', 'v'):
4373             {
4374                 moovAtomEndOffset = offset + chunkSize;
4375 
4376                 done = true;
4377                 break;
4378             }
4379 
4380             default:
4381                 break;
4382         }
4383 
4384         offset += chunkSize;
4385     }
4386 
4387     if (!foundGoodFileType) {
4388         return false;
4389     }
4390 
4391     *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4392     *confidence = 0.4f;
4393 
4394     if (moovAtomEndOffset >= 0) {
4395         *meta = new AMessage;
4396         (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4397 
4398         ALOGV("found metadata size: %lld", moovAtomEndOffset);
4399     }
4400 
4401     return true;
4402 }
4403 
SniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4404 bool SniffMPEG4(
4405         const sp<DataSource> &source, String8 *mimeType, float *confidence,
4406         sp<AMessage> *meta) {
4407     if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4408         return true;
4409     }
4410 
4411     if (LegacySniffMPEG4(source, mimeType, confidence)) {
4412         ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4413         return true;
4414     }
4415 
4416     return false;
4417 }
4418 
4419 }  // namespace android
4420