1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19 
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include <utils/Log.h>
27 
28 #include "include/MPEG4Extractor.h"
29 #include "include/SampleTable.h"
30 #include "include/ESDS.h"
31 
32 #include <media/stagefright/foundation/ABitReader.h>
33 #include <media/stagefright/foundation/ABuffer.h>
34 #include <media/stagefright/foundation/ADebug.h>
35 #include <media/stagefright/foundation/AMessage.h>
36 #include <media/stagefright/foundation/AUtils.h>
37 #include <media/stagefright/foundation/ColorUtils.h>
38 #include <media/stagefright/MediaBuffer.h>
39 #include <media/stagefright/MediaBufferGroup.h>
40 #include <media/stagefright/MediaDefs.h>
41 #include <media/stagefright/MediaSource.h>
42 #include <media/stagefright/MetaData.h>
43 #include <utils/String8.h>
44 
45 #include <byteswap.h>
46 #include "include/ID3.h"
47 
48 #ifndef UINT32_MAX
49 #define UINT32_MAX       (4294967295U)
50 #endif
51 
52 namespace android {
53 
// Upper bound, in bytes, on the leading sample data that getTrackMetaData()
// copies into a track's kKeyStreamHeader entry; also sizes the stack buffer
// used for that copy.
enum { kMaxTrackHeaderSize = 32 };
58 
59 class MPEG4Source : public MediaSource {
60 public:
61     // Caller retains ownership of both "dataSource" and "sampleTable".
62     MPEG4Source(const sp<MPEG4Extractor> &owner,
63                 const sp<MetaData> &format,
64                 const sp<DataSource> &dataSource,
65                 int32_t timeScale,
66                 const sp<SampleTable> &sampleTable,
67                 Vector<SidxEntry> &sidx,
68                 const Trex *trex,
69                 off64_t firstMoofOffset);
70 
71     virtual status_t start(MetaData *params = NULL);
72     virtual status_t stop();
73 
74     virtual sp<MetaData> getFormat();
75 
76     virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
77     virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
78 
79 protected:
80     virtual ~MPEG4Source();
81 
82 private:
83     Mutex mLock;
84 
85     // keep the MPEG4Extractor around, since we're referencing its data
86     sp<MPEG4Extractor> mOwner;
87     sp<MetaData> mFormat;
88     sp<DataSource> mDataSource;
89     int32_t mTimescale;
90     sp<SampleTable> mSampleTable;
91     uint32_t mCurrentSampleIndex;
92     uint32_t mCurrentFragmentIndex;
93     Vector<SidxEntry> &mSegments;
94     const Trex *mTrex;
95     off64_t mFirstMoofOffset;
96     off64_t mCurrentMoofOffset;
97     off64_t mNextMoofOffset;
98     uint32_t mCurrentTime;
99     int32_t mLastParsedTrackId;
100     int32_t mTrackId;
101 
102     int32_t mCryptoMode;    // passed in from extractor
103     int32_t mDefaultIVSize; // passed in from extractor
104     uint8_t mCryptoKey[16]; // passed in from extractor
105     uint32_t mCurrentAuxInfoType;
106     uint32_t mCurrentAuxInfoTypeParameter;
107     int32_t mCurrentDefaultSampleInfoSize;
108     uint32_t mCurrentSampleInfoCount;
109     uint32_t mCurrentSampleInfoAllocSize;
110     uint8_t* mCurrentSampleInfoSizes;
111     uint32_t mCurrentSampleInfoOffsetCount;
112     uint32_t mCurrentSampleInfoOffsetsAllocSize;
113     uint64_t* mCurrentSampleInfoOffsets;
114 
115     bool mIsAVC;
116     bool mIsHEVC;
117     size_t mNALLengthSize;
118 
119     bool mStarted;
120 
121     MediaBufferGroup *mGroup;
122 
123     MediaBuffer *mBuffer;
124 
125     bool mWantsNALFragments;
126 
127     uint8_t *mSrcBuffer;
128 
129     size_t parseNALSize(const uint8_t *data) const;
130     status_t parseChunk(off64_t *offset);
131     status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
132     status_t parseTrackFragmentRun(off64_t offset, off64_t size);
133     status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
134     status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
135 
136     struct TrackFragmentHeaderInfo {
137         enum Flags {
138             kBaseDataOffsetPresent         = 0x01,
139             kSampleDescriptionIndexPresent = 0x02,
140             kDefaultSampleDurationPresent  = 0x08,
141             kDefaultSampleSizePresent      = 0x10,
142             kDefaultSampleFlagsPresent     = 0x20,
143             kDurationIsEmpty               = 0x10000,
144         };
145 
146         uint32_t mTrackID;
147         uint32_t mFlags;
148         uint64_t mBaseDataOffset;
149         uint32_t mSampleDescriptionIndex;
150         uint32_t mDefaultSampleDuration;
151         uint32_t mDefaultSampleSize;
152         uint32_t mDefaultSampleFlags;
153 
154         uint64_t mDataOffset;
155     };
156     TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
157 
158     struct Sample {
159         off64_t offset;
160         size_t size;
161         uint32_t duration;
162         int32_t compositionOffset;
163         uint8_t iv[16];
164         Vector<size_t> clearsizes;
165         Vector<size_t> encryptedsizes;
166     };
167     Vector<Sample> mCurrentSamples;
168 
169     MPEG4Source(const MPEG4Source &);
170     MPEG4Source &operator=(const MPEG4Source &);
171 };
172 
173 // This custom data source wraps an existing one and satisfies requests
174 // falling entirely within a cached range from the cache while forwarding
175 // all remaining requests to the wrapped datasource.
176 // This is used to cache the full sampletable metadata for a single track,
177 // possibly wrapping multiple times to cover all tracks, i.e.
178 // Each MPEG4DataSource caches the sampletable metadata for a single track.
179 
180 struct MPEG4DataSource : public DataSource {
181     MPEG4DataSource(const sp<DataSource> &source);
182 
183     virtual status_t initCheck() const;
184     virtual ssize_t readAt(off64_t offset, void *data, size_t size);
185     virtual status_t getSize(off64_t *size);
186     virtual uint32_t flags();
187 
188     status_t setCachedRange(off64_t offset, size_t size);
189 
190 protected:
191     virtual ~MPEG4DataSource();
192 
193 private:
194     Mutex mLock;
195 
196     sp<DataSource> mSource;
197     off64_t mCachedOffset;
198     size_t mCachedSize;
199     uint8_t *mCache;
200 
201     void clearCache();
202 
203     MPEG4DataSource(const MPEG4DataSource &);
204     MPEG4DataSource &operator=(const MPEG4DataSource &);
205 };
206 
MPEG4DataSource(const sp<DataSource> & source)207 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
208     : mSource(source),
209       mCachedOffset(0),
210       mCachedSize(0),
211       mCache(NULL) {
212 }
213 
~MPEG4DataSource()214 MPEG4DataSource::~MPEG4DataSource() {
215     clearCache();
216 }
217 
clearCache()218 void MPEG4DataSource::clearCache() {
219     if (mCache) {
220         free(mCache);
221         mCache = NULL;
222     }
223 
224     mCachedOffset = 0;
225     mCachedSize = 0;
226 }
227 
initCheck() const228 status_t MPEG4DataSource::initCheck() const {
229     return mSource->initCheck();
230 }
231 
readAt(off64_t offset,void * data,size_t size)232 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
233     Mutex::Autolock autoLock(mLock);
234 
235     if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
236         memcpy(data, &mCache[offset - mCachedOffset], size);
237         return size;
238     }
239 
240     return mSource->readAt(offset, data, size);
241 }
242 
getSize(off64_t * size)243 status_t MPEG4DataSource::getSize(off64_t *size) {
244     return mSource->getSize(size);
245 }
246 
flags()247 uint32_t MPEG4DataSource::flags() {
248     return mSource->flags();
249 }
250 
setCachedRange(off64_t offset,size_t size)251 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
252     Mutex::Autolock autoLock(mLock);
253 
254     clearCache();
255 
256     mCache = (uint8_t *)malloc(size);
257 
258     if (mCache == NULL) {
259         return -ENOMEM;
260     }
261 
262     mCachedOffset = offset;
263     mCachedSize = size;
264 
265     ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
266 
267     if (err < (ssize_t)size) {
268         clearCache();
269 
270         return ERROR_IO;
271     }
272 
273     return OK;
274 }
275 
276 ////////////////////////////////////////////////////////////////////////////////
277 
// Compile-time switch: when true, parseChunk() prints every chunk it meets
// (and a hex dump of its first bytes) to stdout.
static const bool kUseHexDump = false;

// Prints |size| bytes starting at |_data| to stdout, 16 bytes per row. Each
// row shows the row offset, the bytes in hex (with an extra gap after the
// eighth column) and the same bytes as characters, with '.' standing in for
// anything isprint() rejects.
static void hexdump(const void *_data, size_t size) {
    static const size_t kBytesPerRow = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        printf("0x%04zx  ", rowStart);

        const size_t remaining = size - rowStart;
        const size_t rowLen = (remaining > kBytesPerRow) ? kBytesPerRow : remaining;

        // Hex columns; a short final row is padded so the text column lines up.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Text column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t byte = bytes[rowStart + col];
            if (isprint(byte)) {
                printf("%c", byte);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
318 
FourCC2MIME(uint32_t fourcc)319 static const char *FourCC2MIME(uint32_t fourcc) {
320     switch (fourcc) {
321         case FOURCC('m', 'p', '4', 'a'):
322             return MEDIA_MIMETYPE_AUDIO_AAC;
323 
324         case FOURCC('s', 'a', 'm', 'r'):
325             return MEDIA_MIMETYPE_AUDIO_AMR_NB;
326 
327         case FOURCC('s', 'a', 'w', 'b'):
328             return MEDIA_MIMETYPE_AUDIO_AMR_WB;
329 
330         case FOURCC('m', 'p', '4', 'v'):
331             return MEDIA_MIMETYPE_VIDEO_MPEG4;
332 
333         case FOURCC('s', '2', '6', '3'):
334         case FOURCC('h', '2', '6', '3'):
335         case FOURCC('H', '2', '6', '3'):
336             return MEDIA_MIMETYPE_VIDEO_H263;
337 
338         case FOURCC('a', 'v', 'c', '1'):
339             return MEDIA_MIMETYPE_VIDEO_AVC;
340 
341         case FOURCC('h', 'v', 'c', '1'):
342         case FOURCC('h', 'e', 'v', '1'):
343             return MEDIA_MIMETYPE_VIDEO_HEVC;
344         default:
345             CHECK(!"should not be here.");
346             return NULL;
347     }
348 }
349 
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)350 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
351     if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
352         // AMR NB audio is always mono, 8kHz
353         *channels = 1;
354         *rate = 8000;
355         return true;
356     } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
357         // AMR WB audio is always mono, 16kHz
358         *channels = 1;
359         *rate = 16000;
360         return true;
361     }
362     return false;
363 }
364 
MPEG4Extractor(const sp<DataSource> & source)365 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
366     : mMoofOffset(0),
367       mMoofFound(false),
368       mMdatFound(false),
369       mDataSource(source),
370       mInitCheck(NO_INIT),
371       mHasVideo(false),
372       mHeaderTimescale(0),
373       mFirstTrack(NULL),
374       mLastTrack(NULL),
375       mFileMetaData(new MetaData),
376       mFirstSINF(NULL),
377       mIsDrm(false) {
378 }
379 
~MPEG4Extractor()380 MPEG4Extractor::~MPEG4Extractor() {
381     Track *track = mFirstTrack;
382     while (track) {
383         Track *next = track->next;
384 
385         delete track;
386         track = next;
387     }
388     mFirstTrack = mLastTrack = NULL;
389 
390     SINF *sinf = mFirstSINF;
391     while (sinf) {
392         SINF *next = sinf->next;
393         delete[] sinf->IPMPData;
394         delete sinf;
395         sinf = next;
396     }
397     mFirstSINF = NULL;
398 
399     for (size_t i = 0; i < mPssh.size(); i++) {
400         delete [] mPssh[i].data;
401     }
402 }
403 
flags() const404 uint32_t MPEG4Extractor::flags() const {
405     return CAN_PAUSE |
406             ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
407                     (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
408 }
409 
getMetaData()410 sp<MetaData> MPEG4Extractor::getMetaData() {
411     status_t err;
412     if ((err = readMetaData()) != OK) {
413         return new MetaData;
414     }
415 
416     return mFileMetaData;
417 }
418 
countTracks()419 size_t MPEG4Extractor::countTracks() {
420     status_t err;
421     if ((err = readMetaData()) != OK) {
422         ALOGV("MPEG4Extractor::countTracks: no tracks");
423         return 0;
424     }
425 
426     size_t n = 0;
427     Track *track = mFirstTrack;
428     while (track) {
429         ++n;
430         track = track->next;
431     }
432 
433     ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
434     return n;
435 }
436 
getTrackMetaData(size_t index,uint32_t flags)437 sp<MetaData> MPEG4Extractor::getTrackMetaData(
438         size_t index, uint32_t flags) {
439     status_t err;
440     if ((err = readMetaData()) != OK) {
441         return NULL;
442     }
443 
444     Track *track = mFirstTrack;
445     while (index > 0) {
446         if (track == NULL) {
447             return NULL;
448         }
449 
450         track = track->next;
451         --index;
452     }
453 
454     if (track == NULL) {
455         return NULL;
456     }
457 
458     if ((flags & kIncludeExtensiveMetaData)
459             && !track->includes_expensive_metadata) {
460         track->includes_expensive_metadata = true;
461 
462         const char *mime;
463         CHECK(track->meta->findCString(kKeyMIMEType, &mime));
464         if (!strncasecmp("video/", mime, 6)) {
465             if (mMoofOffset > 0) {
466                 int64_t duration;
467                 if (track->meta->findInt64(kKeyDuration, &duration)) {
468                     // nothing fancy, just pick a frame near 1/4th of the duration
469                     track->meta->setInt64(
470                             kKeyThumbnailTime, duration / 4);
471                 }
472             } else {
473                 uint32_t sampleIndex;
474                 uint32_t sampleTime;
475                 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
476                         && track->sampleTable->getMetaDataForSample(
477                             sampleIndex, NULL /* offset */, NULL /* size */,
478                             &sampleTime) == OK) {
479                     track->meta->setInt64(
480                             kKeyThumbnailTime,
481                             ((int64_t)sampleTime * 1000000) / track->timescale);
482                 }
483             }
484 
485             // MPEG2 tracks do not provide CSD, so read the stream header
486             if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
487                 off64_t offset;
488                 size_t size;
489                 if (track->sampleTable->getMetaDataForSample(
490                             0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
491                     if (size > kMaxTrackHeaderSize) {
492                         size = kMaxTrackHeaderSize;
493                     }
494                     uint8_t header[kMaxTrackHeaderSize];
495                     if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
496                         track->meta->setData(kKeyStreamHeader, 'mdat', header, size);
497                     }
498                 }
499             }
500         }
501     }
502 
503     return track->meta;
504 }
505 
// Writes the four bytes of |x|, most significant first, into |s| followed by
// a terminating NUL. |s| must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (char)((x >> shift) & 0xff);
    }
    s[4] = '\0';
}
513 
readMetaData()514 status_t MPEG4Extractor::readMetaData() {
515     if (mInitCheck != NO_INIT) {
516         return mInitCheck;
517     }
518 
519     off64_t offset = 0;
520     status_t err;
521     bool sawMoovOrSidx = false;
522 
523     while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) {
524         off64_t orig_offset = offset;
525         err = parseChunk(&offset, 0);
526 
527         if (err != OK && err != UNKNOWN_ERROR) {
528             break;
529         } else if (offset <= orig_offset) {
530             // only continue parsing if the offset was advanced,
531             // otherwise we might end up in an infinite loop
532             ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
533             err = ERROR_MALFORMED;
534             break;
535         } else if (err == UNKNOWN_ERROR) {
536             sawMoovOrSidx = true;
537         }
538     }
539 
540     if (mInitCheck == OK) {
541         if (mHasVideo) {
542             mFileMetaData->setCString(
543                     kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
544         } else {
545             mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
546         }
547     } else {
548         mInitCheck = err;
549     }
550 
551     CHECK_NE(err, (status_t)NO_INIT);
552 
553     // copy pssh data into file metadata
554     uint64_t psshsize = 0;
555     for (size_t i = 0; i < mPssh.size(); i++) {
556         psshsize += 20 + mPssh[i].datalen;
557     }
558     if (psshsize > 0 && psshsize <= UINT32_MAX) {
559         char *buf = (char*)malloc(psshsize);
560         if (!buf) {
561             ALOGE("b/28471206");
562             return NO_MEMORY;
563         }
564         char *ptr = buf;
565         for (size_t i = 0; i < mPssh.size(); i++) {
566             memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
567             memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
568             ptr += (20 + mPssh[i].datalen);
569         }
570         mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
571         free(buf);
572     }
573     return mInitCheck;
574 }
575 
getDrmTrackInfo(size_t trackID,int * len)576 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
577     if (mFirstSINF == NULL) {
578         return NULL;
579     }
580 
581     SINF *sinf = mFirstSINF;
582     while (sinf && (trackID != sinf->trackID)) {
583         sinf = sinf->next;
584     }
585 
586     if (sinf == NULL) {
587         return NULL;
588     }
589 
590     *len = sinf->len;
591     return sinf->IPMPData;
592 }
593 
594 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
readSize(off64_t offset,const sp<DataSource> DataSource,uint8_t * numOfBytes)595 static int32_t readSize(off64_t offset,
596         const sp<DataSource> DataSource, uint8_t *numOfBytes) {
597     uint32_t size = 0;
598     uint8_t data;
599     bool moreData = true;
600     *numOfBytes = 0;
601 
602     while (moreData) {
603         if (DataSource->readAt(offset, &data, 1) < 1) {
604             return -1;
605         }
606         offset ++;
607         moreData = (data >= 128) ? true : false;
608         size = (size << 7) | (data & 0x7f); // Take last 7 bits
609         (*numOfBytes) ++;
610     }
611 
612     return size;
613 }
614 
parseDrmSINF(off64_t *,off64_t data_offset)615 status_t MPEG4Extractor::parseDrmSINF(
616         off64_t * /* offset */, off64_t data_offset) {
617     uint8_t updateIdTag;
618     if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
619         return ERROR_IO;
620     }
621     data_offset ++;
622 
623     if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
624         return ERROR_MALFORMED;
625     }
626 
627     uint8_t numOfBytes;
628     int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
629     if (size < 0) {
630         return ERROR_IO;
631     }
632     data_offset += numOfBytes;
633 
634     while(size >= 11 ) {
635         uint8_t descriptorTag;
636         if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
637             return ERROR_IO;
638         }
639         data_offset ++;
640 
641         if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
642             return ERROR_MALFORMED;
643         }
644 
645         uint8_t buffer[8];
646         //ObjectDescriptorID and ObjectDescriptor url flag
647         if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
648             return ERROR_IO;
649         }
650         data_offset += 2;
651 
652         if ((buffer[1] >> 5) & 0x0001) { //url flag is set
653             return ERROR_MALFORMED;
654         }
655 
656         if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
657             return ERROR_IO;
658         }
659         data_offset += 8;
660 
661         if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
662                 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
663             return ERROR_MALFORMED;
664         }
665 
666         SINF *sinf = new SINF;
667         sinf->trackID = U16_AT(&buffer[3]);
668         sinf->IPMPDescriptorID = buffer[7];
669         sinf->next = mFirstSINF;
670         mFirstSINF = sinf;
671 
672         size -= (8 + 2 + 1);
673     }
674 
675     if (size != 0) {
676         return ERROR_MALFORMED;
677     }
678 
679     if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
680         return ERROR_IO;
681     }
682     data_offset ++;
683 
684     if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
685         return ERROR_MALFORMED;
686     }
687 
688     size = readSize(data_offset, mDataSource, &numOfBytes);
689     if (size < 0) {
690         return ERROR_IO;
691     }
692     data_offset += numOfBytes;
693 
694     while (size > 0) {
695         uint8_t tag;
696         int32_t dataLen;
697         if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
698             return ERROR_IO;
699         }
700         data_offset ++;
701 
702         if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
703             uint8_t id;
704             dataLen = readSize(data_offset, mDataSource, &numOfBytes);
705             if (dataLen < 0) {
706                 return ERROR_IO;
707             } else if (dataLen < 4) {
708                 return ERROR_MALFORMED;
709             }
710             data_offset += numOfBytes;
711 
712             if (mDataSource->readAt(data_offset, &id, 1) < 1) {
713                 return ERROR_IO;
714             }
715             data_offset ++;
716 
717             SINF *sinf = mFirstSINF;
718             while (sinf && (sinf->IPMPDescriptorID != id)) {
719                 sinf = sinf->next;
720             }
721             if (sinf == NULL) {
722                 return ERROR_MALFORMED;
723             }
724             sinf->len = dataLen - 3;
725             sinf->IPMPData = new (std::nothrow) char[sinf->len];
726             if (sinf->IPMPData == NULL) {
727                 return ERROR_MALFORMED;
728             }
729             data_offset += 2;
730 
731             if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
732                 return ERROR_IO;
733             }
734             data_offset += sinf->len;
735 
736             size -= (dataLen + numOfBytes + 1);
737         }
738     }
739 
740     if (size != 0) {
741         return ERROR_MALFORMED;
742     }
743 
744     return UNKNOWN_ERROR;  // Return a dummy error.
745 }
746 
747 struct PathAdder {
PathAdderandroid::PathAdder748     PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
749         : mPath(path) {
750         mPath->push(chunkType);
751     }
752 
~PathAdderandroid::PathAdder753     ~PathAdder() {
754         mPath->pop();
755     }
756 
757 private:
758     Vector<uint32_t> *mPath;
759 
760     PathAdder(const PathAdder &);
761     PathAdder &operator=(const PathAdder &);
762 };
763 
underMetaDataPath(const Vector<uint32_t> & path)764 static bool underMetaDataPath(const Vector<uint32_t> &path) {
765     return path.size() >= 5
766         && path[0] == FOURCC('m', 'o', 'o', 'v')
767         && path[1] == FOURCC('u', 'd', 't', 'a')
768         && path[2] == FOURCC('m', 'e', 't', 'a')
769         && path[3] == FOURCC('i', 'l', 's', 't');
770 }
771 
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)772 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
773     return path.size() >= 2
774             && path[0] == FOURCC('m', 'o', 'o', 'v')
775             && path[1] == FOURCC('m', 'e', 't', 'a')
776             && (depth == 2
777             || (depth == 3
778                     && (path[2] == FOURCC('h', 'd', 'l', 'r')
779                     ||  path[2] == FOURCC('i', 'l', 's', 't')
780                     ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
781 }
782 
783 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)784 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
785     // delta between mpeg4 time and unix epoch time
786     static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
787     if (time_1904 < INT64_MIN + delta) {
788         return false;
789     }
790     time_t time_1970 = time_1904 - delta;
791 
792     char tmp[32];
793     struct tm* tm = gmtime(&time_1970);
794     if (tm != NULL &&
795             strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
796         s->setTo(tmp);
797         return true;
798     }
799     return false;
800 }
801 
parseChunk(off64_t * offset,int depth)802 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
803     ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
804 
805     if (*offset < 0) {
806         ALOGE("b/23540914");
807         return ERROR_MALFORMED;
808     }
809     uint32_t hdr[2];
810     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
811         return ERROR_IO;
812     }
813     uint64_t chunk_size = ntohl(hdr[0]);
814     int32_t chunk_type = ntohl(hdr[1]);
815     off64_t data_offset = *offset + 8;
816 
817     if (chunk_size == 1) {
818         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
819             return ERROR_IO;
820         }
821         chunk_size = ntoh64(chunk_size);
822         data_offset += 8;
823 
824         if (chunk_size < 16) {
825             // The smallest valid chunk is 16 bytes long in this case.
826             return ERROR_MALFORMED;
827         }
828     } else if (chunk_size == 0) {
829         if (depth == 0) {
830             // atom extends to end of file
831             off64_t sourceSize;
832             if (mDataSource->getSize(&sourceSize) == OK) {
833                 chunk_size = (sourceSize - *offset);
834             } else {
835                 // XXX could we just pick a "sufficiently large" value here?
836                 ALOGE("atom size is 0, and data source has no size");
837                 return ERROR_MALFORMED;
838             }
839         } else {
840             // not allowed for non-toplevel atoms, skip it
841             *offset += 4;
842             return OK;
843         }
844     } else if (chunk_size < 8) {
845         // The smallest valid chunk is 8 bytes long.
846         ALOGE("invalid chunk size: %" PRIu64, chunk_size);
847         return ERROR_MALFORMED;
848     }
849 
850     char chunk[5];
851     MakeFourCCString(chunk_type, chunk);
852     ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
853 
854     if (kUseHexDump) {
855         static const char kWhitespace[] = "                                        ";
856         const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
857         printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
858 
859         char buffer[256];
860         size_t n = chunk_size;
861         if (n > sizeof(buffer)) {
862             n = sizeof(buffer);
863         }
864         if (mDataSource->readAt(*offset, buffer, n)
865                 < (ssize_t)n) {
866             return ERROR_IO;
867         }
868 
869         hexdump(buffer, n);
870     }
871 
872     PathAdder autoAdder(&mPath, chunk_type);
873 
874     // (data_offset - *offset) is either 8 or 16
875     off64_t chunk_data_size = chunk_size - (data_offset - *offset);
876     if (chunk_data_size < 0) {
877         ALOGE("b/23540914");
878         return ERROR_MALFORMED;
879     }
880 
881     if (chunk_type != FOURCC('c', 'p', 'r', 't')
882             && chunk_type != FOURCC('c', 'o', 'v', 'r')
883             && mPath.size() == 5 && underMetaDataPath(mPath)) {
884         off64_t stop_offset = *offset + chunk_size;
885         *offset = data_offset;
886         while (*offset < stop_offset) {
887             status_t err = parseChunk(offset, depth + 1);
888             if (err != OK) {
889                 return err;
890             }
891         }
892 
893         if (*offset != stop_offset) {
894             return ERROR_MALFORMED;
895         }
896 
897         return OK;
898     }
899 
900     switch(chunk_type) {
901         case FOURCC('m', 'o', 'o', 'v'):
902         case FOURCC('t', 'r', 'a', 'k'):
903         case FOURCC('m', 'd', 'i', 'a'):
904         case FOURCC('m', 'i', 'n', 'f'):
905         case FOURCC('d', 'i', 'n', 'f'):
906         case FOURCC('s', 't', 'b', 'l'):
907         case FOURCC('m', 'v', 'e', 'x'):
908         case FOURCC('m', 'o', 'o', 'f'):
909         case FOURCC('t', 'r', 'a', 'f'):
910         case FOURCC('m', 'f', 'r', 'a'):
911         case FOURCC('u', 'd', 't', 'a'):
912         case FOURCC('i', 'l', 's', 't'):
913         case FOURCC('s', 'i', 'n', 'f'):
914         case FOURCC('s', 'c', 'h', 'i'):
915         case FOURCC('e', 'd', 't', 's'):
916         {
917             if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
918                 // store the offset of the first segment
919                 mMoofFound = true;
920                 mMoofOffset = *offset;
921             }
922 
923             if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
924                 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
925 
926                 if (mDataSource->flags()
927                         & (DataSource::kWantsPrefetching
928                             | DataSource::kIsCachingDataSource)) {
929                     sp<MPEG4DataSource> cachedSource =
930                         new MPEG4DataSource(mDataSource);
931 
932                     if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
933                         mDataSource = cachedSource;
934                     }
935                 }
936 
937                 if (mLastTrack == NULL)
938                     return ERROR_MALFORMED;
939 
940                 mLastTrack->sampleTable = new SampleTable(mDataSource);
941             }
942 
943             bool isTrack = false;
944             if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
945                 isTrack = true;
946 
947                 Track *track = new Track;
948                 track->next = NULL;
949                 if (mLastTrack) {
950                     mLastTrack->next = track;
951                 } else {
952                     mFirstTrack = track;
953                 }
954                 mLastTrack = track;
955 
956                 track->meta = new MetaData;
957                 track->includes_expensive_metadata = false;
958                 track->skipTrack = false;
959                 track->timescale = 0;
960                 track->meta->setCString(kKeyMIMEType, "application/octet-stream");
961             }
962 
963             off64_t stop_offset = *offset + chunk_size;
964             *offset = data_offset;
965             while (*offset < stop_offset) {
966                 status_t err = parseChunk(offset, depth + 1);
967                 if (err != OK) {
968                     return err;
969                 }
970             }
971 
972             if (*offset != stop_offset) {
973                 return ERROR_MALFORMED;
974             }
975 
976             if (isTrack) {
977                 int32_t trackId;
                // There must be exactly one track header per track.
979                 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
980                     mLastTrack->skipTrack = true;
981                 }
982                 if (mLastTrack->skipTrack) {
983                     Track *cur = mFirstTrack;
984 
985                     if (cur == mLastTrack) {
986                         delete cur;
987                         mFirstTrack = mLastTrack = NULL;
988                     } else {
989                         while (cur && cur->next != mLastTrack) {
990                             cur = cur->next;
991                         }
992                         cur->next = NULL;
993                         delete mLastTrack;
994                         mLastTrack = cur;
995                     }
996 
997                     return OK;
998                 }
999 
1000                 status_t err = verifyTrack(mLastTrack);
1001 
1002                 if (err != OK) {
1003                     return err;
1004                 }
1005             } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
1006                 mInitCheck = OK;
1007 
1008                 if (!mIsDrm) {
1009                     return UNKNOWN_ERROR;  // Return a dummy error.
1010                 } else {
1011                     return OK;
1012                 }
1013             }
1014             break;
1015         }
1016 
1017         case FOURCC('e', 'l', 's', 't'):
1018         {
1019             *offset += chunk_size;
1020 
1021             // See 14496-12 8.6.6
1022             uint8_t version;
1023             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1024                 return ERROR_IO;
1025             }
1026 
1027             uint32_t entry_count;
1028             if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1029                 return ERROR_IO;
1030             }
1031 
1032             if (entry_count != 1) {
1033                 // we only support a single entry at the moment, for gapless playback
1034                 ALOGW("ignoring edit list with %d entries", entry_count);
1035             } else if (mHeaderTimescale == 0) {
1036                 ALOGW("ignoring edit list because timescale is 0");
1037             } else {
1038                 off64_t entriesoffset = data_offset + 8;
1039                 uint64_t segment_duration;
1040                 int64_t media_time;
1041 
1042                 if (version == 1) {
1043                     if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1044                             !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1045                         return ERROR_IO;
1046                     }
1047                 } else if (version == 0) {
1048                     uint32_t sd;
1049                     int32_t mt;
1050                     if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1051                             !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1052                         return ERROR_IO;
1053                     }
1054                     segment_duration = sd;
1055                     media_time = mt;
1056                 } else {
1057                     return ERROR_IO;
1058                 }
1059 
1060                 uint64_t halfscale = mHeaderTimescale / 2;
1061                 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
1062                 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
1063 
1064                 int64_t duration;
1065                 int32_t samplerate;
1066                 if (!mLastTrack) {
1067                     return ERROR_MALFORMED;
1068                 }
1069                 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1070                         mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1071 
1072                     int64_t delay = (media_time  * samplerate + 500000) / 1000000;
1073                     mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1074 
1075                     int64_t paddingus = duration - (int64_t)(segment_duration + media_time);
1076                     if (paddingus < 0) {
1077                         // track duration from media header (which is what kKeyDuration is) might
1078                         // be slightly shorter than the segment duration, which would make the
1079                         // padding negative. Clamp to zero.
1080                         paddingus = 0;
1081                     }
1082                     int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1083                     mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1084                 }
1085             }
1086             break;
1087         }
1088 
1089         case FOURCC('f', 'r', 'm', 'a'):
1090         {
1091             *offset += chunk_size;
1092 
1093             uint32_t original_fourcc;
1094             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1095                 return ERROR_IO;
1096             }
1097             original_fourcc = ntohl(original_fourcc);
1098             ALOGV("read original format: %d", original_fourcc);
1099 
1100             if (mLastTrack == NULL)
1101                 return ERROR_MALFORMED;
1102 
1103             mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1104             uint32_t num_channels = 0;
1105             uint32_t sample_rate = 0;
1106             if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1107                 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1108                 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1109             }
1110             break;
1111         }
1112 
1113         case FOURCC('t', 'e', 'n', 'c'):
1114         {
1115             *offset += chunk_size;
1116 
1117             if (chunk_size < 32) {
1118                 return ERROR_MALFORMED;
1119             }
1120 
1121             // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1122             // default IV size, 16 bytes default KeyID
1123             // (ISO 23001-7)
1124             char buf[4];
1125             memset(buf, 0, 4);
1126             if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1127                 return ERROR_IO;
1128             }
1129             uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1130             if (defaultAlgorithmId > 1) {
1131                 // only 0 (clear) and 1 (AES-128) are valid
1132                 return ERROR_MALFORMED;
1133             }
1134 
1135             memset(buf, 0, 4);
1136             if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1137                 return ERROR_IO;
1138             }
1139             uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1140 
1141             if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1142                     (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1143                 // only unencrypted data must have 0 IV size
1144                 return ERROR_MALFORMED;
1145             } else if (defaultIVSize != 0 &&
1146                     defaultIVSize != 8 &&
1147                     defaultIVSize != 16) {
1148                 // only supported sizes are 0, 8 and 16
1149                 return ERROR_MALFORMED;
1150             }
1151 
1152             uint8_t defaultKeyId[16];
1153 
1154             if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1155                 return ERROR_IO;
1156             }
1157 
1158             if (mLastTrack == NULL)
1159                 return ERROR_MALFORMED;
1160 
1161             mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1162             mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1163             mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1164             break;
1165         }
1166 
1167         case FOURCC('t', 'k', 'h', 'd'):
1168         {
1169             *offset += chunk_size;
1170 
1171             status_t err;
1172             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1173                 return err;
1174             }
1175 
1176             break;
1177         }
1178 
1179         case FOURCC('p', 's', 's', 'h'):
1180         {
1181             *offset += chunk_size;
1182 
1183             PsshInfo pssh;
1184 
1185             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1186                 return ERROR_IO;
1187             }
1188 
1189             uint32_t psshdatalen = 0;
1190             if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1191                 return ERROR_IO;
1192             }
1193             pssh.datalen = ntohl(psshdatalen);
1194             ALOGV("pssh data size: %d", pssh.datalen);
1195             if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1196                 // pssh data length exceeds size of containing box
1197                 return ERROR_MALFORMED;
1198             }
1199 
1200             pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1201             if (pssh.data == NULL) {
1202                 return ERROR_MALFORMED;
1203             }
1204             ALOGV("allocated pssh @ %p", pssh.data);
1205             ssize_t requested = (ssize_t) pssh.datalen;
1206             if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1207                 return ERROR_IO;
1208             }
1209             mPssh.push_back(pssh);
1210 
1211             break;
1212         }
1213 
1214         case FOURCC('m', 'd', 'h', 'd'):
1215         {
1216             *offset += chunk_size;
1217 
1218             if (chunk_data_size < 4 || mLastTrack == NULL) {
1219                 return ERROR_MALFORMED;
1220             }
1221 
1222             uint8_t version;
1223             if (mDataSource->readAt(
1224                         data_offset, &version, sizeof(version))
1225                     < (ssize_t)sizeof(version)) {
1226                 return ERROR_IO;
1227             }
1228 
1229             off64_t timescale_offset;
1230 
1231             if (version == 1) {
1232                 timescale_offset = data_offset + 4 + 16;
1233             } else if (version == 0) {
1234                 timescale_offset = data_offset + 4 + 8;
1235             } else {
1236                 return ERROR_IO;
1237             }
1238 
1239             uint32_t timescale;
1240             if (mDataSource->readAt(
1241                         timescale_offset, &timescale, sizeof(timescale))
1242                     < (ssize_t)sizeof(timescale)) {
1243                 return ERROR_IO;
1244             }
1245 
1246             if (!timescale) {
1247                 ALOGE("timescale should not be ZERO.");
1248                 return ERROR_MALFORMED;
1249             }
1250 
1251             mLastTrack->timescale = ntohl(timescale);
1252 
1253             // 14496-12 says all ones means indeterminate, but some files seem to use
1254             // 0 instead. We treat both the same.
1255             int64_t duration = 0;
1256             if (version == 1) {
1257                 if (mDataSource->readAt(
1258                             timescale_offset + 4, &duration, sizeof(duration))
1259                         < (ssize_t)sizeof(duration)) {
1260                     return ERROR_IO;
1261                 }
1262                 if (duration != -1) {
1263                     duration = ntoh64(duration);
1264                 }
1265             } else {
1266                 uint32_t duration32;
1267                 if (mDataSource->readAt(
1268                             timescale_offset + 4, &duration32, sizeof(duration32))
1269                         < (ssize_t)sizeof(duration32)) {
1270                     return ERROR_IO;
1271                 }
1272                 if (duration32 != 0xffffffff) {
1273                     duration = ntohl(duration32);
1274                 }
1275             }
1276             if (duration != 0 && mLastTrack->timescale != 0) {
1277                 mLastTrack->meta->setInt64(
1278                         kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1279             }
1280 
1281             uint8_t lang[2];
1282             off64_t lang_offset;
1283             if (version == 1) {
1284                 lang_offset = timescale_offset + 4 + 8;
1285             } else if (version == 0) {
1286                 lang_offset = timescale_offset + 4 + 4;
1287             } else {
1288                 return ERROR_IO;
1289             }
1290 
1291             if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1292                     < (ssize_t)sizeof(lang)) {
1293                 return ERROR_IO;
1294             }
1295 
            // Decode the ISO-639-2/T three-character language code: a 1-bit pad
            // followed by three 5-bit characters. Each character is packed as
            // the difference between its ASCII value and 0x60.
1299             char lang_code[4];
1300             lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1301             lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1302             lang_code[2] = (lang[1] & 0x1f) + 0x60;
1303             lang_code[3] = '\0';
1304 
1305             mLastTrack->meta->setCString(
1306                     kKeyMediaLanguage, lang_code);
1307 
1308             break;
1309         }
1310 
1311         case FOURCC('s', 't', 's', 'd'):
1312         {
1313             if (chunk_data_size < 8) {
1314                 return ERROR_MALFORMED;
1315             }
1316 
1317             uint8_t buffer[8];
1318             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1319                 return ERROR_MALFORMED;
1320             }
1321 
1322             if (mDataSource->readAt(
1323                         data_offset, buffer, 8) < 8) {
1324                 return ERROR_IO;
1325             }
1326 
1327             if (U32_AT(buffer) != 0) {
1328                 // Should be version 0, flags 0.
1329                 return ERROR_MALFORMED;
1330             }
1331 
1332             uint32_t entry_count = U32_AT(&buffer[4]);
1333 
1334             if (entry_count > 1) {
                // For 3GPP timed text, there could be multiple tx3g boxes containing
1336                 // multiple text display formats. These formats will be used to
1337                 // display the timed text.
1338                 // For encrypted files, there may also be more than one entry.
1339                 const char *mime;
1340 
1341                 if (mLastTrack == NULL)
1342                     return ERROR_MALFORMED;
1343 
1344                 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1345                 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1346                         strcasecmp(mime, "application/octet-stream")) {
1347                     // For now we only support a single type of media per track.
1348                     mLastTrack->skipTrack = true;
1349                     *offset += chunk_size;
1350                     break;
1351                 }
1352             }
1353             off64_t stop_offset = *offset + chunk_size;
1354             *offset = data_offset + 8;
1355             for (uint32_t i = 0; i < entry_count; ++i) {
1356                 status_t err = parseChunk(offset, depth + 1);
1357                 if (err != OK) {
1358                     return err;
1359                 }
1360             }
1361 
1362             if (*offset != stop_offset) {
1363                 return ERROR_MALFORMED;
1364             }
1365             break;
1366         }
1367 
1368         case FOURCC('m', 'p', '4', 'a'):
1369         case FOURCC('e', 'n', 'c', 'a'):
1370         case FOURCC('s', 'a', 'm', 'r'):
1371         case FOURCC('s', 'a', 'w', 'b'):
1372         {
1373             uint8_t buffer[8 + 20];
1374             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1375                 // Basic AudioSampleEntry size.
1376                 return ERROR_MALFORMED;
1377             }
1378 
1379             if (mDataSource->readAt(
1380                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1381                 return ERROR_IO;
1382             }
1383 
1384             uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1385             uint32_t num_channels = U16_AT(&buffer[16]);
1386 
1387             uint16_t sample_size = U16_AT(&buffer[18]);
1388             uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1389 
1390             if (mLastTrack == NULL)
1391                 return ERROR_MALFORMED;
1392 
1393             if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1394                 // if the chunk type is enca, we'll get the type from the sinf/frma box later
1395                 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1396                 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1397             }
1398             ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1399                    chunk, num_channels, sample_size, sample_rate);
1400             mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1401             mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1402 
1403             off64_t stop_offset = *offset + chunk_size;
1404             *offset = data_offset + sizeof(buffer);
1405             while (*offset < stop_offset) {
1406                 status_t err = parseChunk(offset, depth + 1);
1407                 if (err != OK) {
1408                     return err;
1409                 }
1410             }
1411 
1412             if (*offset != stop_offset) {
1413                 return ERROR_MALFORMED;
1414             }
1415             break;
1416         }
1417 
1418         case FOURCC('m', 'p', '4', 'v'):
1419         case FOURCC('e', 'n', 'c', 'v'):
1420         case FOURCC('s', '2', '6', '3'):
1421         case FOURCC('H', '2', '6', '3'):
1422         case FOURCC('h', '2', '6', '3'):
1423         case FOURCC('a', 'v', 'c', '1'):
1424         case FOURCC('h', 'v', 'c', '1'):
1425         case FOURCC('h', 'e', 'v', '1'):
1426         {
1427             mHasVideo = true;
1428 
1429             uint8_t buffer[78];
1430             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1431                 // Basic VideoSampleEntry size.
1432                 return ERROR_MALFORMED;
1433             }
1434 
1435             if (mDataSource->readAt(
1436                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1437                 return ERROR_IO;
1438             }
1439 
1440             uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1441             uint16_t width = U16_AT(&buffer[6 + 18]);
1442             uint16_t height = U16_AT(&buffer[6 + 20]);
1443 
            // The video sample is not standard-compliant if it has an invalid dimension.
            // Use some default width and height values, and
            // let the decoder figure out the actual width and height (and thus
            // be prepared for an INFO_FORMAT_CHANGED event).
1448             if (width == 0)  width  = 352;
1449             if (height == 0) height = 288;
1450 
1451             // printf("*** coding='%s' width=%d height=%d\n",
1452             //        chunk, width, height);
1453 
1454             if (mLastTrack == NULL)
1455                 return ERROR_MALFORMED;
1456 
1457             if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1458                 // if the chunk type is encv, we'll get the type from the sinf/frma box later
1459                 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1460             }
1461             mLastTrack->meta->setInt32(kKeyWidth, width);
1462             mLastTrack->meta->setInt32(kKeyHeight, height);
1463 
1464             off64_t stop_offset = *offset + chunk_size;
1465             *offset = data_offset + sizeof(buffer);
1466             while (*offset < stop_offset) {
1467                 status_t err = parseChunk(offset, depth + 1);
1468                 if (err != OK) {
1469                     return err;
1470                 }
1471             }
1472 
1473             if (*offset != stop_offset) {
1474                 return ERROR_MALFORMED;
1475             }
1476             break;
1477         }
1478 
1479         case FOURCC('s', 't', 'c', 'o'):
1480         case FOURCC('c', 'o', '6', '4'):
1481         {
1482             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1483                 return ERROR_MALFORMED;
1484 
1485             status_t err =
1486                 mLastTrack->sampleTable->setChunkOffsetParams(
1487                         chunk_type, data_offset, chunk_data_size);
1488 
1489             *offset += chunk_size;
1490 
1491             if (err != OK) {
1492                 return err;
1493             }
1494 
1495             break;
1496         }
1497 
1498         case FOURCC('s', 't', 's', 'c'):
1499         {
1500             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1501                 return ERROR_MALFORMED;
1502 
1503             status_t err =
1504                 mLastTrack->sampleTable->setSampleToChunkParams(
1505                         data_offset, chunk_data_size);
1506 
1507             *offset += chunk_size;
1508 
1509             if (err != OK) {
1510                 return err;
1511             }
1512 
1513             break;
1514         }
1515 
1516         case FOURCC('s', 't', 's', 'z'):
1517         case FOURCC('s', 't', 'z', '2'):
1518         {
1519             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1520                 return ERROR_MALFORMED;
1521 
1522             status_t err =
1523                 mLastTrack->sampleTable->setSampleSizeParams(
1524                         chunk_type, data_offset, chunk_data_size);
1525 
1526             *offset += chunk_size;
1527 
1528             if (err != OK) {
1529                 return err;
1530             }
1531 
1532             size_t max_size;
1533             err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1534 
1535             if (err != OK) {
1536                 return err;
1537             }
1538 
1539             if (max_size != 0) {
1540                 // Assume that a given buffer only contains at most 10 chunks,
1541                 // each chunk originally prefixed with a 2 byte length will
1542                 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1543                 // and thus will grow by 2 bytes per chunk.
1544                 if (max_size > SIZE_MAX - 10 * 2) {
1545                     ALOGE("max sample size too big: %zu", max_size);
1546                     return ERROR_MALFORMED;
1547                 }
1548                 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1549             } else {
1550                 // No size was specified. Pick a conservatively large size.
1551                 uint32_t width, height;
1552                 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1553                     !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1554                     ALOGE("No width or height, assuming worst case 1080p");
1555                     width = 1920;
1556                     height = 1080;
1557                 } else {
1558                     // A resolution was specified, check that it's not too big. The values below
1559                     // were chosen so that the calculations below don't cause overflows, they're
1560                     // not indicating that resolutions up to 32kx32k are actually supported.
1561                     if (width > 32768 || height > 32768) {
1562                         ALOGE("can't support %u x %u video", width, height);
1563                         return ERROR_MALFORMED;
1564                     }
1565                 }
1566 
1567                 const char *mime;
1568                 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1569                 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1570                         || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
                    // AVC & HEVC require a compression ratio of at least 2, and use
                    // macroblocks
1573                     max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1574                 } else {
1575                     // For all other formats there is no minimum compression
1576                     // ratio. Use compression ratio of 1.
1577                     max_size = width * height * 3 / 2;
1578                 }
1579                 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1580             }
1581 
1582             // NOTE: setting another piece of metadata invalidates any pointers (such as the
1583             // mimetype) previously obtained, so don't cache them.
1584             const char *mime;
1585             CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1586             // Calculate average frame rate.
1587             if (!strncasecmp("video/", mime, 6)) {
1588                 size_t nSamples = mLastTrack->sampleTable->countSamples();
1589                 if (nSamples == 0) {
1590                     int32_t trackId;
1591                     if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
1592                         for (size_t i = 0; i < mTrex.size(); i++) {
1593                             Trex *t = &mTrex.editItemAt(i);
1594                             if (t->track_ID == (uint32_t) trackId) {
1595                                 if (t->default_sample_duration > 0) {
1596                                     int32_t frameRate =
1597                                             mLastTrack->timescale / t->default_sample_duration;
1598                                     mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1599                                 }
1600                                 break;
1601                             }
1602                         }
1603                     }
1604                 } else {
1605                     int64_t durationUs;
1606                     if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1607                         if (durationUs > 0) {
1608                             int32_t frameRate = (nSamples * 1000000LL +
1609                                         (durationUs >> 1)) / durationUs;
1610                             mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1611                         }
1612                     }
1613                 }
1614             }
1615 
1616             break;
1617         }
1618 
1619         case FOURCC('s', 't', 't', 's'):
1620         {
1621             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1622                 return ERROR_MALFORMED;
1623 
1624             *offset += chunk_size;
1625 
1626             status_t err =
1627                 mLastTrack->sampleTable->setTimeToSampleParams(
1628                         data_offset, chunk_data_size);
1629 
1630             if (err != OK) {
1631                 return err;
1632             }
1633 
1634             break;
1635         }
1636 
1637         case FOURCC('c', 't', 't', 's'):
1638         {
1639             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1640                 return ERROR_MALFORMED;
1641 
1642             *offset += chunk_size;
1643 
1644             status_t err =
1645                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1646                         data_offset, chunk_data_size);
1647 
1648             if (err != OK) {
1649                 return err;
1650             }
1651 
1652             break;
1653         }
1654 
1655         case FOURCC('s', 't', 's', 's'):
1656         {
1657             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1658                 return ERROR_MALFORMED;
1659 
1660             *offset += chunk_size;
1661 
1662             status_t err =
1663                 mLastTrack->sampleTable->setSyncSampleParams(
1664                         data_offset, chunk_data_size);
1665 
1666             if (err != OK) {
1667                 return err;
1668             }
1669 
1670             break;
1671         }
1672 
1673         // \xA9xyz
1674         case FOURCC(0xA9, 'x', 'y', 'z'):
1675         {
1676             *offset += chunk_size;
1677 
1678             // Best case the total data length inside "\xA9xyz" box
1679             // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/",
1680             // where "\x00\x04" is the text string length with value = 4,
            // "\x15\xc7" is the language code = en, and "0+0" is a
1682             // location (string) value with longitude = 0 and latitude = 0.
1683             if (chunk_data_size < 8) {
1684                 return ERROR_MALFORMED;
1685             }
1686 
            // Worst case the location string length would be 17,
            // for instance +90.0000-180.0000, without the trailing "/" and
            // the string length + language code; one extra byte holds the
            // terminating NUL.
1690             char buffer[18];
1691 
            // Subtract 5 from the data size because the text string length +
            // language code take 4 bytes, and the trailing slash "/" takes 1 byte.
1694             off64_t location_length = chunk_data_size - 5;
1695             if (location_length >= (off64_t) sizeof(buffer)) {
1696                 return ERROR_MALFORMED;
1697             }
1698 
1699             if (mDataSource->readAt(
1700                         data_offset + 4, buffer, location_length) < location_length) {
1701                 return ERROR_IO;
1702             }
1703 
1704             buffer[location_length] = '\0';
1705             mFileMetaData->setCString(kKeyLocation, buffer);
1706             break;
1707         }
1708 
1709         case FOURCC('e', 's', 'd', 's'):
1710         {
1711             *offset += chunk_size;
1712 
1713             if (chunk_data_size < 4) {
1714                 return ERROR_MALFORMED;
1715             }
1716 
1717             uint8_t buffer[256];
1718             if (chunk_data_size > (off64_t)sizeof(buffer)) {
1719                 return ERROR_BUFFER_TOO_SMALL;
1720             }
1721 
1722             if (mDataSource->readAt(
1723                         data_offset, buffer, chunk_data_size) < chunk_data_size) {
1724                 return ERROR_IO;
1725             }
1726 
1727             if (U32_AT(buffer) != 0) {
1728                 // Should be version 0, flags 0.
1729                 return ERROR_MALFORMED;
1730             }
1731 
1732             if (mLastTrack == NULL)
1733                 return ERROR_MALFORMED;
1734 
1735             mLastTrack->meta->setData(
1736                     kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1737 
1738             if (mPath.size() >= 2
1739                     && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1740                 // Information from the ESDS must be relied on for proper
1741                 // setup of sample rate and channel count for MPEG4 Audio.
1742                 // The generic header appears to only contain generic
1743                 // information...
1744 
1745                 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1746                         &buffer[4], chunk_data_size - 4);
1747 
1748                 if (err != OK) {
1749                     return err;
1750                 }
1751             }
1752             if (mPath.size() >= 2
1753                     && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1754                 // Check if the video is MPEG2
1755                 ESDS esds(&buffer[4], chunk_data_size - 4);
1756 
1757                 uint8_t objectTypeIndication;
1758                 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1759                     if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1760                         mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1761                     }
1762                 }
1763             }
1764             break;
1765         }
1766 
1767         case FOURCC('b', 't', 'r', 't'):
1768         {
1769             *offset += chunk_size;
1770 
1771             uint8_t buffer[12];
1772             if (chunk_data_size != sizeof(buffer)) {
1773                 return ERROR_MALFORMED;
1774             }
1775 
1776             if (mDataSource->readAt(
1777                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
1778                 return ERROR_IO;
1779             }
1780 
1781             uint32_t maxBitrate = U32_AT(&buffer[4]);
1782             uint32_t avgBitrate = U32_AT(&buffer[8]);
1783             if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1784                 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1785             }
1786             if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1787                 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate);
1788             }
1789             break;
1790         }
1791 
1792         case FOURCC('a', 'v', 'c', 'C'):
1793         {
1794             *offset += chunk_size;
1795 
1796             sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1797 
1798             if (buffer->data() == NULL) {
1799                 ALOGE("b/28471206");
1800                 return NO_MEMORY;
1801             }
1802 
1803             if (mDataSource->readAt(
1804                         data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1805                 return ERROR_IO;
1806             }
1807 
1808             if (mLastTrack == NULL)
1809                 return ERROR_MALFORMED;
1810 
1811             mLastTrack->meta->setData(
1812                     kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1813 
1814             break;
1815         }
1816         case FOURCC('h', 'v', 'c', 'C'):
1817         {
1818             sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1819 
1820             if (buffer->data() == NULL) {
1821                 ALOGE("b/28471206");
1822                 return NO_MEMORY;
1823             }
1824 
1825             if (mDataSource->readAt(
1826                         data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1827                 return ERROR_IO;
1828             }
1829 
1830             if (mLastTrack == NULL)
1831                 return ERROR_MALFORMED;
1832 
1833             mLastTrack->meta->setData(
1834                     kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1835 
1836             *offset += chunk_size;
1837             break;
1838         }
1839 
1840         case FOURCC('d', '2', '6', '3'):
1841         {
1842             *offset += chunk_size;
1843             /*
1844              * d263 contains a fixed 7 bytes part:
1845              *   vendor - 4 bytes
1846              *   version - 1 byte
1847              *   level - 1 byte
1848              *   profile - 1 byte
1849              * optionally, "d263" box itself may contain a 16-byte
1850              * bit rate box (bitr)
1851              *   average bit rate - 4 bytes
1852              *   max bit rate - 4 bytes
1853              */
1854             char buffer[23];
1855             if (chunk_data_size != 7 &&
1856                 chunk_data_size != 23) {
1857                 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
1858                 return ERROR_MALFORMED;
1859             }
1860 
1861             if (mDataSource->readAt(
1862                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
1863                 return ERROR_IO;
1864             }
1865 
1866             if (mLastTrack == NULL)
1867                 return ERROR_MALFORMED;
1868 
1869             mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1870 
1871             break;
1872         }
1873 
1874         case FOURCC('m', 'e', 't', 'a'):
1875         {
1876             off64_t stop_offset = *offset + chunk_size;
1877             *offset = data_offset;
1878             bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
1879             if (!isParsingMetaKeys) {
1880                 uint8_t buffer[4];
1881                 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1882                     *offset = stop_offset;
1883                     return ERROR_MALFORMED;
1884                 }
1885 
1886                 if (mDataSource->readAt(
1887                             data_offset, buffer, 4) < 4) {
1888                     *offset = stop_offset;
1889                     return ERROR_IO;
1890                 }
1891 
1892                 if (U32_AT(buffer) != 0) {
1893                     // Should be version 0, flags 0.
1894 
1895                     // If it's not, let's assume this is one of those
1896                     // apparently malformed chunks that don't have flags
1897                     // and completely different semantics than what's
1898                     // in the MPEG4 specs and skip it.
1899                     *offset = stop_offset;
1900                     return OK;
1901                 }
1902                 *offset +=  sizeof(buffer);
1903             }
1904 
1905             while (*offset < stop_offset) {
1906                 status_t err = parseChunk(offset, depth + 1);
1907                 if (err != OK) {
1908                     return err;
1909                 }
1910             }
1911 
1912             if (*offset != stop_offset) {
1913                 return ERROR_MALFORMED;
1914             }
1915             break;
1916         }
1917 
1918         case FOURCC('m', 'e', 'a', 'n'):
1919         case FOURCC('n', 'a', 'm', 'e'):
1920         case FOURCC('d', 'a', 't', 'a'):
1921         {
1922             *offset += chunk_size;
1923 
1924             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1925                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1926 
1927                 if (err != OK) {
1928                     return err;
1929                 }
1930             }
1931 
1932             break;
1933         }
1934 
1935         case FOURCC('m', 'v', 'h', 'd'):
1936         {
1937             *offset += chunk_size;
1938 
1939             if (chunk_data_size < 32) {
1940                 return ERROR_MALFORMED;
1941             }
1942 
1943             uint8_t header[32];
1944             if (mDataSource->readAt(
1945                         data_offset, header, sizeof(header))
1946                     < (ssize_t)sizeof(header)) {
1947                 return ERROR_IO;
1948             }
1949 
1950             uint64_t creationTime;
1951             uint64_t duration = 0;
1952             if (header[0] == 1) {
1953                 creationTime = U64_AT(&header[4]);
1954                 mHeaderTimescale = U32_AT(&header[20]);
1955                 duration = U64_AT(&header[24]);
1956                 if (duration == 0xffffffffffffffff) {
1957                     duration = 0;
1958                 }
1959             } else if (header[0] != 0) {
1960                 return ERROR_MALFORMED;
1961             } else {
1962                 creationTime = U32_AT(&header[4]);
1963                 mHeaderTimescale = U32_AT(&header[12]);
1964                 uint32_t d32 = U32_AT(&header[16]);
1965                 if (d32 == 0xffffffff) {
1966                     d32 = 0;
1967                 }
1968                 duration = d32;
1969             }
1970             if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
1971                 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1972             }
1973 
1974             String8 s;
1975             if (convertTimeToDate(creationTime, &s)) {
1976                 mFileMetaData->setCString(kKeyDate, s.string());
1977             }
1978 
1979 
1980             break;
1981         }
1982 
1983         case FOURCC('m', 'e', 'h', 'd'):
1984         {
1985             *offset += chunk_size;
1986 
1987             if (chunk_data_size < 8) {
1988                 return ERROR_MALFORMED;
1989             }
1990 
1991             uint8_t flags[4];
1992             if (mDataSource->readAt(
1993                         data_offset, flags, sizeof(flags))
1994                     < (ssize_t)sizeof(flags)) {
1995                 return ERROR_IO;
1996             }
1997 
1998             uint64_t duration = 0;
1999             if (flags[0] == 1) {
2000                 // 64 bit
2001                 if (chunk_data_size < 12) {
2002                     return ERROR_MALFORMED;
2003                 }
2004                 mDataSource->getUInt64(data_offset + 4, &duration);
2005                 if (duration == 0xffffffffffffffff) {
2006                     duration = 0;
2007                 }
2008             } else if (flags[0] == 0) {
2009                 // 32 bit
2010                 uint32_t d32;
2011                 mDataSource->getUInt32(data_offset + 4, &d32);
2012                 if (d32 == 0xffffffff) {
2013                     d32 = 0;
2014                 }
2015                 duration = d32;
2016             } else {
2017                 return ERROR_MALFORMED;
2018             }
2019 
2020             if (duration != 0 && mHeaderTimescale != 0) {
2021                 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2022             }
2023 
2024             break;
2025         }
2026 
2027         case FOURCC('m', 'd', 'a', 't'):
2028         {
2029             ALOGV("mdat chunk, drm: %d", mIsDrm);
2030 
2031             mMdatFound = true;
2032 
2033             if (!mIsDrm) {
2034                 *offset += chunk_size;
2035                 break;
2036             }
2037 
2038             if (chunk_size < 8) {
2039                 return ERROR_MALFORMED;
2040             }
2041 
2042             return parseDrmSINF(offset, data_offset);
2043         }
2044 
2045         case FOURCC('h', 'd', 'l', 'r'):
2046         {
2047             *offset += chunk_size;
2048 
2049             if (underQTMetaPath(mPath, 3)) {
2050                 break;
2051             }
2052 
2053             uint32_t buffer;
2054             if (mDataSource->readAt(
2055                         data_offset + 8, &buffer, 4) < 4) {
2056                 return ERROR_IO;
2057             }
2058 
2059             uint32_t type = ntohl(buffer);
2060             // For the 3GPP file format, the handler-type within the 'hdlr' box
2061             // shall be 'text'. We also want to support 'sbtl' handler type
2062             // for a practical reason as various MPEG4 containers use it.
2063             if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2064                 if (mLastTrack != NULL) {
2065                     mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2066                 }
2067             }
2068 
2069             break;
2070         }
2071 
2072         case FOURCC('k', 'e', 'y', 's'):
2073         {
2074             *offset += chunk_size;
2075 
2076             if (underQTMetaPath(mPath, 3)) {
2077                 parseQTMetaKey(data_offset, chunk_data_size);
2078             }
2079             break;
2080         }
2081 
2082         case FOURCC('t', 'r', 'e', 'x'):
2083         {
2084             *offset += chunk_size;
2085 
2086             if (chunk_data_size < 24) {
2087                 return ERROR_IO;
2088             }
2089             Trex trex;
2090             if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2091                 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2092                 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2093                 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2094                 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2095                 return ERROR_IO;
2096             }
2097             mTrex.add(trex);
2098             break;
2099         }
2100 
2101         case FOURCC('t', 'x', '3', 'g'):
2102         {
2103             if (mLastTrack == NULL)
2104                 return ERROR_MALFORMED;
2105 
2106             uint32_t type;
2107             const void *data;
2108             size_t size = 0;
2109             if (!mLastTrack->meta->findData(
2110                     kKeyTextFormatData, &type, &data, &size)) {
2111                 size = 0;
2112             }
2113 
2114             if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2115                 return ERROR_MALFORMED;
2116             }
2117 
2118             uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2119             if (buffer == NULL) {
2120                 return ERROR_MALFORMED;
2121             }
2122 
2123             if (size > 0) {
2124                 memcpy(buffer, data, size);
2125             }
2126 
2127             if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2128                     < chunk_size) {
2129                 delete[] buffer;
2130                 buffer = NULL;
2131 
2132                 // advance read pointer so we don't end up reading this again
2133                 *offset += chunk_size;
2134                 return ERROR_IO;
2135             }
2136 
2137             mLastTrack->meta->setData(
2138                     kKeyTextFormatData, 0, buffer, size + chunk_size);
2139 
2140             delete[] buffer;
2141 
2142             *offset += chunk_size;
2143             break;
2144         }
2145 
2146         case FOURCC('c', 'o', 'v', 'r'):
2147         {
2148             *offset += chunk_size;
2149 
2150             if (mFileMetaData != NULL) {
2151                 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2152                       chunk_data_size, data_offset);
2153 
2154                 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2155                     return ERROR_MALFORMED;
2156                 }
2157                 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
2158                 if (buffer->data() == NULL) {
2159                     ALOGE("b/28471206");
2160                     return NO_MEMORY;
2161                 }
2162                 if (mDataSource->readAt(
2163                     data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
2164                     return ERROR_IO;
2165                 }
2166                 const int kSkipBytesOfDataBox = 16;
2167                 if (chunk_data_size <= kSkipBytesOfDataBox) {
2168                     return ERROR_MALFORMED;
2169                 }
2170 
2171                 mFileMetaData->setData(
2172                     kKeyAlbumArt, MetaData::TYPE_NONE,
2173                     buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2174             }
2175 
2176             break;
2177         }
2178 
2179         case FOURCC('c', 'o', 'l', 'r'):
2180         {
2181             *offset += chunk_size;
2182             // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2183             // ignore otherwise
2184             if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2185                 status_t err = parseColorInfo(data_offset, chunk_data_size);
2186                 if (err != OK) {
2187                     return err;
2188                 }
2189             }
2190 
2191             break;
2192         }
2193 
2194         case FOURCC('t', 'i', 't', 'l'):
2195         case FOURCC('p', 'e', 'r', 'f'):
2196         case FOURCC('a', 'u', 't', 'h'):
2197         case FOURCC('g', 'n', 'r', 'e'):
2198         case FOURCC('a', 'l', 'b', 'm'):
2199         case FOURCC('y', 'r', 'r', 'c'):
2200         {
2201             *offset += chunk_size;
2202 
2203             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2204 
2205             if (err != OK) {
2206                 return err;
2207             }
2208 
2209             break;
2210         }
2211 
2212         case FOURCC('I', 'D', '3', '2'):
2213         {
2214             *offset += chunk_size;
2215 
2216             if (chunk_data_size < 6) {
2217                 return ERROR_MALFORMED;
2218             }
2219 
2220             parseID3v2MetaData(data_offset + 6);
2221 
2222             break;
2223         }
2224 
2225         case FOURCC('-', '-', '-', '-'):
2226         {
2227             mLastCommentMean.clear();
2228             mLastCommentName.clear();
2229             mLastCommentData.clear();
2230             *offset += chunk_size;
2231             break;
2232         }
2233 
2234         case FOURCC('s', 'i', 'd', 'x'):
2235         {
2236             parseSegmentIndex(data_offset, chunk_data_size);
2237             *offset += chunk_size;
2238             return UNKNOWN_ERROR; // stop parsing after sidx
2239         }
2240 
2241         default:
2242         {
2243             // check if we're parsing 'ilst' for meta keys
2244             // if so, treat type as a number (key-id).
2245             if (underQTMetaPath(mPath, 3)) {
2246                 parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2247             }
2248 
2249             *offset += chunk_size;
2250             break;
2251         }
2252     }
2253 
2254     return OK;
2255 }
2256 
parseSegmentIndex(off64_t offset,size_t size)2257 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2258   ALOGV("MPEG4Extractor::parseSegmentIndex");
2259 
2260     if (size < 12) {
2261       return -EINVAL;
2262     }
2263 
2264     uint32_t flags;
2265     if (!mDataSource->getUInt32(offset, &flags)) {
2266         return ERROR_MALFORMED;
2267     }
2268 
2269     uint32_t version = flags >> 24;
2270     flags &= 0xffffff;
2271 
2272     ALOGV("sidx version %d", version);
2273 
2274     uint32_t referenceId;
2275     if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2276         return ERROR_MALFORMED;
2277     }
2278 
2279     uint32_t timeScale;
2280     if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2281         return ERROR_MALFORMED;
2282     }
2283     ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2284     if (timeScale == 0)
2285         return ERROR_MALFORMED;
2286 
2287     uint64_t earliestPresentationTime;
2288     uint64_t firstOffset;
2289 
2290     offset += 12;
2291     size -= 12;
2292 
2293     if (version == 0) {
2294         if (size < 8) {
2295             return -EINVAL;
2296         }
2297         uint32_t tmp;
2298         if (!mDataSource->getUInt32(offset, &tmp)) {
2299             return ERROR_MALFORMED;
2300         }
2301         earliestPresentationTime = tmp;
2302         if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2303             return ERROR_MALFORMED;
2304         }
2305         firstOffset = tmp;
2306         offset += 8;
2307         size -= 8;
2308     } else {
2309         if (size < 16) {
2310             return -EINVAL;
2311         }
2312         if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2313             return ERROR_MALFORMED;
2314         }
2315         if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2316             return ERROR_MALFORMED;
2317         }
2318         offset += 16;
2319         size -= 16;
2320     }
2321     ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2322 
2323     if (size < 4) {
2324         return -EINVAL;
2325     }
2326 
2327     uint16_t referenceCount;
2328     if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2329         return ERROR_MALFORMED;
2330     }
2331     offset += 4;
2332     size -= 4;
2333     ALOGV("refcount: %d", referenceCount);
2334 
2335     if (size < referenceCount * 12) {
2336         return -EINVAL;
2337     }
2338 
2339     uint64_t total_duration = 0;
2340     for (unsigned int i = 0; i < referenceCount; i++) {
2341         uint32_t d1, d2, d3;
2342 
2343         if (!mDataSource->getUInt32(offset, &d1) ||     // size
2344             !mDataSource->getUInt32(offset + 4, &d2) || // duration
2345             !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2346             return ERROR_MALFORMED;
2347         }
2348 
2349         if (d1 & 0x80000000) {
2350             ALOGW("sub-sidx boxes not supported yet");
2351         }
2352         bool sap = d3 & 0x80000000;
2353         uint32_t saptype = (d3 >> 28) & 7;
2354         if (!sap || (saptype != 1 && saptype != 2)) {
2355             // type 1 and 2 are sync samples
2356             ALOGW("not a stream access point, or unsupported type: %08x", d3);
2357         }
2358         total_duration += d2;
2359         offset += 12;
2360         ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2361         SidxEntry se;
2362         se.mSize = d1 & 0x7fffffff;
2363         se.mDurationUs = 1000000LL * d2 / timeScale;
2364         mSidxEntries.add(se);
2365     }
2366 
2367     uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2368 
2369     if (mLastTrack == NULL)
2370         return ERROR_MALFORMED;
2371 
2372     int64_t metaDuration;
2373     if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2374         mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2375     }
2376     return OK;
2377 }
2378 
parseQTMetaKey(off64_t offset,size_t size)2379 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2380     if (size < 8) {
2381         return ERROR_MALFORMED;
2382     }
2383 
2384     uint32_t count;
2385     if (!mDataSource->getUInt32(offset + 4, &count)) {
2386         return ERROR_MALFORMED;
2387     }
2388 
2389     if (mMetaKeyMap.size() > 0) {
2390         ALOGW("'keys' atom seen again, discarding existing entries");
2391         mMetaKeyMap.clear();
2392     }
2393 
2394     off64_t keyOffset = offset + 8;
2395     off64_t stopOffset = offset + size;
2396     for (size_t i = 1; i <= count; i++) {
2397         if (keyOffset + 8 > stopOffset) {
2398             return ERROR_MALFORMED;
2399         }
2400 
2401         uint32_t keySize;
2402         if (!mDataSource->getUInt32(keyOffset, &keySize)
2403                 || keySize < 8
2404                 || keyOffset + keySize > stopOffset) {
2405             return ERROR_MALFORMED;
2406         }
2407 
2408         uint32_t type;
2409         if (!mDataSource->getUInt32(keyOffset + 4, &type)
2410                 || type != FOURCC('m', 'd', 't', 'a')) {
2411             return ERROR_MALFORMED;
2412         }
2413 
2414         keySize -= 8;
2415         keyOffset += 8;
2416 
2417         sp<ABuffer> keyData = new ABuffer(keySize);
2418         if (keyData->data() == NULL) {
2419             return ERROR_MALFORMED;
2420         }
2421         if (mDataSource->readAt(
2422                 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) {
2423             return ERROR_MALFORMED;
2424         }
2425 
2426         AString key((const char *)keyData->data(), keySize);
2427         mMetaKeyMap.add(i, key);
2428 
2429         keyOffset += keySize;
2430     }
2431     return OK;
2432 }
2433 
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)2434 status_t MPEG4Extractor::parseQTMetaVal(
2435         int32_t keyId, off64_t offset, size_t size) {
2436     ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2437     if (index < 0) {
2438         // corresponding key is not present, ignore
2439         return ERROR_MALFORMED;
2440     }
2441 
2442     if (size <= 16) {
2443         return ERROR_MALFORMED;
2444     }
2445     uint32_t dataSize;
2446     if (!mDataSource->getUInt32(offset, &dataSize)
2447             || dataSize > size || dataSize <= 16) {
2448         return ERROR_MALFORMED;
2449     }
2450     uint32_t atomFourCC;
2451     if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2452             || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2453         return ERROR_MALFORMED;
2454     }
2455     uint32_t dataType;
2456     if (!mDataSource->getUInt32(offset + 8, &dataType)
2457             || ((dataType & 0xff000000) != 0)) {
2458         // not well-known type
2459         return ERROR_MALFORMED;
2460     }
2461 
2462     dataSize -= 16;
2463     offset += 16;
2464 
2465     if (dataType == 23 && dataSize >= 4) {
2466         // BE Float32
2467         uint32_t val;
2468         if (!mDataSource->getUInt32(offset, &val)) {
2469             return ERROR_MALFORMED;
2470         }
2471         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2472             mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val);
2473         }
2474     } else {
2475         // add more keys if needed
2476         ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2477     }
2478 
2479     return OK;
2480 }
2481 
parseTrackHeader(off64_t data_offset,off64_t data_size)2482 status_t MPEG4Extractor::parseTrackHeader(
2483         off64_t data_offset, off64_t data_size) {
2484     if (data_size < 4) {
2485         return ERROR_MALFORMED;
2486     }
2487 
2488     uint8_t version;
2489     if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2490         return ERROR_IO;
2491     }
2492 
2493     size_t dynSize = (version == 1) ? 36 : 24;
2494 
2495     uint8_t buffer[36 + 60];
2496 
2497     if (data_size != (off64_t)dynSize + 60) {
2498         return ERROR_MALFORMED;
2499     }
2500 
2501     if (mDataSource->readAt(
2502                 data_offset, buffer, data_size) < (ssize_t)data_size) {
2503         return ERROR_IO;
2504     }
2505 
2506     uint64_t ctime __unused, mtime __unused, duration __unused;
2507     int32_t id;
2508 
2509     if (version == 1) {
2510         ctime = U64_AT(&buffer[4]);
2511         mtime = U64_AT(&buffer[12]);
2512         id = U32_AT(&buffer[20]);
2513         duration = U64_AT(&buffer[28]);
2514     } else if (version == 0) {
2515         ctime = U32_AT(&buffer[4]);
2516         mtime = U32_AT(&buffer[8]);
2517         id = U32_AT(&buffer[12]);
2518         duration = U32_AT(&buffer[20]);
2519     } else {
2520         return ERROR_UNSUPPORTED;
2521     }
2522 
2523     if (mLastTrack == NULL)
2524         return ERROR_MALFORMED;
2525 
2526     mLastTrack->meta->setInt32(kKeyTrackID, id);
2527 
2528     size_t matrixOffset = dynSize + 16;
2529     int32_t a00 = U32_AT(&buffer[matrixOffset]);
2530     int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2531     int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2532     int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2533 
2534 #if 0
2535     int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2536     int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2537 
2538     ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2539          a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2540     ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2541          a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2542 #endif
2543 
2544     uint32_t rotationDegrees;
2545 
2546     static const int32_t kFixedOne = 0x10000;
2547     if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2548         // Identity, no rotation
2549         rotationDegrees = 0;
2550     } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2551         rotationDegrees = 90;
2552     } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2553         rotationDegrees = 270;
2554     } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2555         rotationDegrees = 180;
2556     } else {
2557         ALOGW("We only support 0,90,180,270 degree rotation matrices");
2558         rotationDegrees = 0;
2559     }
2560 
2561     if (rotationDegrees != 0) {
2562         mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2563     }
2564 
2565     // Handle presentation display size, which could be different
2566     // from the image size indicated by kKeyWidth and kKeyHeight.
2567     uint32_t width = U32_AT(&buffer[dynSize + 52]);
2568     uint32_t height = U32_AT(&buffer[dynSize + 56]);
2569     mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2570     mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2571 
2572     return OK;
2573 }
2574 
parseITunesMetaData(off64_t offset,size_t size)2575 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2576     if (size < 4 || size == SIZE_MAX) {
2577         return ERROR_MALFORMED;
2578     }
2579 
2580     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2581     if (buffer == NULL) {
2582         return ERROR_MALFORMED;
2583     }
2584     if (mDataSource->readAt(
2585                 offset, buffer, size) != (ssize_t)size) {
2586         delete[] buffer;
2587         buffer = NULL;
2588 
2589         return ERROR_IO;
2590     }
2591 
2592     uint32_t flags = U32_AT(buffer);
2593 
2594     uint32_t metadataKey = 0;
2595     char chunk[5];
2596     MakeFourCCString(mPath[4], chunk);
2597     ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2598     switch ((int32_t)mPath[4]) {
2599         case FOURCC(0xa9, 'a', 'l', 'b'):
2600         {
2601             metadataKey = kKeyAlbum;
2602             break;
2603         }
2604         case FOURCC(0xa9, 'A', 'R', 'T'):
2605         {
2606             metadataKey = kKeyArtist;
2607             break;
2608         }
2609         case FOURCC('a', 'A', 'R', 'T'):
2610         {
2611             metadataKey = kKeyAlbumArtist;
2612             break;
2613         }
2614         case FOURCC(0xa9, 'd', 'a', 'y'):
2615         {
2616             metadataKey = kKeyYear;
2617             break;
2618         }
2619         case FOURCC(0xa9, 'n', 'a', 'm'):
2620         {
2621             metadataKey = kKeyTitle;
2622             break;
2623         }
2624         case FOURCC(0xa9, 'w', 'r', 't'):
2625         {
2626             metadataKey = kKeyWriter;
2627             break;
2628         }
2629         case FOURCC('c', 'o', 'v', 'r'):
2630         {
2631             metadataKey = kKeyAlbumArt;
2632             break;
2633         }
2634         case FOURCC('g', 'n', 'r', 'e'):
2635         {
2636             metadataKey = kKeyGenre;
2637             break;
2638         }
2639         case FOURCC(0xa9, 'g', 'e', 'n'):
2640         {
2641             metadataKey = kKeyGenre;
2642             break;
2643         }
2644         case FOURCC('c', 'p', 'i', 'l'):
2645         {
2646             if (size == 9 && flags == 21) {
2647                 char tmp[16];
2648                 sprintf(tmp, "%d",
2649                         (int)buffer[size - 1]);
2650 
2651                 mFileMetaData->setCString(kKeyCompilation, tmp);
2652             }
2653             break;
2654         }
2655         case FOURCC('t', 'r', 'k', 'n'):
2656         {
2657             if (size == 16 && flags == 0) {
2658                 char tmp[16];
2659                 uint16_t* pTrack = (uint16_t*)&buffer[10];
2660                 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2661                 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2662 
2663                 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2664             }
2665             break;
2666         }
2667         case FOURCC('d', 'i', 's', 'k'):
2668         {
2669             if ((size == 14 || size == 16) && flags == 0) {
2670                 char tmp[16];
2671                 uint16_t* pDisc = (uint16_t*)&buffer[10];
2672                 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2673                 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2674 
2675                 mFileMetaData->setCString(kKeyDiscNumber, tmp);
2676             }
2677             break;
2678         }
2679         case FOURCC('-', '-', '-', '-'):
2680         {
2681             buffer[size] = '\0';
2682             switch (mPath[5]) {
2683                 case FOURCC('m', 'e', 'a', 'n'):
2684                     mLastCommentMean.setTo((const char *)buffer + 4);
2685                     break;
2686                 case FOURCC('n', 'a', 'm', 'e'):
2687                     mLastCommentName.setTo((const char *)buffer + 4);
2688                     break;
2689                 case FOURCC('d', 'a', 't', 'a'):
2690                     if (size < 8) {
2691                         delete[] buffer;
2692                         buffer = NULL;
2693                         ALOGE("b/24346430");
2694                         return ERROR_MALFORMED;
2695                     }
2696                     mLastCommentData.setTo((const char *)buffer + 8);
2697                     break;
2698             }
2699 
2700             // Once we have a set of mean/name/data info, go ahead and process
2701             // it to see if its something we are interested in.  Whether or not
2702             // were are interested in the specific tag, make sure to clear out
2703             // the set so we can be ready to process another tuple should one
2704             // show up later in the file.
2705             if ((mLastCommentMean.length() != 0) &&
2706                 (mLastCommentName.length() != 0) &&
2707                 (mLastCommentData.length() != 0)) {
2708 
2709                 if (mLastCommentMean == "com.apple.iTunes"
2710                         && mLastCommentName == "iTunSMPB") {
2711                     int32_t delay, padding;
2712                     if (sscanf(mLastCommentData,
2713                                " %*x %x %x %*x", &delay, &padding) == 2) {
2714                         if (mLastTrack == NULL)
2715                             return ERROR_MALFORMED;
2716 
2717                         mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2718                         mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2719                     }
2720                 }
2721 
2722                 mLastCommentMean.clear();
2723                 mLastCommentName.clear();
2724                 mLastCommentData.clear();
2725             }
2726             break;
2727         }
2728 
2729         default:
2730             break;
2731     }
2732 
2733     if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2734         if (metadataKey == kKeyAlbumArt) {
2735             mFileMetaData->setData(
2736                     kKeyAlbumArt, MetaData::TYPE_NONE,
2737                     buffer + 8, size - 8);
2738         } else if (metadataKey == kKeyGenre) {
2739             if (flags == 0) {
2740                 // uint8_t genre code, iTunes genre codes are
2741                 // the standard id3 codes, except they start
2742                 // at 1 instead of 0 (e.g. Pop is 14, not 13)
2743                 // We use standard id3 numbering, so subtract 1.
2744                 int genrecode = (int)buffer[size - 1];
2745                 genrecode--;
2746                 if (genrecode < 0) {
2747                     genrecode = 255; // reserved for 'unknown genre'
2748                 }
2749                 char genre[10];
2750                 sprintf(genre, "%d", genrecode);
2751 
2752                 mFileMetaData->setCString(metadataKey, genre);
2753             } else if (flags == 1) {
2754                 // custom genre string
2755                 buffer[size] = '\0';
2756 
2757                 mFileMetaData->setCString(
2758                         metadataKey, (const char *)buffer + 8);
2759             }
2760         } else {
2761             buffer[size] = '\0';
2762 
2763             mFileMetaData->setCString(
2764                     metadataKey, (const char *)buffer + 8);
2765         }
2766     }
2767 
2768     delete[] buffer;
2769     buffer = NULL;
2770 
2771     return OK;
2772 }
2773 
parseColorInfo(off64_t offset,size_t size)2774 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
2775     if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
2776         return ERROR_MALFORMED;
2777     }
2778 
2779     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2780     if (buffer == NULL) {
2781         return ERROR_MALFORMED;
2782     }
2783     if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
2784         delete[] buffer;
2785         buffer = NULL;
2786 
2787         return ERROR_IO;
2788     }
2789 
2790     int32_t type = U32_AT(&buffer[0]);
2791     if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
2792             || (type == FOURCC('n', 'c', 'l', 'c' && size >= 10))) {
2793         int32_t primaries = U16_AT(&buffer[4]);
2794         int32_t transfer = U16_AT(&buffer[6]);
2795         int32_t coeffs = U16_AT(&buffer[8]);
2796         bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
2797 
2798         ColorAspects aspects;
2799         ColorUtils::convertIsoColorAspectsToCodecAspects(
2800                 primaries, transfer, coeffs, fullRange, aspects);
2801 
2802         // only store the first color specification
2803         if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) {
2804             mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries);
2805             mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer);
2806             mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
2807             mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange);
2808         }
2809     }
2810 
2811     delete[] buffer;
2812     buffer = NULL;
2813 
2814     return OK;
2815 }
2816 
parse3GPPMetaData(off64_t offset,size_t size,int depth)2817 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2818     if (size < 4 || size == SIZE_MAX) {
2819         return ERROR_MALFORMED;
2820     }
2821 
2822     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2823     if (buffer == NULL) {
2824         return ERROR_MALFORMED;
2825     }
2826     if (mDataSource->readAt(
2827                 offset, buffer, size) != (ssize_t)size) {
2828         delete[] buffer;
2829         buffer = NULL;
2830 
2831         return ERROR_IO;
2832     }
2833 
2834     uint32_t metadataKey = 0;
2835     switch (mPath[depth]) {
2836         case FOURCC('t', 'i', 't', 'l'):
2837         {
2838             metadataKey = kKeyTitle;
2839             break;
2840         }
2841         case FOURCC('p', 'e', 'r', 'f'):
2842         {
2843             metadataKey = kKeyArtist;
2844             break;
2845         }
2846         case FOURCC('a', 'u', 't', 'h'):
2847         {
2848             metadataKey = kKeyWriter;
2849             break;
2850         }
2851         case FOURCC('g', 'n', 'r', 'e'):
2852         {
2853             metadataKey = kKeyGenre;
2854             break;
2855         }
2856         case FOURCC('a', 'l', 'b', 'm'):
2857         {
2858             if (buffer[size - 1] != '\0') {
2859               char tmp[4];
2860               sprintf(tmp, "%u", buffer[size - 1]);
2861 
2862               mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2863             }
2864 
2865             metadataKey = kKeyAlbum;
2866             break;
2867         }
2868         case FOURCC('y', 'r', 'r', 'c'):
2869         {
2870             char tmp[5];
2871             uint16_t year = U16_AT(&buffer[4]);
2872 
2873             if (year < 10000) {
2874                 sprintf(tmp, "%u", year);
2875 
2876                 mFileMetaData->setCString(kKeyYear, tmp);
2877             }
2878             break;
2879         }
2880 
2881         default:
2882             break;
2883     }
2884 
2885     if (metadataKey > 0) {
2886         bool isUTF8 = true; // Common case
2887         char16_t *framedata = NULL;
2888         int len16 = 0; // Number of UTF-16 characters
2889 
2890         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2891         if (size < 6) {
2892             return ERROR_MALFORMED;
2893         }
2894 
2895         if (size - 6 >= 4) {
2896             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2897             framedata = (char16_t *)(buffer + 6);
2898             if (0xfffe == *framedata) {
2899                 // endianness marker (BOM) doesn't match host endianness
2900                 for (int i = 0; i < len16; i++) {
2901                     framedata[i] = bswap_16(framedata[i]);
2902                 }
2903                 // BOM is now swapped to 0xfeff, we will execute next block too
2904             }
2905 
2906             if (0xfeff == *framedata) {
2907                 // Remove the BOM
2908                 framedata++;
2909                 len16--;
2910                 isUTF8 = false;
2911             }
2912             // else normal non-zero-length UTF-8 string
2913             // we can't handle UTF-16 without BOM as there is no other
2914             // indication of encoding.
2915         }
2916 
2917         if (isUTF8) {
2918             buffer[size] = 0;
2919             mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2920         } else {
2921             // Convert from UTF-16 string to UTF-8 string.
2922             String8 tmpUTF8str(framedata, len16);
2923             mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2924         }
2925     }
2926 
2927     delete[] buffer;
2928     buffer = NULL;
2929 
2930     return OK;
2931 }
2932 
parseID3v2MetaData(off64_t offset)2933 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2934     ID3 id3(mDataSource, true /* ignorev1 */, offset);
2935 
2936     if (id3.isValid()) {
2937         struct Map {
2938             int key;
2939             const char *tag1;
2940             const char *tag2;
2941         };
2942         static const Map kMap[] = {
2943             { kKeyAlbum, "TALB", "TAL" },
2944             { kKeyArtist, "TPE1", "TP1" },
2945             { kKeyAlbumArtist, "TPE2", "TP2" },
2946             { kKeyComposer, "TCOM", "TCM" },
2947             { kKeyGenre, "TCON", "TCO" },
2948             { kKeyTitle, "TIT2", "TT2" },
2949             { kKeyYear, "TYE", "TYER" },
2950             { kKeyAuthor, "TXT", "TEXT" },
2951             { kKeyCDTrackNumber, "TRK", "TRCK" },
2952             { kKeyDiscNumber, "TPA", "TPOS" },
2953             { kKeyCompilation, "TCP", "TCMP" },
2954         };
2955         static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2956 
2957         for (size_t i = 0; i < kNumMapEntries; ++i) {
2958             if (!mFileMetaData->hasData(kMap[i].key)) {
2959                 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2960                 if (it->done()) {
2961                     delete it;
2962                     it = new ID3::Iterator(id3, kMap[i].tag2);
2963                 }
2964 
2965                 if (it->done()) {
2966                     delete it;
2967                     continue;
2968                 }
2969 
2970                 String8 s;
2971                 it->getString(&s);
2972                 delete it;
2973 
2974                 mFileMetaData->setCString(kMap[i].key, s);
2975             }
2976         }
2977 
2978         size_t dataSize;
2979         String8 mime;
2980         const void *data = id3.getAlbumArt(&dataSize, &mime);
2981 
2982         if (data) {
2983             mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2984             mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2985         }
2986     }
2987 }
2988 
getTrack(size_t index)2989 sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) {
2990     status_t err;
2991     if ((err = readMetaData()) != OK) {
2992         return NULL;
2993     }
2994 
2995     Track *track = mFirstTrack;
2996     while (index > 0) {
2997         if (track == NULL) {
2998             return NULL;
2999         }
3000 
3001         track = track->next;
3002         --index;
3003     }
3004 
3005     if (track == NULL) {
3006         return NULL;
3007     }
3008 
3009 
3010     Trex *trex = NULL;
3011     int32_t trackId;
3012     if (track->meta->findInt32(kKeyTrackID, &trackId)) {
3013         for (size_t i = 0; i < mTrex.size(); i++) {
3014             Trex *t = &mTrex.editItemAt(i);
3015             if (t->track_ID == (uint32_t) trackId) {
3016                 trex = t;
3017                 break;
3018             }
3019         }
3020     } else {
3021         ALOGE("b/21657957");
3022         return NULL;
3023     }
3024 
3025     ALOGV("getTrack called, pssh: %zu", mPssh.size());
3026 
3027     const char *mime;
3028     if (!track->meta->findCString(kKeyMIMEType, &mime)) {
3029         return NULL;
3030     }
3031 
3032     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3033         uint32_t type;
3034         const void *data;
3035         size_t size;
3036         if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) {
3037             return NULL;
3038         }
3039 
3040         const uint8_t *ptr = (const uint8_t *)data;
3041 
3042         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3043             return NULL;
3044         }
3045     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3046         uint32_t type;
3047         const void *data;
3048         size_t size;
3049         if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) {
3050             return NULL;
3051         }
3052 
3053         const uint8_t *ptr = (const uint8_t *)data;
3054 
3055         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3056             return NULL;
3057         }
3058     }
3059 
3060     return new MPEG4Source(this,
3061             track->meta, mDataSource, track->timescale, track->sampleTable,
3062             mSidxEntries, trex, mMoofOffset);
3063 }
3064 
3065 // static
verifyTrack(Track * track)3066 status_t MPEG4Extractor::verifyTrack(Track *track) {
3067     const char *mime;
3068     CHECK(track->meta->findCString(kKeyMIMEType, &mime));
3069 
3070     uint32_t type;
3071     const void *data;
3072     size_t size;
3073     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3074         if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
3075                 || type != kTypeAVCC) {
3076             return ERROR_MALFORMED;
3077         }
3078     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3079         if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
3080                     || type != kTypeHVCC) {
3081             return ERROR_MALFORMED;
3082         }
3083     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3084             || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3085             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3086         if (!track->meta->findData(kKeyESDS, &type, &data, &size)
3087                 || type != kTypeESDS) {
3088             return ERROR_MALFORMED;
3089         }
3090     }
3091 
3092     if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3093         // Make sure we have all the metadata we need.
3094         ALOGE("stbl atom missing/invalid.");
3095         return ERROR_MALFORMED;
3096     }
3097 
3098     if (track->timescale == 0) {
3099         ALOGE("timescale invalid.");
3100         return ERROR_MALFORMED;
3101     }
3102 
3103     return OK;
3104 }
3105 
// Audio object type (AOT) codes. Only the values this file actually refers
// to are enabled; the remaining codes are kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1
    // AOT_NULL_OBJECT      = 0
    // AOT_AAC_MAIN         = 1    Main profile
    AOT_AAC_LC      = 2,        // Low Complexity object
    // AOT_AAC_SSR          = 3
    // AOT_AAC_LTP          = 4
    AOT_SBR         = 5,
    // AOT_AAC_SCAL         = 6
    // AOT_TWIN_VQ          = 7
    // AOT_CELP             = 8
    // AOT_HVXC             = 9
    // AOT_RSVD_10          = 10   (reserved)
    // AOT_RSVD_11          = 11   (reserved)
    // AOT_TTSI             = 12   TTSI Object
    // AOT_MAIN_SYNTH       = 13   Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14   Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15   General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16   Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,       // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18   (reserved)
    // AOT_ER_AAC_LTP       = 19   Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21   Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,       // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,       // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24   Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25   Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26   Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27   Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28   might become SSC
    AOT_PS          = 29,       // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30   MPEG Surround

    AOT_ESCAPE      = 31,       // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32   MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33   MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34   MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35   might become DST
    // AOT_RSVD_36          = 36   might become ALS
    // AOT_AAC_SLS          = 37   AAC + SLS
    // AOT_SLS              = 38   SLS
    // AOT_ER_AAC_ELD       = 39   AAC Enhanced Low Delay

    // AOT_USAC             = 42   USAC
    // AOT_SAOC             = 43   SAOC
    // AOT_LD_MPEGS         = 44   Low Delay MPEG Surround

    // AOT_RSVD50           = 50   Interim AOT for Rsvd50
};
3157 
// Refines the audio parameters of the most recently parsed track using the
// ESDS blob (|esds_data|, |esds_size| bytes).
//
// The object type indication decides the first step: 0xe1 re-labels the track
// as QCELP and returns OK, 0x6b (MP3) is rejected with ERROR_UNSUPPORTED.
// Otherwise the codec specific data is read bit by bit to obtain the audio
// object type, the sampling rate and the channel count, which are written to
// the track's meta data as kKeyAACAOT, kKeySampleRate and kKeyChannelCount.
//
// Returns OK on success (also when there is no codec specific data at all),
// ERROR_MALFORMED when the data is truncated, uses a reserved frequency index
// or there is no current track, and ERROR_UNSUPPORTED for channel
// configurations that are not handled or resolve to zero channels.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication  == 0x6b) {
        // The media subtype is MP3 audio
        // Our software MP3 audio decoder may not be able to handle
        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
        ALOGE("MP3 track in MP4/3GPP file is not supported");
        return ERROR_UNSUPPORTED;
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // Two bytes (16 bits) cover the unchecked reads below: 5 bits of object
    // type, an optional 6-bit extension and the 4-bit frequency index.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    // Sampling rates selected by frequency indices 0..12. Indices 13 and 14
    // are rejected below and 15 means the rate is given explicitly.
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == 31) {  // escape value (AOT_ESCAPE): type is 32 + next 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        // Explicit 24-bit sampling rate followed by the 4-bit channel field.
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        // Indices 13 and 14 have no entry in kSamplingRate.
        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
        // The extension sampling rate is read to advance the bit reader; its
        // value is not stored anywhere (see the TODO below).
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        int32_t extSampleRate __unused;
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            extSampleRate = br.getBits(24);
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
    }

    // Translate the 4-bit channel configuration into a channel count.
    // Configuration 0 is kept as 0 here and resolved further down.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            // For SBR/PS a second object type follows, using the same 5-bit
            // + optional 6-bit escape encoding as the first one.
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            const int32_t frameLengthFlag __unused = br.getBits(1);

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                const int32_t coreCoderDelay __unused = br.getBits(14);
            }

            // The extension flag may be absent; in that case a default is
            // chosen from the object type and a warning is logged.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                // 14496-3 4.5.1.1 extensionFlag
                case AOT_AAC_LC:
                    extensionFlag = 0;
                    break;
                case AOT_ER_AAC_LC:
                case AOT_ER_AAC_SCAL:
                case AOT_ER_BSAC:
                case AOT_ER_AAC_LD:
                    extensionFlag = 1;
                    break;
                default:
                    return ERROR_MALFORMED;
                    break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                        extensionFlag, objectType);
            }

            if (numChannels == 0) {
                // Channel configuration 0: the channel count is derived from
                // the element counts that follow in the bitstream.
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed-size fields up to and including
                // MonoMixdownPresent add up to 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                const int32_t ElementInstanceTag __unused = br.getBits(4);
                const int32_t Profile __unused = br.getBits(2);
                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                const int32_t NumAssocDataElements __unused = br.getBits(3);
                const int32_t NumValidCcElements __unused = br.getBits(4);

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
                }

                // Front, side and back elements each contribute two channels
                // when flagged as a pair (IsCpe) and one otherwise.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    const int32_t SideElementTagSelect __unused = br.getBits(4);
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    const int32_t BackElementTagSelect __unused = br.getBits(4);
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Count before LFE elements are added; only used for logging.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still zero: the configuration was 0 and no channels could be derived.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The track is required (via CHECK) to already carry a sample rate and a
    // channel count; the values parsed here replace them.
    int32_t prevSampleRate;
    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);

    int32_t prevChannelCount;
    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);

    return OK;
}
3436 
3437 ////////////////////////////////////////////////////////////////////////////////
3438 
MPEG4Source(const sp<MPEG4Extractor> & owner,const sp<MetaData> & format,const sp<DataSource> & dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset)3439 MPEG4Source::MPEG4Source(
3440         const sp<MPEG4Extractor> &owner,
3441         const sp<MetaData> &format,
3442         const sp<DataSource> &dataSource,
3443         int32_t timeScale,
3444         const sp<SampleTable> &sampleTable,
3445         Vector<SidxEntry> &sidx,
3446         const Trex *trex,
3447         off64_t firstMoofOffset)
3448     : mOwner(owner),
3449       mFormat(format),
3450       mDataSource(dataSource),
3451       mTimescale(timeScale),
3452       mSampleTable(sampleTable),
3453       mCurrentSampleIndex(0),
3454       mCurrentFragmentIndex(0),
3455       mSegments(sidx),
3456       mTrex(trex),
3457       mFirstMoofOffset(firstMoofOffset),
3458       mCurrentMoofOffset(firstMoofOffset),
3459       mCurrentTime(0),
3460       mCurrentSampleInfoAllocSize(0),
3461       mCurrentSampleInfoSizes(NULL),
3462       mCurrentSampleInfoOffsetsAllocSize(0),
3463       mCurrentSampleInfoOffsets(NULL),
3464       mIsAVC(false),
3465       mIsHEVC(false),
3466       mNALLengthSize(0),
3467       mStarted(false),
3468       mGroup(NULL),
3469       mBuffer(NULL),
3470       mWantsNALFragments(false),
3471       mSrcBuffer(NULL) {
3472 
3473     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3474 
3475     mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
3476     mDefaultIVSize = 0;
3477     mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3478     uint32_t keytype;
3479     const void *key;
3480     size_t keysize;
3481     if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3482         CHECK(keysize <= 16);
3483         memset(mCryptoKey, 0, 16);
3484         memcpy(mCryptoKey, key, keysize);
3485     }
3486 
3487     const char *mime;
3488     bool success = mFormat->findCString(kKeyMIMEType, &mime);
3489     CHECK(success);
3490 
3491     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3492     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3493 
3494     if (mIsAVC) {
3495         uint32_t type;
3496         const void *data;
3497         size_t size;
3498         CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3499 
3500         const uint8_t *ptr = (const uint8_t *)data;
3501 
3502         CHECK(size >= 7);
3503         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3504 
3505         // The number of bytes used to encode the length of a NAL unit.
3506         mNALLengthSize = 1 + (ptr[4] & 3);
3507     } else if (mIsHEVC) {
3508         uint32_t type;
3509         const void *data;
3510         size_t size;
3511         CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3512 
3513         const uint8_t *ptr = (const uint8_t *)data;
3514 
3515         CHECK(size >= 22);
3516         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3517 
3518         mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3519     }
3520 
3521     CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3522 
3523     if (mFirstMoofOffset != 0) {
3524         off64_t offset = mFirstMoofOffset;
3525         parseChunk(&offset);
3526     }
3527 }
3528 
~MPEG4Source()3529 MPEG4Source::~MPEG4Source() {
3530     if (mStarted) {
3531         stop();
3532     }
3533     free(mCurrentSampleInfoSizes);
3534     free(mCurrentSampleInfoOffsets);
3535 }
3536 
start(MetaData * params)3537 status_t MPEG4Source::start(MetaData *params) {
3538     Mutex::Autolock autoLock(mLock);
3539 
3540     CHECK(!mStarted);
3541 
3542     int32_t val;
3543     if (params && params->findInt32(kKeyWantsNALFragments, &val)
3544         && val != 0) {
3545         mWantsNALFragments = true;
3546     } else {
3547         mWantsNALFragments = false;
3548     }
3549 
3550     int32_t tmp;
3551     CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp));
3552     size_t max_size = tmp;
3553 
3554     // A somewhat arbitrary limit that should be sufficient for 8k video frames
3555     // If you see the message below for a valid input stream: increase the limit
3556     if (max_size > 64 * 1024 * 1024) {
3557         ALOGE("bogus max input size: %zu", max_size);
3558         return ERROR_MALFORMED;
3559     }
3560     mGroup = new MediaBufferGroup;
3561     mGroup->add_buffer(new MediaBuffer(max_size));
3562 
3563     mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3564     if (mSrcBuffer == NULL) {
3565         // file probably specified a bad max size
3566         delete mGroup;
3567         mGroup = NULL;
3568         return ERROR_MALFORMED;
3569     }
3570 
3571     mStarted = true;
3572 
3573     return OK;
3574 }
3575 
stop()3576 status_t MPEG4Source::stop() {
3577     Mutex::Autolock autoLock(mLock);
3578 
3579     CHECK(mStarted);
3580 
3581     if (mBuffer != NULL) {
3582         mBuffer->release();
3583         mBuffer = NULL;
3584     }
3585 
3586     delete[] mSrcBuffer;
3587     mSrcBuffer = NULL;
3588 
3589     delete mGroup;
3590     mGroup = NULL;
3591 
3592     mStarted = false;
3593     mCurrentSampleIndex = 0;
3594 
3595     return OK;
3596 }
3597 
parseChunk(off64_t * offset)3598 status_t MPEG4Source::parseChunk(off64_t *offset) {
3599     uint32_t hdr[2];
3600     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3601         return ERROR_IO;
3602     }
3603     uint64_t chunk_size = ntohl(hdr[0]);
3604     uint32_t chunk_type = ntohl(hdr[1]);
3605     off64_t data_offset = *offset + 8;
3606 
3607     if (chunk_size == 1) {
3608         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3609             return ERROR_IO;
3610         }
3611         chunk_size = ntoh64(chunk_size);
3612         data_offset += 8;
3613 
3614         if (chunk_size < 16) {
3615             // The smallest valid chunk is 16 bytes long in this case.
3616             return ERROR_MALFORMED;
3617         }
3618     } else if (chunk_size < 8) {
3619         // The smallest valid chunk is 8 bytes long.
3620         return ERROR_MALFORMED;
3621     }
3622 
3623     char chunk[5];
3624     MakeFourCCString(chunk_type, chunk);
3625     ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
3626 
3627     off64_t chunk_data_size = *offset + chunk_size - data_offset;
3628 
3629     switch(chunk_type) {
3630 
3631         case FOURCC('t', 'r', 'a', 'f'):
3632         case FOURCC('m', 'o', 'o', 'f'): {
3633             off64_t stop_offset = *offset + chunk_size;
3634             *offset = data_offset;
3635             while (*offset < stop_offset) {
3636                 status_t err = parseChunk(offset);
3637                 if (err != OK) {
3638                     return err;
3639                 }
3640             }
3641             if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3642                 // *offset points to the box following this moof. Find the next moof from there.
3643 
3644                 while (true) {
3645                     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3646                         return ERROR_END_OF_STREAM;
3647                     }
3648                     chunk_size = ntohl(hdr[0]);
3649                     chunk_type = ntohl(hdr[1]);
3650                     if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3651                         mNextMoofOffset = *offset;
3652                         break;
3653                     }
3654                     *offset += chunk_size;
3655                 }
3656             }
3657             break;
3658         }
3659 
3660         case FOURCC('t', 'f', 'h', 'd'): {
3661                 status_t err;
3662                 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3663                     return err;
3664                 }
3665                 *offset += chunk_size;
3666                 break;
3667         }
3668 
3669         case FOURCC('t', 'r', 'u', 'n'): {
3670                 status_t err;
3671                 if (mLastParsedTrackId == mTrackId) {
3672                     if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3673                         return err;
3674                     }
3675                 }
3676 
3677                 *offset += chunk_size;
3678                 break;
3679         }
3680 
3681         case FOURCC('s', 'a', 'i', 'z'): {
3682             status_t err;
3683             if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3684                 return err;
3685             }
3686             *offset += chunk_size;
3687             break;
3688         }
3689         case FOURCC('s', 'a', 'i', 'o'): {
3690             status_t err;
3691             if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3692                 return err;
3693             }
3694             *offset += chunk_size;
3695             break;
3696         }
3697 
3698         case FOURCC('m', 'd', 'a', 't'): {
3699             // parse DRM info if present
3700             ALOGV("MPEG4Source::parseChunk mdat");
3701             // if saiz/saoi was previously observed, do something with the sampleinfos
3702             *offset += chunk_size;
3703             break;
3704         }
3705 
3706         default: {
3707             *offset += chunk_size;
3708             break;
3709         }
3710     }
3711     return OK;
3712 }
3713 
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)3714 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3715         off64_t offset, off64_t /* size */) {
3716     ALOGV("parseSampleAuxiliaryInformationSizes");
3717     // 14496-12 8.7.12
3718     uint8_t version;
3719     if (mDataSource->readAt(
3720             offset, &version, sizeof(version))
3721             < (ssize_t)sizeof(version)) {
3722         return ERROR_IO;
3723     }
3724 
3725     if (version != 0) {
3726         return ERROR_UNSUPPORTED;
3727     }
3728     offset++;
3729 
3730     uint32_t flags;
3731     if (!mDataSource->getUInt24(offset, &flags)) {
3732         return ERROR_IO;
3733     }
3734     offset += 3;
3735 
3736     if (flags & 1) {
3737         uint32_t tmp;
3738         if (!mDataSource->getUInt32(offset, &tmp)) {
3739             return ERROR_MALFORMED;
3740         }
3741         mCurrentAuxInfoType = tmp;
3742         offset += 4;
3743         if (!mDataSource->getUInt32(offset, &tmp)) {
3744             return ERROR_MALFORMED;
3745         }
3746         mCurrentAuxInfoTypeParameter = tmp;
3747         offset += 4;
3748     }
3749 
3750     uint8_t defsize;
3751     if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3752         return ERROR_MALFORMED;
3753     }
3754     mCurrentDefaultSampleInfoSize = defsize;
3755     offset++;
3756 
3757     uint32_t smplcnt;
3758     if (!mDataSource->getUInt32(offset, &smplcnt)) {
3759         return ERROR_MALFORMED;
3760     }
3761     mCurrentSampleInfoCount = smplcnt;
3762     offset += 4;
3763 
3764     if (mCurrentDefaultSampleInfoSize != 0) {
3765         ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3766         return OK;
3767     }
3768     if (smplcnt > mCurrentSampleInfoAllocSize) {
3769         mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3770         mCurrentSampleInfoAllocSize = smplcnt;
3771     }
3772 
3773     mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3774     return OK;
3775 }
3776 
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)3777 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3778         off64_t offset, off64_t /* size */) {
3779     ALOGV("parseSampleAuxiliaryInformationOffsets");
3780     // 14496-12 8.7.13
3781     uint8_t version;
3782     if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3783         return ERROR_IO;
3784     }
3785     offset++;
3786 
3787     uint32_t flags;
3788     if (!mDataSource->getUInt24(offset, &flags)) {
3789         return ERROR_IO;
3790     }
3791     offset += 3;
3792 
3793     uint32_t entrycount;
3794     if (!mDataSource->getUInt32(offset, &entrycount)) {
3795         return ERROR_IO;
3796     }
3797     offset += 4;
3798     if (entrycount == 0) {
3799         return OK;
3800     }
3801     if (entrycount > UINT32_MAX / 8) {
3802         return ERROR_MALFORMED;
3803     }
3804 
3805     if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3806         uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3807         if (newPtr == NULL) {
3808             return NO_MEMORY;
3809         }
3810         mCurrentSampleInfoOffsets = newPtr;
3811         mCurrentSampleInfoOffsetsAllocSize = entrycount;
3812     }
3813     mCurrentSampleInfoOffsetCount = entrycount;
3814 
3815     if (mCurrentSampleInfoOffsets == NULL) {
3816         return OK;
3817     }
3818 
3819     for (size_t i = 0; i < entrycount; i++) {
3820         if (version == 0) {
3821             uint32_t tmp;
3822             if (!mDataSource->getUInt32(offset, &tmp)) {
3823                 return ERROR_IO;
3824             }
3825             mCurrentSampleInfoOffsets[i] = tmp;
3826             offset += 4;
3827         } else {
3828             uint64_t tmp;
3829             if (!mDataSource->getUInt64(offset, &tmp)) {
3830                 return ERROR_IO;
3831             }
3832             mCurrentSampleInfoOffsets[i] = tmp;
3833             offset += 8;
3834         }
3835     }
3836 
3837     // parse clear/encrypted data
3838 
3839     off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3840 
3841     drmoffset += mCurrentMoofOffset;
3842     int ivlength;
3843     CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3844 
3845     // only 0, 8 and 16 byte initialization vectors are supported
3846     if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
3847         ALOGW("unsupported IV length: %d", ivlength);
3848         return ERROR_MALFORMED;
3849     }
3850     // read CencSampleAuxiliaryDataFormats
3851     for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3852         if (i >= mCurrentSamples.size()) {
3853             ALOGW("too few samples");
3854             break;
3855         }
3856         Sample *smpl = &mCurrentSamples.editItemAt(i);
3857 
3858         memset(smpl->iv, 0, 16);
3859         if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3860             return ERROR_IO;
3861         }
3862 
3863         drmoffset += ivlength;
3864 
3865         int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3866         if (smplinfosize == 0) {
3867             smplinfosize = mCurrentSampleInfoSizes[i];
3868         }
3869         if (smplinfosize > ivlength) {
3870             uint16_t numsubsamples;
3871             if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3872                 return ERROR_IO;
3873             }
3874             drmoffset += 2;
3875             for (size_t j = 0; j < numsubsamples; j++) {
3876                 uint16_t numclear;
3877                 uint32_t numencrypted;
3878                 if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3879                     return ERROR_IO;
3880                 }
3881                 drmoffset += 2;
3882                 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3883                     return ERROR_IO;
3884                 }
3885                 drmoffset += 4;
3886                 smpl->clearsizes.add(numclear);
3887                 smpl->encryptedsizes.add(numencrypted);
3888             }
3889         } else {
3890             smpl->clearsizes.add(0);
3891             smpl->encryptedsizes.add(smpl->size);
3892         }
3893     }
3894 
3895 
3896     return OK;
3897 }
3898 
parseTrackFragmentHeader(off64_t offset,off64_t size)3899 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3900 
3901     if (size < 8) {
3902         return -EINVAL;
3903     }
3904 
3905     uint32_t flags;
3906     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3907         return ERROR_MALFORMED;
3908     }
3909 
3910     if (flags & 0xff000000) {
3911         return -EINVAL;
3912     }
3913 
3914     if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3915         return ERROR_MALFORMED;
3916     }
3917 
3918     if (mLastParsedTrackId != mTrackId) {
3919         // this is not the right track, skip it
3920         return OK;
3921     }
3922 
3923     mTrackFragmentHeaderInfo.mFlags = flags;
3924     mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3925     offset += 8;
3926     size -= 8;
3927 
3928     ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3929 
3930     if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3931         if (size < 8) {
3932             return -EINVAL;
3933         }
3934 
3935         if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3936             return ERROR_MALFORMED;
3937         }
3938         offset += 8;
3939         size -= 8;
3940     }
3941 
3942     if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3943         if (size < 4) {
3944             return -EINVAL;
3945         }
3946 
3947         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3948             return ERROR_MALFORMED;
3949         }
3950         offset += 4;
3951         size -= 4;
3952     }
3953 
3954     if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3955         if (size < 4) {
3956             return -EINVAL;
3957         }
3958 
3959         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3960             return ERROR_MALFORMED;
3961         }
3962         offset += 4;
3963         size -= 4;
3964     }
3965 
3966     if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3967         if (size < 4) {
3968             return -EINVAL;
3969         }
3970 
3971         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3972             return ERROR_MALFORMED;
3973         }
3974         offset += 4;
3975         size -= 4;
3976     }
3977 
3978     if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3979         if (size < 4) {
3980             return -EINVAL;
3981         }
3982 
3983         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3984             return ERROR_MALFORMED;
3985         }
3986         offset += 4;
3987         size -= 4;
3988     }
3989 
3990     if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3991         mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3992     }
3993 
3994     mTrackFragmentHeaderInfo.mDataOffset = 0;
3995     return OK;
3996 }
3997 
parseTrackFragmentRun(off64_t offset,off64_t size)3998 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3999 
4000     ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4001     if (size < 8) {
4002         return -EINVAL;
4003     }
4004 
4005     enum {
4006         kDataOffsetPresent                  = 0x01,
4007         kFirstSampleFlagsPresent            = 0x04,
4008         kSampleDurationPresent              = 0x100,
4009         kSampleSizePresent                  = 0x200,
4010         kSampleFlagsPresent                 = 0x400,
4011         kSampleCompositionTimeOffsetPresent = 0x800,
4012     };
4013 
4014     uint32_t flags;
4015     if (!mDataSource->getUInt32(offset, &flags)) {
4016         return ERROR_MALFORMED;
4017     }
4018     ALOGV("fragment run flags: %08x", flags);
4019 
4020     if (flags & 0xff000000) {
4021         return -EINVAL;
4022     }
4023 
4024     if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4025         // These two shall not be used together.
4026         return -EINVAL;
4027     }
4028 
4029     uint32_t sampleCount;
4030     if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4031         return ERROR_MALFORMED;
4032     }
4033     offset += 8;
4034     size -= 8;
4035 
4036     uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4037 
4038     uint32_t firstSampleFlags = 0;
4039 
4040     if (flags & kDataOffsetPresent) {
4041         if (size < 4) {
4042             return -EINVAL;
4043         }
4044 
4045         int32_t dataOffsetDelta;
4046         if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4047             return ERROR_MALFORMED;
4048         }
4049 
4050         dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4051 
4052         offset += 4;
4053         size -= 4;
4054     }
4055 
4056     if (flags & kFirstSampleFlagsPresent) {
4057         if (size < 4) {
4058             return -EINVAL;
4059         }
4060 
4061         if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4062             return ERROR_MALFORMED;
4063         }
4064         offset += 4;
4065         size -= 4;
4066     }
4067 
4068     uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4069              sampleCtsOffset = 0;
4070 
4071     size_t bytesPerSample = 0;
4072     if (flags & kSampleDurationPresent) {
4073         bytesPerSample += 4;
4074     } else if (mTrackFragmentHeaderInfo.mFlags
4075             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4076         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4077     } else if (mTrex) {
4078         sampleDuration = mTrex->default_sample_duration;
4079     }
4080 
4081     if (flags & kSampleSizePresent) {
4082         bytesPerSample += 4;
4083     } else if (mTrackFragmentHeaderInfo.mFlags
4084             & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4085         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4086     } else {
4087         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4088     }
4089 
4090     if (flags & kSampleFlagsPresent) {
4091         bytesPerSample += 4;
4092     } else if (mTrackFragmentHeaderInfo.mFlags
4093             & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4094         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4095     } else {
4096         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4097     }
4098 
4099     if (flags & kSampleCompositionTimeOffsetPresent) {
4100         bytesPerSample += 4;
4101     } else {
4102         sampleCtsOffset = 0;
4103     }
4104 
4105     if (size < (off64_t)(sampleCount * bytesPerSample)) {
4106         return -EINVAL;
4107     }
4108 
4109     Sample tmp;
4110     for (uint32_t i = 0; i < sampleCount; ++i) {
4111         if (flags & kSampleDurationPresent) {
4112             if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4113                 return ERROR_MALFORMED;
4114             }
4115             offset += 4;
4116         }
4117 
4118         if (flags & kSampleSizePresent) {
4119             if (!mDataSource->getUInt32(offset, &sampleSize)) {
4120                 return ERROR_MALFORMED;
4121             }
4122             offset += 4;
4123         }
4124 
4125         if (flags & kSampleFlagsPresent) {
4126             if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4127                 return ERROR_MALFORMED;
4128             }
4129             offset += 4;
4130         }
4131 
4132         if (flags & kSampleCompositionTimeOffsetPresent) {
4133             if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4134                 return ERROR_MALFORMED;
4135             }
4136             offset += 4;
4137         }
4138 
4139         ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4140               " flags 0x%08x", i + 1,
4141                 dataOffset, sampleSize, sampleDuration,
4142                 (flags & kFirstSampleFlagsPresent) && i == 0
4143                     ? firstSampleFlags : sampleFlags);
4144         tmp.offset = dataOffset;
4145         tmp.size = sampleSize;
4146         tmp.duration = sampleDuration;
4147         tmp.compositionOffset = sampleCtsOffset;
4148         mCurrentSamples.add(tmp);
4149 
4150         dataOffset += sampleSize;
4151     }
4152 
4153     mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4154 
4155     return OK;
4156 }
4157 
getFormat()4158 sp<MetaData> MPEG4Source::getFormat() {
4159     Mutex::Autolock autoLock(mLock);
4160 
4161     return mFormat;
4162 }
4163 
parseNALSize(const uint8_t * data) const4164 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4165     switch (mNALLengthSize) {
4166         case 1:
4167             return *data;
4168         case 2:
4169             return U16_AT(data);
4170         case 3:
4171             return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4172         case 4:
4173             return U32_AT(data);
4174     }
4175 
4176     // This cannot happen, mNALLengthSize springs to life by adding 1 to
4177     // a 2-bit integer.
4178     CHECK(!"Should not be here.");
4179 
4180     return 0;
4181 }
4182 
read(MediaBuffer ** out,const ReadOptions * options)4183 status_t MPEG4Source::read(
4184         MediaBuffer **out, const ReadOptions *options) {
4185     Mutex::Autolock autoLock(mLock);
4186 
4187     CHECK(mStarted);
4188 
4189     if (mFirstMoofOffset > 0) {
4190         return fragmentedRead(out, options);
4191     }
4192 
4193     *out = NULL;
4194 
4195     int64_t targetSampleTimeUs = -1;
4196 
4197     int64_t seekTimeUs;
4198     ReadOptions::SeekMode mode;
4199     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4200         uint32_t findFlags = 0;
4201         switch (mode) {
4202             case ReadOptions::SEEK_PREVIOUS_SYNC:
4203                 findFlags = SampleTable::kFlagBefore;
4204                 break;
4205             case ReadOptions::SEEK_NEXT_SYNC:
4206                 findFlags = SampleTable::kFlagAfter;
4207                 break;
4208             case ReadOptions::SEEK_CLOSEST_SYNC:
4209             case ReadOptions::SEEK_CLOSEST:
4210                 findFlags = SampleTable::kFlagClosest;
4211                 break;
4212             default:
4213                 CHECK(!"Should not be here.");
4214                 break;
4215         }
4216 
4217         uint32_t sampleIndex;
4218         status_t err = mSampleTable->findSampleAtTime(
4219                 seekTimeUs, 1000000, mTimescale,
4220                 &sampleIndex, findFlags);
4221 
4222         if (mode == ReadOptions::SEEK_CLOSEST) {
4223             // We found the closest sample already, now we want the sync
4224             // sample preceding it (or the sample itself of course), even
4225             // if the subsequent sync sample is closer.
4226             findFlags = SampleTable::kFlagBefore;
4227         }
4228 
4229         uint32_t syncSampleIndex;
4230         if (err == OK) {
4231             err = mSampleTable->findSyncSampleNear(
4232                     sampleIndex, &syncSampleIndex, findFlags);
4233         }
4234 
4235         uint32_t sampleTime;
4236         if (err == OK) {
4237             err = mSampleTable->getMetaDataForSample(
4238                     sampleIndex, NULL, NULL, &sampleTime);
4239         }
4240 
4241         if (err != OK) {
4242             if (err == ERROR_OUT_OF_RANGE) {
4243                 // An attempt to seek past the end of the stream would
4244                 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
4245                 // this all the way to the MediaPlayer would cause abnormal
4246                 // termination. Legacy behaviour appears to be to behave as if
4247                 // we had seeked to the end of stream, ending normally.
4248                 err = ERROR_END_OF_STREAM;
4249             }
4250             ALOGV("end of stream");
4251             return err;
4252         }
4253 
4254         if (mode == ReadOptions::SEEK_CLOSEST) {
4255             targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
4256         }
4257 
4258 #if 0
4259         uint32_t syncSampleTime;
4260         CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
4261                     syncSampleIndex, NULL, NULL, &syncSampleTime));
4262 
4263         ALOGI("seek to time %lld us => sample at time %lld us, "
4264              "sync sample at time %lld us",
4265              seekTimeUs,
4266              sampleTime * 1000000ll / mTimescale,
4267              syncSampleTime * 1000000ll / mTimescale);
4268 #endif
4269 
4270         mCurrentSampleIndex = syncSampleIndex;
4271         if (mBuffer != NULL) {
4272             mBuffer->release();
4273             mBuffer = NULL;
4274         }
4275 
4276         // fall through
4277     }
4278 
4279     off64_t offset;
4280     size_t size;
4281     uint32_t cts, stts;
4282     bool isSyncSample;
4283     bool newBuffer = false;
4284     if (mBuffer == NULL) {
4285         newBuffer = true;
4286 
4287         status_t err =
4288             mSampleTable->getMetaDataForSample(
4289                     mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
4290 
4291         if (err != OK) {
4292             return err;
4293         }
4294 
4295         err = mGroup->acquire_buffer(&mBuffer);
4296 
4297         if (err != OK) {
4298             CHECK(mBuffer == NULL);
4299             return err;
4300         }
4301         if (size > mBuffer->size()) {
4302             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4303             return ERROR_BUFFER_TOO_SMALL;
4304         }
4305     }
4306 
4307     if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
4308         if (newBuffer) {
4309             ssize_t num_bytes_read =
4310                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4311 
4312             if (num_bytes_read < (ssize_t)size) {
4313                 mBuffer->release();
4314                 mBuffer = NULL;
4315 
4316                 return ERROR_IO;
4317             }
4318 
4319             CHECK(mBuffer != NULL);
4320             mBuffer->set_range(0, size);
4321             mBuffer->meta_data()->clear();
4322             mBuffer->meta_data()->setInt64(
4323                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4324             mBuffer->meta_data()->setInt64(
4325                     kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4326 
4327             if (targetSampleTimeUs >= 0) {
4328                 mBuffer->meta_data()->setInt64(
4329                         kKeyTargetTime, targetSampleTimeUs);
4330             }
4331 
4332             if (isSyncSample) {
4333                 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4334             }
4335 
4336             ++mCurrentSampleIndex;
4337         }
4338 
4339         if (!mIsAVC && !mIsHEVC) {
4340             *out = mBuffer;
4341             mBuffer = NULL;
4342 
4343             return OK;
4344         }
4345 
4346         // Each NAL unit is split up into its constituent fragments and
4347         // each one of them returned in its own buffer.
4348 
4349         CHECK(mBuffer->range_length() >= mNALLengthSize);
4350 
4351         const uint8_t *src =
4352             (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4353 
4354         size_t nal_size = parseNALSize(src);
4355         if (mNALLengthSize > SIZE_MAX - nal_size) {
4356             ALOGE("b/24441553, b/24445122");
4357         }
4358         if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4359             ALOGE("incomplete NAL unit.");
4360 
4361             mBuffer->release();
4362             mBuffer = NULL;
4363 
4364             return ERROR_MALFORMED;
4365         }
4366 
4367         MediaBuffer *clone = mBuffer->clone();
4368         CHECK(clone != NULL);
4369         clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4370 
4371         CHECK(mBuffer != NULL);
4372         mBuffer->set_range(
4373                 mBuffer->range_offset() + mNALLengthSize + nal_size,
4374                 mBuffer->range_length() - mNALLengthSize - nal_size);
4375 
4376         if (mBuffer->range_length() == 0) {
4377             mBuffer->release();
4378             mBuffer = NULL;
4379         }
4380 
4381         *out = clone;
4382 
4383         return OK;
4384     } else {
4385         // Whole NAL units are returned but each fragment is prefixed by
4386         // the start code (0x00 00 00 01).
4387         ssize_t num_bytes_read = 0;
4388         int32_t drm = 0;
4389         bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4390         if (usesDRM) {
4391             num_bytes_read =
4392                 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4393         } else {
4394             num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4395         }
4396 
4397         if (num_bytes_read < (ssize_t)size) {
4398             mBuffer->release();
4399             mBuffer = NULL;
4400 
4401             return ERROR_IO;
4402         }
4403 
4404         if (usesDRM) {
4405             CHECK(mBuffer != NULL);
4406             mBuffer->set_range(0, size);
4407 
4408         } else {
4409             uint8_t *dstData = (uint8_t *)mBuffer->data();
4410             size_t srcOffset = 0;
4411             size_t dstOffset = 0;
4412 
4413             while (srcOffset < size) {
4414                 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4415                 size_t nalLength = 0;
4416                 if (!isMalFormed) {
4417                     nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4418                     srcOffset += mNALLengthSize;
4419                     isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4420                 }
4421 
4422                 if (isMalFormed) {
4423                     ALOGE("Video is malformed");
4424                     mBuffer->release();
4425                     mBuffer = NULL;
4426                     return ERROR_MALFORMED;
4427                 }
4428 
4429                 if (nalLength == 0) {
4430                     continue;
4431                 }
4432 
4433                 if (dstOffset > SIZE_MAX - 4 ||
4434                         dstOffset + 4 > SIZE_MAX - nalLength ||
4435                         dstOffset + 4 + nalLength > mBuffer->size()) {
4436                     ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
4437                     android_errorWriteLog(0x534e4554, "27208621");
4438                     mBuffer->release();
4439                     mBuffer = NULL;
4440                     return ERROR_MALFORMED;
4441                 }
4442 
4443                 dstData[dstOffset++] = 0;
4444                 dstData[dstOffset++] = 0;
4445                 dstData[dstOffset++] = 0;
4446                 dstData[dstOffset++] = 1;
4447                 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4448                 srcOffset += nalLength;
4449                 dstOffset += nalLength;
4450             }
4451             CHECK_EQ(srcOffset, size);
4452             CHECK(mBuffer != NULL);
4453             mBuffer->set_range(0, dstOffset);
4454         }
4455 
4456         mBuffer->meta_data()->clear();
4457         mBuffer->meta_data()->setInt64(
4458                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4459         mBuffer->meta_data()->setInt64(
4460                 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4461 
4462         if (targetSampleTimeUs >= 0) {
4463             mBuffer->meta_data()->setInt64(
4464                     kKeyTargetTime, targetSampleTimeUs);
4465         }
4466 
4467         if (isSyncSample) {
4468             mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4469         }
4470 
4471         ++mCurrentSampleIndex;
4472 
4473         *out = mBuffer;
4474         mBuffer = NULL;
4475 
4476         return OK;
4477     }
4478 }
4479 
fragmentedRead(MediaBuffer ** out,const ReadOptions * options)4480 status_t MPEG4Source::fragmentedRead(
4481         MediaBuffer **out, const ReadOptions *options) {
4482 
4483     ALOGV("MPEG4Source::fragmentedRead");
4484 
4485     CHECK(mStarted);
4486 
4487     *out = NULL;
4488 
4489     int64_t targetSampleTimeUs = -1;
4490 
4491     int64_t seekTimeUs;
4492     ReadOptions::SeekMode mode;
4493     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4494 
4495         int numSidxEntries = mSegments.size();
4496         if (numSidxEntries != 0) {
4497             int64_t totalTime = 0;
4498             off64_t totalOffset = mFirstMoofOffset;
4499             for (int i = 0; i < numSidxEntries; i++) {
4500                 const SidxEntry *se = &mSegments[i];
4501                 if (totalTime + se->mDurationUs > seekTimeUs) {
4502                     // The requested time is somewhere in this segment
4503                     if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
4504                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
4505                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
4506                         // requested next sync, or closest sync and it was closer to the end of
4507                         // this segment
4508                         totalTime += se->mDurationUs;
4509                         totalOffset += se->mSize;
4510                     }
4511                     break;
4512                 }
4513                 totalTime += se->mDurationUs;
4514                 totalOffset += se->mSize;
4515             }
4516             mCurrentMoofOffset = totalOffset;
4517             mCurrentSamples.clear();
4518             mCurrentSampleIndex = 0;
4519             parseChunk(&totalOffset);
4520             mCurrentTime = totalTime * mTimescale / 1000000ll;
4521         } else {
4522             // without sidx boxes, we can only seek to 0
4523             mCurrentMoofOffset = mFirstMoofOffset;
4524             mCurrentSamples.clear();
4525             mCurrentSampleIndex = 0;
4526             off64_t tmp = mCurrentMoofOffset;
4527             parseChunk(&tmp);
4528             mCurrentTime = 0;
4529         }
4530 
4531         if (mBuffer != NULL) {
4532             mBuffer->release();
4533             mBuffer = NULL;
4534         }
4535 
4536         // fall through
4537     }
4538 
4539     off64_t offset = 0;
4540     size_t size = 0;
4541     uint32_t cts = 0;
4542     bool isSyncSample = false;
4543     bool newBuffer = false;
4544     if (mBuffer == NULL) {
4545         newBuffer = true;
4546 
4547         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4548             // move to next fragment if there is one
4549             if (mNextMoofOffset <= mCurrentMoofOffset) {
4550                 return ERROR_END_OF_STREAM;
4551             }
4552             off64_t nextMoof = mNextMoofOffset;
4553             mCurrentMoofOffset = nextMoof;
4554             mCurrentSamples.clear();
4555             mCurrentSampleIndex = 0;
4556             parseChunk(&nextMoof);
4557             if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4558                 return ERROR_END_OF_STREAM;
4559             }
4560         }
4561 
4562         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4563         offset = smpl->offset;
4564         size = smpl->size;
4565         cts = mCurrentTime + smpl->compositionOffset;
4566         mCurrentTime += smpl->duration;
4567         isSyncSample = (mCurrentSampleIndex == 0); // XXX
4568 
4569         status_t err = mGroup->acquire_buffer(&mBuffer);
4570 
4571         if (err != OK) {
4572             CHECK(mBuffer == NULL);
4573             ALOGV("acquire_buffer returned %d", err);
4574             return err;
4575         }
4576         if (size > mBuffer->size()) {
4577             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4578             return ERROR_BUFFER_TOO_SMALL;
4579         }
4580     }
4581 
4582     const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4583     const sp<MetaData> bufmeta = mBuffer->meta_data();
4584     bufmeta->clear();
4585     if (smpl->encryptedsizes.size()) {
4586         // store clear/encrypted lengths in metadata
4587         bufmeta->setData(kKeyPlainSizes, 0,
4588                 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4589         bufmeta->setData(kKeyEncryptedSizes, 0,
4590                 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4591         bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4592         bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4593         bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4594         bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4595     }
4596 
4597     if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4598         if (newBuffer) {
4599             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
4600                 mBuffer->release();
4601                 mBuffer = NULL;
4602 
4603                 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
4604                 return ERROR_MALFORMED;
4605             }
4606 
4607             ssize_t num_bytes_read =
4608                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4609 
4610             if (num_bytes_read < (ssize_t)size) {
4611                 mBuffer->release();
4612                 mBuffer = NULL;
4613 
4614                 ALOGE("i/o error");
4615                 return ERROR_IO;
4616             }
4617 
4618             CHECK(mBuffer != NULL);
4619             mBuffer->set_range(0, size);
4620             mBuffer->meta_data()->setInt64(
4621                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4622             mBuffer->meta_data()->setInt64(
4623                     kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4624 
4625             if (targetSampleTimeUs >= 0) {
4626                 mBuffer->meta_data()->setInt64(
4627                         kKeyTargetTime, targetSampleTimeUs);
4628             }
4629 
4630             if (isSyncSample) {
4631                 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4632             }
4633 
4634             ++mCurrentSampleIndex;
4635         }
4636 
4637         if (!mIsAVC && !mIsHEVC) {
4638             *out = mBuffer;
4639             mBuffer = NULL;
4640 
4641             return OK;
4642         }
4643 
4644         // Each NAL unit is split up into its constituent fragments and
4645         // each one of them returned in its own buffer.
4646 
4647         CHECK(mBuffer->range_length() >= mNALLengthSize);
4648 
4649         const uint8_t *src =
4650             (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4651 
4652         size_t nal_size = parseNALSize(src);
4653         if (mNALLengthSize > SIZE_MAX - nal_size) {
4654             ALOGE("b/24441553, b/24445122");
4655         }
4656 
4657         if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4658             ALOGE("incomplete NAL unit.");
4659 
4660             mBuffer->release();
4661             mBuffer = NULL;
4662 
4663             return ERROR_MALFORMED;
4664         }
4665 
4666         MediaBuffer *clone = mBuffer->clone();
4667         CHECK(clone != NULL);
4668         clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4669 
4670         CHECK(mBuffer != NULL);
4671         mBuffer->set_range(
4672                 mBuffer->range_offset() + mNALLengthSize + nal_size,
4673                 mBuffer->range_length() - mNALLengthSize - nal_size);
4674 
4675         if (mBuffer->range_length() == 0) {
4676             mBuffer->release();
4677             mBuffer = NULL;
4678         }
4679 
4680         *out = clone;
4681 
4682         return OK;
4683     } else {
4684         ALOGV("whole NAL");
4685         // Whole NAL units are returned but each fragment is prefixed by
4686         // the start code (0x00 00 00 01).
4687         ssize_t num_bytes_read = 0;
4688         int32_t drm = 0;
4689         bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4690         void *data = NULL;
4691         bool isMalFormed = false;
4692         if (usesDRM) {
4693             if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
4694                 isMalFormed = true;
4695             } else {
4696                 data = mBuffer->data();
4697             }
4698         } else {
4699             int32_t max_size;
4700             if (mFormat == NULL
4701                     || !mFormat->findInt32(kKeyMaxInputSize, &max_size)
4702                     || !isInRange((size_t)0u, (size_t)max_size, size)) {
4703                 isMalFormed = true;
4704             } else {
4705                 data = mSrcBuffer;
4706             }
4707         }
4708 
4709         if (isMalFormed || data == NULL) {
4710             ALOGE("isMalFormed size %zu", size);
4711             if (mBuffer != NULL) {
4712                 mBuffer->release();
4713                 mBuffer = NULL;
4714             }
4715             return ERROR_MALFORMED;
4716         }
4717         num_bytes_read = mDataSource->readAt(offset, data, size);
4718 
4719         if (num_bytes_read < (ssize_t)size) {
4720             mBuffer->release();
4721             mBuffer = NULL;
4722 
4723             ALOGE("i/o error");
4724             return ERROR_IO;
4725         }
4726 
4727         if (usesDRM) {
4728             CHECK(mBuffer != NULL);
4729             mBuffer->set_range(0, size);
4730 
4731         } else {
4732             uint8_t *dstData = (uint8_t *)mBuffer->data();
4733             size_t srcOffset = 0;
4734             size_t dstOffset = 0;
4735 
4736             while (srcOffset < size) {
4737                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4738                 size_t nalLength = 0;
4739                 if (!isMalFormed) {
4740                     nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4741                     srcOffset += mNALLengthSize;
4742                     isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
4743                             || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
4744                             || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
4745                 }
4746 
4747                 if (isMalFormed) {
4748                     ALOGE("Video is malformed; nalLength %zu", nalLength);
4749                     mBuffer->release();
4750                     mBuffer = NULL;
4751                     return ERROR_MALFORMED;
4752                 }
4753 
4754                 if (nalLength == 0) {
4755                     continue;
4756                 }
4757 
4758                 if (dstOffset > SIZE_MAX - 4 ||
4759                         dstOffset + 4 > SIZE_MAX - nalLength ||
4760                         dstOffset + 4 + nalLength > mBuffer->size()) {
4761                     ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
4762                     android_errorWriteLog(0x534e4554, "26365349");
4763                     mBuffer->release();
4764                     mBuffer = NULL;
4765                     return ERROR_MALFORMED;
4766                 }
4767 
4768                 dstData[dstOffset++] = 0;
4769                 dstData[dstOffset++] = 0;
4770                 dstData[dstOffset++] = 0;
4771                 dstData[dstOffset++] = 1;
4772                 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4773                 srcOffset += nalLength;
4774                 dstOffset += nalLength;
4775             }
4776             CHECK_EQ(srcOffset, size);
4777             CHECK(mBuffer != NULL);
4778             mBuffer->set_range(0, dstOffset);
4779         }
4780 
4781         mBuffer->meta_data()->setInt64(
4782                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4783         mBuffer->meta_data()->setInt64(
4784                 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4785 
4786         if (targetSampleTimeUs >= 0) {
4787             mBuffer->meta_data()->setInt64(
4788                     kKeyTargetTime, targetSampleTimeUs);
4789         }
4790 
4791         if (isSyncSample) {
4792             mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4793         }
4794 
4795         ++mCurrentSampleIndex;
4796 
4797         *out = mBuffer;
4798         mBuffer = NULL;
4799 
4800         return OK;
4801     }
4802 }
4803 
findTrackByMimePrefix(const char * mimePrefix)4804 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4805         const char *mimePrefix) {
4806     for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4807         const char *mime;
4808         if (track->meta != NULL
4809                 && track->meta->findCString(kKeyMIMEType, &mime)
4810                 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4811             return track;
4812         }
4813     }
4814 
4815     return NULL;
4816 }
4817 
LegacySniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence)4818 static bool LegacySniffMPEG4(
4819         const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4820     uint8_t header[8];
4821 
4822     ssize_t n = source->readAt(4, header, sizeof(header));
4823     if (n < (ssize_t)sizeof(header)) {
4824         return false;
4825     }
4826 
4827     if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4828         || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4829         || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4830         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4831         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4832         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4833         *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4834         *confidence = 0.4;
4835 
4836         return true;
4837     }
4838 
4839     return false;
4840 }
4841 
isCompatibleBrand(uint32_t fourcc)4842 static bool isCompatibleBrand(uint32_t fourcc) {
4843     static const uint32_t kCompatibleBrands[] = {
4844         FOURCC('i', 's', 'o', 'm'),
4845         FOURCC('i', 's', 'o', '2'),
4846         FOURCC('a', 'v', 'c', '1'),
4847         FOURCC('h', 'v', 'c', '1'),
4848         FOURCC('h', 'e', 'v', '1'),
4849         FOURCC('3', 'g', 'p', '4'),
4850         FOURCC('m', 'p', '4', '1'),
4851         FOURCC('m', 'p', '4', '2'),
4852 
4853         // Won't promise that the following file types can be played.
4854         // Just give these file types a chance.
4855         FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4856         FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4857 
4858         FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4859         FOURCC('3', 'g', '2', 'b'),
4860     };
4861 
4862     for (size_t i = 0;
4863          i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4864          ++i) {
4865         if (kCompatibleBrands[i] == fourcc) {
4866             return true;
4867         }
4868     }
4869 
4870     return false;
4871 }
4872 
4873 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
4874 // compatible brand is present.
4875 // Also try to identify where this file's metadata ends
4876 // (end of the 'moov' atom) and report it to the caller as part of
4877 // the metadata.
BetterSniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4878 static bool BetterSniffMPEG4(
4879         const sp<DataSource> &source, String8 *mimeType, float *confidence,
4880         sp<AMessage> *meta) {
4881     // We scan up to 128 bytes to identify this file as an MP4.
4882     static const off64_t kMaxScanOffset = 128ll;
4883 
4884     off64_t offset = 0ll;
4885     bool foundGoodFileType = false;
4886     off64_t moovAtomEndOffset = -1ll;
4887     bool done = false;
4888 
4889     while (!done && offset < kMaxScanOffset) {
4890         uint32_t hdr[2];
4891         if (source->readAt(offset, hdr, 8) < 8) {
4892             return false;
4893         }
4894 
4895         uint64_t chunkSize = ntohl(hdr[0]);
4896         uint32_t chunkType = ntohl(hdr[1]);
4897         off64_t chunkDataOffset = offset + 8;
4898 
4899         if (chunkSize == 1) {
4900             if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4901                 return false;
4902             }
4903 
4904             chunkSize = ntoh64(chunkSize);
4905             chunkDataOffset += 8;
4906 
4907             if (chunkSize < 16) {
4908                 // The smallest valid chunk is 16 bytes long in this case.
4909                 return false;
4910             }
4911 
4912         } else if (chunkSize < 8) {
4913             // The smallest valid chunk is 8 bytes long.
4914             return false;
4915         }
4916 
4917         // (data_offset - offset) is either 8 or 16
4918         off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
4919         if (chunkDataSize < 0) {
4920             ALOGE("b/23540914");
4921             return ERROR_MALFORMED;
4922         }
4923 
4924         char chunkstring[5];
4925         MakeFourCCString(chunkType, chunkstring);
4926         ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
4927         switch (chunkType) {
4928             case FOURCC('f', 't', 'y', 'p'):
4929             {
4930                 if (chunkDataSize < 8) {
4931                     return false;
4932                 }
4933 
4934                 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4935                 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4936                     if (i == 1) {
4937                         // Skip this index, it refers to the minorVersion,
4938                         // not a brand.
4939                         continue;
4940                     }
4941 
4942                     uint32_t brand;
4943                     if (source->readAt(
4944                                 chunkDataOffset + 4 * i, &brand, 4) < 4) {
4945                         return false;
4946                     }
4947 
4948                     brand = ntohl(brand);
4949 
4950                     if (isCompatibleBrand(brand)) {
4951                         foundGoodFileType = true;
4952                         break;
4953                     }
4954                 }
4955 
4956                 if (!foundGoodFileType) {
4957                     return false;
4958                 }
4959 
4960                 break;
4961             }
4962 
4963             case FOURCC('m', 'o', 'o', 'v'):
4964             {
4965                 moovAtomEndOffset = offset + chunkSize;
4966 
4967                 done = true;
4968                 break;
4969             }
4970 
4971             default:
4972                 break;
4973         }
4974 
4975         offset += chunkSize;
4976     }
4977 
4978     if (!foundGoodFileType) {
4979         return false;
4980     }
4981 
4982     *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4983     *confidence = 0.4f;
4984 
4985     if (moovAtomEndOffset >= 0) {
4986         *meta = new AMessage;
4987         (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4988 
4989         ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset);
4990     }
4991 
4992     return true;
4993 }
4994 
SniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4995 bool SniffMPEG4(
4996         const sp<DataSource> &source, String8 *mimeType, float *confidence,
4997         sp<AMessage> *meta) {
4998     if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4999         return true;
5000     }
5001 
5002     if (LegacySniffMPEG4(source, mimeType, confidence)) {
5003         ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5004         return true;
5005     }
5006 
5007     return false;
5008 }
5009 
5010 }  // namespace android
5011