1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19 
#include <ctype.h>
#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
26 
27 #include <utils/Log.h>
28 
29 #include "MPEG4Extractor.h"
30 #include "SampleTable.h"
31 #include "ItemTable.h"
32 #include "include/ESDS.h"
33 
34 #include <media/ExtractorUtils.h>
35 #include <media/MediaTrack.h>
36 #include <media/stagefright/foundation/ABitReader.h>
37 #include <media/stagefright/foundation/ABuffer.h>
38 #include <media/stagefright/foundation/ADebug.h>
39 #include <media/stagefright/foundation/AMessage.h>
40 #include <media/stagefright/foundation/AUtils.h>
41 #include <media/stagefright/foundation/ByteUtils.h>
42 #include <media/stagefright/foundation/ColorUtils.h>
43 #include <media/stagefright/foundation/avc_utils.h>
44 #include <media/stagefright/foundation/hexdump.h>
45 #include <media/stagefright/MediaBufferBase.h>
46 #include <media/stagefright/MediaBufferGroup.h>
47 #include <media/stagefright/MediaDefs.h>
48 #include <media/stagefright/MetaData.h>
49 #include <utils/String8.h>
50 
51 #include <byteswap.h>
52 #include "include/ID3.h"
53 
// Fallback definition, used only when the headers included above have not
// already provided UINT32_MAX.
#if !defined(UINT32_MAX)
#define UINT32_MAX       (4294967295U)
#endif
57 
58 namespace android {
59 
enum {
    // Largest track header chunk, in bytes, that is handed back to callers.
    kMaxTrackHeaderSize = 32,

    // Largest atom, in bytes, that this extractor accepts (64 MiB). The spec
    // permits some atoms to be bigger, but anything above this limit is
    // rejected here.
    kMaxAtomSize = 64 << 20,
};
68 
// MediaTrack subclass declared for use by this extractor. Only the declaration
// is visible here; the member functions are defined elsewhere. The notes below
// describe what the declarations themselves show, and anything inferred from
// naming is marked as an assumption.
class MPEG4Source : public MediaTrack {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(MetaDataBase &format,
                DataSourceBase *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable);
    virtual status_t init();

    virtual status_t start(MetaDataBase *params = NULL);
    virtual status_t stop();

    virtual status_t getFormat(MetaDataBase &);

    virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
    // Unconditionally reports true.
    virtual bool supportNonblockingRead() { return true; }
    // NOTE(review): presumably the read path used for fragmented (moof-based)
    // files -- confirm against the definition.
    virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // mFormat and mSegments are references, mDataSource and mTrex are raw
    // pointers; none of them is owned by this object as far as the
    // declaration shows, so the referenced objects must outlive it.
    MetaDataBase &mFormat;
    DataSourceBase *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    // File offsets of movie fragments.
    // NOTE(review): first / current / next 'moof' box, judging by the names.
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    // State for sample auxiliary information.
    // NOTE(review): presumably filled by parseSampleAuxiliaryInformationSizes()
    // and parseSampleAuxiliaryInformationOffsets() -- confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    // NOTE(review): presumably the byte width of the NAL length prefix
    // consumed by parseNALSize() -- confirm.
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBufferBase *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    sp<ItemTable> mItemTable;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
    status_t parseSampleEncryption(off64_t offset);

    // Values gathered from a track fragment header.
    // NOTE(review): the Flags bits look like the tf_flags of the 'tfhd' box in
    // ISO/IEC 14496-12 -- confirm against parseTrackFragmentHeader().
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size, timing, and encryption details
    // (a 16-byte IV plus lists of clear and encrypted sizes).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are private; presumably left undefined so that the
    // class cannot be copied.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
188 
// This custom data source wraps an existing one. Requests that fall entirely
// within a cached range are served from the cache; all other requests are
// forwarded to the wrapped data source.
// It is used to cache the full sampletable metadata for a single track.
// Instances may wrap one another to cover all tracks, i.e. each
// CachedRangedDataSource caches the sampletable metadata of exactly one track.

struct CachedRangedDataSource : public DataSourceBase {
    // Wraps "source" without taking ownership of it.
    explicit CachedRangedDataSource(DataSourceBase *source);
    // Frees the cache; also deletes the wrapped source if ownership was
    // assumed through setCachedRange().
    virtual ~CachedRangedDataSource();

    // initCheck(), getSize() and flags() forward to the wrapped source.
    virtual status_t initCheck() const;
    // Copies from the cache when [offset, offset + size) lies inside the
    // cached range, otherwise forwards the read to the wrapped source.
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Discards any existing cache and fills a new one with "size" bytes read
    // from the wrapped source at "offset".
    // Returns OK on success, -ENOMEM if the buffer cannot be allocated, or
    // ERROR_IO if fewer than "size" bytes could be read (the cache is then
    // left empty). Only on success is the ownership flag set to
    // "assumeSourceOwnershipOnSuccess".
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceBase *mSource;
    // When true, the destructor deletes mSource.
    bool mOwnsDataSource;
    // Start offset and length of the cached range; both are 0 while the
    // cache is empty.
    off64_t mCachedOffset;
    size_t mCachedSize;
    // malloc()ed buffer holding the cached bytes, or NULL when empty.
    uint8_t *mCache;

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Copy operations are private; presumably left undefined so that the
    // struct cannot be copied.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
222 
CachedRangedDataSource(DataSourceBase * source)223 CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source)
224     : mSource(source),
225       mOwnsDataSource(false),
226       mCachedOffset(0),
227       mCachedSize(0),
228       mCache(NULL) {
229 }
230 
~CachedRangedDataSource()231 CachedRangedDataSource::~CachedRangedDataSource() {
232     clearCache();
233     if (mOwnsDataSource) {
234         delete (CachedRangedDataSource*)mSource;
235     }
236 }
237 
clearCache()238 void CachedRangedDataSource::clearCache() {
239     if (mCache) {
240         free(mCache);
241         mCache = NULL;
242     }
243 
244     mCachedOffset = 0;
245     mCachedSize = 0;
246 }
247 
initCheck() const248 status_t CachedRangedDataSource::initCheck() const {
249     return mSource->initCheck();
250 }
251 
readAt(off64_t offset,void * data,size_t size)252 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
253     Mutex::Autolock autoLock(mLock);
254 
255     if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
256         memcpy(data, &mCache[offset - mCachedOffset], size);
257         return size;
258     }
259 
260     return mSource->readAt(offset, data, size);
261 }
262 
getSize(off64_t * size)263 status_t CachedRangedDataSource::getSize(off64_t *size) {
264     return mSource->getSize(size);
265 }
266 
flags()267 uint32_t CachedRangedDataSource::flags() {
268     return mSource->flags();
269 }
270 
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)271 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
272         size_t size,
273         bool assumeSourceOwnershipOnSuccess) {
274     Mutex::Autolock autoLock(mLock);
275 
276     clearCache();
277 
278     mCache = (uint8_t *)malloc(size);
279 
280     if (mCache == NULL) {
281         return -ENOMEM;
282     }
283 
284     mCachedOffset = offset;
285     mCachedSize = size;
286 
287     ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
288 
289     if (err < (ssize_t)size) {
290         clearCache();
291 
292         return ERROR_IO;
293     }
294     mOwnsDataSource = assumeSourceOwnershipOnSuccess;
295     return OK;
296 }
297 
298 ////////////////////////////////////////////////////////////////////////////////
299 
// Debugging switch: when true, parseChunk() prints every chunk it finds
// together with a hexdump of the chunk's first bytes.
static constexpr bool kUseHexDump = false;
301 
FourCC2MIME(uint32_t fourcc)302 static const char *FourCC2MIME(uint32_t fourcc) {
303     switch (fourcc) {
304         case FOURCC('m', 'p', '4', 'a'):
305             return MEDIA_MIMETYPE_AUDIO_AAC;
306 
307         case FOURCC('s', 'a', 'm', 'r'):
308             return MEDIA_MIMETYPE_AUDIO_AMR_NB;
309 
310         case FOURCC('s', 'a', 'w', 'b'):
311             return MEDIA_MIMETYPE_AUDIO_AMR_WB;
312 
313         case FOURCC('m', 'p', '4', 'v'):
314             return MEDIA_MIMETYPE_VIDEO_MPEG4;
315 
316         case FOURCC('s', '2', '6', '3'):
317         case FOURCC('h', '2', '6', '3'):
318         case FOURCC('H', '2', '6', '3'):
319             return MEDIA_MIMETYPE_VIDEO_H263;
320 
321         case FOURCC('a', 'v', 'c', '1'):
322             return MEDIA_MIMETYPE_VIDEO_AVC;
323 
324         case FOURCC('h', 'v', 'c', '1'):
325         case FOURCC('h', 'e', 'v', '1'):
326             return MEDIA_MIMETYPE_VIDEO_HEVC;
327         default:
328             ALOGW("Unknown fourcc: %c%c%c%c",
329                    (fourcc >> 24) & 0xff,
330                    (fourcc >> 16) & 0xff,
331                    (fourcc >> 8) & 0xff,
332                    fourcc & 0xff
333                    );
334             return "application/octet-stream";
335     }
336 }
337 
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)338 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
339     if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
340         // AMR NB audio is always mono, 8kHz
341         *channels = 1;
342         *rate = 8000;
343         return true;
344     } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
345         // AMR WB audio is always mono, 16kHz
346         *channels = 1;
347         *rate = 16000;
348         return true;
349     }
350     return false;
351 }
352 
MPEG4Extractor(DataSourceBase * source,const char * mime)353 MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime)
354     : mMoofOffset(0),
355       mMoofFound(false),
356       mMdatFound(false),
357       mDataSource(source),
358       mCachedSource(NULL),
359       mInitCheck(NO_INIT),
360       mHeaderTimescale(0),
361       mIsQT(false),
362       mIsHeif(false),
363       mHasMoovBox(false),
364       mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
365       mFirstTrack(NULL),
366       mLastTrack(NULL) {
367     ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
368 }
369 
~MPEG4Extractor()370 MPEG4Extractor::~MPEG4Extractor() {
371     Track *track = mFirstTrack;
372     while (track) {
373         Track *next = track->next;
374 
375         delete track;
376         track = next;
377     }
378     mFirstTrack = mLastTrack = NULL;
379 
380     for (size_t i = 0; i < mPssh.size(); i++) {
381         delete [] mPssh[i].data;
382     }
383     mPssh.clear();
384 
385     delete mCachedSource;
386 }
387 
flags() const388 uint32_t MPEG4Extractor::flags() const {
389     return CAN_PAUSE |
390             ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
391                     (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
392 }
393 
getMetaData(MetaDataBase & meta)394 status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) {
395     status_t err;
396     if ((err = readMetaData()) != OK) {
397         return UNKNOWN_ERROR;
398     }
399     meta = mFileMetaData;
400     return OK;
401 }
402 
countTracks()403 size_t MPEG4Extractor::countTracks() {
404     status_t err;
405     if ((err = readMetaData()) != OK) {
406         ALOGV("MPEG4Extractor::countTracks: no tracks");
407         return 0;
408     }
409 
410     size_t n = 0;
411     Track *track = mFirstTrack;
412     while (track) {
413         ++n;
414         track = track->next;
415     }
416 
417     ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
418     return n;
419 }
420 
getTrackMetaData(MetaDataBase & meta,size_t index,uint32_t flags)421 status_t MPEG4Extractor::getTrackMetaData(
422         MetaDataBase &meta,
423         size_t index, uint32_t flags) {
424     status_t err;
425     if ((err = readMetaData()) != OK) {
426         return UNKNOWN_ERROR;
427     }
428 
429     Track *track = mFirstTrack;
430     while (index > 0) {
431         if (track == NULL) {
432             return UNKNOWN_ERROR;
433         }
434 
435         track = track->next;
436         --index;
437     }
438 
439     if (track == NULL) {
440         return UNKNOWN_ERROR;
441     }
442 
443     [=] {
444         int64_t duration;
445         int32_t samplerate;
446         if (track->has_elst && mHeaderTimescale != 0 &&
447                 track->meta.findInt64(kKeyDuration, &duration) &&
448                 track->meta.findInt32(kKeySampleRate, &samplerate)) {
449 
450             track->has_elst = false;
451 
452             if (track->elst_segment_duration > INT64_MAX) {
453                 return;
454             }
455             int64_t segment_duration = track->elst_segment_duration;
456             int64_t media_time = track->elst_media_time;
457             int64_t halfscale = mHeaderTimescale / 2;
458             ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
459                   ", halfscale = %" PRId64 ", timescale = %d",
460                   segment_duration,
461                   media_time,
462                   halfscale,
463                   mHeaderTimescale);
464 
465             int64_t delay;
466             // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
467             if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
468                     __builtin_add_overflow(delay, halfscale, &delay) ||
469                     (delay /= mHeaderTimescale, false) ||
470                     delay > INT32_MAX ||
471                     delay < INT32_MIN) {
472                 return;
473             }
474             ALOGV("delay = %" PRId64, delay);
475             track->meta.setInt32(kKeyEncoderDelay, delay);
476 
477             int64_t scaled_duration;
478             // scaled_duration = duration * mHeaderTimescale;
479             if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
480                 return;
481             }
482             ALOGV("scaled_duration = %" PRId64, scaled_duration);
483 
484             int64_t segment_end;
485             int64_t padding;
486             // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
487             if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
488                     __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
489                     __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
490                 return;
491             }
492             ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
493 
494             if (padding < 0) {
495                 // track duration from media header (which is what kKeyDuration is) might
496                 // be slightly shorter than the segment duration, which would make the
497                 // padding negative. Clamp to zero.
498                 padding = 0;
499             }
500 
501             int64_t paddingsamples;
502             int64_t halfscale_e6;
503             int64_t timescale_e6;
504             // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
505             //                / (mHeaderTimescale * 1000000);
506             if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
507                     __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
508                     __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
509                     __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
510                     (paddingsamples /= timescale_e6, false) ||
511                     paddingsamples > INT32_MAX) {
512                 return;
513             }
514             ALOGV("paddingsamples = %" PRId64, paddingsamples);
515             track->meta.setInt32(kKeyEncoderPadding, paddingsamples);
516         }
517     }();
518 
519     if ((flags & kIncludeExtensiveMetaData)
520             && !track->includes_expensive_metadata) {
521         track->includes_expensive_metadata = true;
522 
523         const char *mime;
524         CHECK(track->meta.findCString(kKeyMIMEType, &mime));
525         if (!strncasecmp("video/", mime, 6)) {
526             // MPEG2 tracks do not provide CSD, so read the stream header
527             if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
528                 off64_t offset;
529                 size_t size;
530                 if (track->sampleTable->getMetaDataForSample(
531                             0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
532                     if (size > kMaxTrackHeaderSize) {
533                         size = kMaxTrackHeaderSize;
534                     }
535                     uint8_t header[kMaxTrackHeaderSize];
536                     if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
537                         track->meta.setData(kKeyStreamHeader, 'mdat', header, size);
538                     }
539                 }
540             }
541 
542             if (mMoofOffset > 0) {
543                 int64_t duration;
544                 if (track->meta.findInt64(kKeyDuration, &duration)) {
545                     // nothing fancy, just pick a frame near 1/4th of the duration
546                     track->meta.setInt64(
547                             kKeyThumbnailTime, duration / 4);
548                 }
549             } else {
550                 uint32_t sampleIndex;
551                 uint32_t sampleTime;
552                 if (track->timescale != 0 &&
553                         track->sampleTable->findThumbnailSample(&sampleIndex) == OK
554                         && track->sampleTable->getMetaDataForSample(
555                             sampleIndex, NULL /* offset */, NULL /* size */,
556                             &sampleTime) == OK) {
557                     track->meta.setInt64(
558                             kKeyThumbnailTime,
559                             ((int64_t)sampleTime * 1000000) / track->timescale);
560                 }
561             }
562         }
563     }
564 
565     meta = track->meta;
566     return OK;
567 }
568 
readMetaData()569 status_t MPEG4Extractor::readMetaData() {
570     if (mInitCheck != NO_INIT) {
571         return mInitCheck;
572     }
573 
574     off64_t offset = 0;
575     status_t err;
576     bool sawMoovOrSidx = false;
577 
578     while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
579              (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
580                      (mItemTable != NULL) && mItemTable->isValid()))) {
581         off64_t orig_offset = offset;
582         err = parseChunk(&offset, 0);
583 
584         if (err != OK && err != UNKNOWN_ERROR) {
585             break;
586         } else if (offset <= orig_offset) {
587             // only continue parsing if the offset was advanced,
588             // otherwise we might end up in an infinite loop
589             ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
590             err = ERROR_MALFORMED;
591             break;
592         } else if (err == UNKNOWN_ERROR) {
593             sawMoovOrSidx = true;
594         }
595     }
596 
597     if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
598         off64_t exifOffset;
599         size_t exifSize;
600         if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
601             mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset);
602             mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize);
603         }
604         for (uint32_t imageIndex = 0;
605                 imageIndex < mItemTable->countImages(); imageIndex++) {
606             sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
607             if (meta == NULL) {
608                 ALOGE("heif image %u has no meta!", imageIndex);
609                 continue;
610             }
611             // Some heif files advertise image sequence brands (eg. 'hevc') in
612             // ftyp box, but don't have any valid tracks in them. Instead of
613             // reporting the entire file as malformed, we override the error
614             // to allow still images to be extracted.
615             if (err != OK) {
616                 ALOGW("Extracting still images only");
617                 err = OK;
618             }
619             mInitCheck = OK;
620 
621             ALOGV("adding HEIF image track %u", imageIndex);
622             Track *track = new Track;
623             track->next = NULL;
624             if (mLastTrack != NULL) {
625                 mLastTrack->next = track;
626             } else {
627                 mFirstTrack = track;
628             }
629             mLastTrack = track;
630 
631             track->meta = *(meta.get());
632             track->meta.setInt32(kKeyTrackID, imageIndex);
633             track->includes_expensive_metadata = false;
634             track->skipTrack = false;
635             track->timescale = 1000000;
636         }
637     }
638 
639     if (mInitCheck == OK) {
640         if (findTrackByMimePrefix("video/") != NULL) {
641             mFileMetaData.setCString(
642                     kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
643         } else if (findTrackByMimePrefix("audio/") != NULL) {
644             mFileMetaData.setCString(kKeyMIMEType, "audio/mp4");
645         } else if (findTrackByMimePrefix(
646                 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
647             mFileMetaData.setCString(
648                     kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
649         } else {
650             mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream");
651         }
652     } else {
653         mInitCheck = err;
654     }
655 
656     CHECK_NE(err, (status_t)NO_INIT);
657 
658     // copy pssh data into file metadata
659     uint64_t psshsize = 0;
660     for (size_t i = 0; i < mPssh.size(); i++) {
661         psshsize += 20 + mPssh[i].datalen;
662     }
663     if (psshsize > 0 && psshsize <= UINT32_MAX) {
664         char *buf = (char*)malloc(psshsize);
665         if (!buf) {
666             ALOGE("b/28471206");
667             return NO_MEMORY;
668         }
669         char *ptr = buf;
670         for (size_t i = 0; i < mPssh.size(); i++) {
671             memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
672             memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
673             ptr += (20 + mPssh[i].datalen);
674         }
675         mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize);
676         free(buf);
677     }
678 
679     return mInitCheck;
680 }
681 
682 struct PathAdder {
PathAdderandroid::PathAdder683     PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
684         : mPath(path) {
685         mPath->push(chunkType);
686     }
687 
~PathAdderandroid::PathAdder688     ~PathAdder() {
689         mPath->pop();
690     }
691 
692 private:
693     Vector<uint32_t> *mPath;
694 
695     PathAdder(const PathAdder &);
696     PathAdder &operator=(const PathAdder &);
697 };
698 
underMetaDataPath(const Vector<uint32_t> & path)699 static bool underMetaDataPath(const Vector<uint32_t> &path) {
700     return path.size() >= 5
701         && path[0] == FOURCC('m', 'o', 'o', 'v')
702         && path[1] == FOURCC('u', 'd', 't', 'a')
703         && path[2] == FOURCC('m', 'e', 't', 'a')
704         && path[3] == FOURCC('i', 'l', 's', 't');
705 }
706 
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)707 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
708     return path.size() >= 2
709             && path[0] == FOURCC('m', 'o', 'o', 'v')
710             && path[1] == FOURCC('m', 'e', 't', 'a')
711             && (depth == 2
712             || (depth == 3
713                     && (path[2] == FOURCC('h', 'd', 'l', 'r')
714                     ||  path[2] == FOURCC('i', 'l', 's', 't')
715                     ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
716 }
717 
718 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)719 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
720     // delta between mpeg4 time and unix epoch time
721     static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
722     if (time_1904 < INT64_MIN + delta) {
723         return false;
724     }
725     time_t time_1970 = time_1904 - delta;
726 
727     char tmp[32];
728     struct tm* tm = gmtime(&time_1970);
729     if (tm != NULL &&
730             strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
731         s->setTo(tmp);
732         return true;
733     }
734     return false;
735 }
736 
parseChunk(off64_t * offset,int depth)737 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
738     ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
739 
740     if (*offset < 0) {
741         ALOGE("b/23540914");
742         return ERROR_MALFORMED;
743     }
744     if (depth > 100) {
745         ALOGE("b/27456299");
746         return ERROR_MALFORMED;
747     }
748     uint32_t hdr[2];
749     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
750         return ERROR_IO;
751     }
752     uint64_t chunk_size = ntohl(hdr[0]);
753     int32_t chunk_type = ntohl(hdr[1]);
754     off64_t data_offset = *offset + 8;
755 
756     if (chunk_size == 1) {
757         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
758             return ERROR_IO;
759         }
760         chunk_size = ntoh64(chunk_size);
761         data_offset += 8;
762 
763         if (chunk_size < 16) {
764             // The smallest valid chunk is 16 bytes long in this case.
765             return ERROR_MALFORMED;
766         }
767     } else if (chunk_size == 0) {
768         if (depth == 0) {
769             // atom extends to end of file
770             off64_t sourceSize;
771             if (mDataSource->getSize(&sourceSize) == OK) {
772                 chunk_size = (sourceSize - *offset);
773             } else {
774                 // XXX could we just pick a "sufficiently large" value here?
775                 ALOGE("atom size is 0, and data source has no size");
776                 return ERROR_MALFORMED;
777             }
778         } else {
779             // not allowed for non-toplevel atoms, skip it
780             *offset += 4;
781             return OK;
782         }
783     } else if (chunk_size < 8) {
784         // The smallest valid chunk is 8 bytes long.
785         ALOGE("invalid chunk size: %" PRIu64, chunk_size);
786         return ERROR_MALFORMED;
787     }
788 
789     char chunk[5];
790     MakeFourCCString(chunk_type, chunk);
791     ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
792 
793     if (kUseHexDump) {
794         static const char kWhitespace[] = "                                        ";
795         const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
796         printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
797 
798         char buffer[256];
799         size_t n = chunk_size;
800         if (n > sizeof(buffer)) {
801             n = sizeof(buffer);
802         }
803         if (mDataSource->readAt(*offset, buffer, n)
804                 < (ssize_t)n) {
805             return ERROR_IO;
806         }
807 
808         hexdump(buffer, n);
809     }
810 
811     PathAdder autoAdder(&mPath, chunk_type);
812 
813     // (data_offset - *offset) is either 8 or 16
814     off64_t chunk_data_size = chunk_size - (data_offset - *offset);
815     if (chunk_data_size < 0) {
816         ALOGE("b/23540914");
817         return ERROR_MALFORMED;
818     }
819     if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
820         char errMsg[100];
821         sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
822         ALOGE("%s (b/28615448)", errMsg);
823         android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
824         return ERROR_MALFORMED;
825     }
826 
827     if (chunk_type != FOURCC('c', 'p', 'r', 't')
828             && chunk_type != FOURCC('c', 'o', 'v', 'r')
829             && mPath.size() == 5 && underMetaDataPath(mPath)) {
830         off64_t stop_offset = *offset + chunk_size;
831         *offset = data_offset;
832         while (*offset < stop_offset) {
833             status_t err = parseChunk(offset, depth + 1);
834             if (err != OK) {
835                 return err;
836             }
837         }
838 
839         if (*offset != stop_offset) {
840             return ERROR_MALFORMED;
841         }
842 
843         return OK;
844     }
845 
846     switch(chunk_type) {
847         case FOURCC('m', 'o', 'o', 'v'):
848         case FOURCC('t', 'r', 'a', 'k'):
849         case FOURCC('m', 'd', 'i', 'a'):
850         case FOURCC('m', 'i', 'n', 'f'):
851         case FOURCC('d', 'i', 'n', 'f'):
852         case FOURCC('s', 't', 'b', 'l'):
853         case FOURCC('m', 'v', 'e', 'x'):
854         case FOURCC('m', 'o', 'o', 'f'):
855         case FOURCC('t', 'r', 'a', 'f'):
856         case FOURCC('m', 'f', 'r', 'a'):
857         case FOURCC('u', 'd', 't', 'a'):
858         case FOURCC('i', 'l', 's', 't'):
859         case FOURCC('s', 'i', 'n', 'f'):
860         case FOURCC('s', 'c', 'h', 'i'):
861         case FOURCC('e', 'd', 't', 's'):
862         case FOURCC('w', 'a', 'v', 'e'):
863         {
864             if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
865                 ALOGE("moov: depth %d", depth);
866                 return ERROR_MALFORMED;
867             }
868 
869             if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) {
870                 ALOGE("duplicate moov");
871                 return ERROR_MALFORMED;
872             }
873 
874             if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
875                 // store the offset of the first segment
876                 mMoofFound = true;
877                 mMoofOffset = *offset;
878             }
879 
880             if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
881                 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
882 
883                 if (mDataSource->flags()
884                         & (DataSourceBase::kWantsPrefetching
885                             | DataSourceBase::kIsCachingDataSource)) {
886                     CachedRangedDataSource *cachedSource =
887                         new CachedRangedDataSource(mDataSource);
888 
889                     if (cachedSource->setCachedRange(
890                             *offset, chunk_size,
891                             mCachedSource != NULL /* assume ownership on success */) == OK) {
892                         mDataSource = mCachedSource = cachedSource;
893                     } else {
894                         delete cachedSource;
895                     }
896                 }
897 
898                 if (mLastTrack == NULL) {
899                     return ERROR_MALFORMED;
900                 }
901 
902                 mLastTrack->sampleTable = new SampleTable(mDataSource);
903             }
904 
905             bool isTrack = false;
906             if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
907                 if (depth != 1) {
908                     ALOGE("trak: depth %d", depth);
909                     return ERROR_MALFORMED;
910                 }
911                 isTrack = true;
912 
913                 ALOGV("adding new track");
914                 Track *track = new Track;
915                 track->next = NULL;
916                 if (mLastTrack) {
917                     mLastTrack->next = track;
918                 } else {
919                     mFirstTrack = track;
920                 }
921                 mLastTrack = track;
922 
923                 track->includes_expensive_metadata = false;
924                 track->skipTrack = false;
925                 track->timescale = 0;
926                 track->meta.setCString(kKeyMIMEType, "application/octet-stream");
927                 track->has_elst = false;
928                 track->subsample_encryption = false;
929             }
930 
931             off64_t stop_offset = *offset + chunk_size;
932             *offset = data_offset;
933             while (*offset < stop_offset) {
934                 status_t err = parseChunk(offset, depth + 1);
935                 if (err != OK) {
936                     if (isTrack) {
937                         mLastTrack->skipTrack = true;
938                         break;
939                     }
940                     return err;
941                 }
942             }
943 
944             if (*offset != stop_offset) {
945                 return ERROR_MALFORMED;
946             }
947 
948             if (isTrack) {
949                 int32_t trackId;
950                 // There must be exactly one track header per track.
951                 if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
952                     mLastTrack->skipTrack = true;
953                 }
954 
955                 status_t err = verifyTrack(mLastTrack);
956                 if (err != OK) {
957                     mLastTrack->skipTrack = true;
958                 }
959 
960                 if (mLastTrack->skipTrack) {
961                     ALOGV("skipping this track...");
962                     Track *cur = mFirstTrack;
963 
964                     if (cur == mLastTrack) {
965                         delete cur;
966                         mFirstTrack = mLastTrack = NULL;
967                     } else {
968                         while (cur && cur->next != mLastTrack) {
969                             cur = cur->next;
970                         }
971                         if (cur) {
972                             cur->next = NULL;
973                         }
974                         delete mLastTrack;
975                         mLastTrack = cur;
976                     }
977 
978                     return OK;
979                 }
980             } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
981                 mInitCheck = OK;
982 
983                 return UNKNOWN_ERROR;  // Return a dummy error.
984             }
985             break;
986         }
987 
988         case FOURCC('s', 'c', 'h', 'm'):
989         {
990 
991             *offset += chunk_size;
992             if (!mLastTrack) {
993                 return ERROR_MALFORMED;
994             }
995 
996             uint32_t scheme_type;
997             if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
998                 return ERROR_IO;
999             }
1000             scheme_type = ntohl(scheme_type);
1001             int32_t mode = kCryptoModeUnencrypted;
1002             switch(scheme_type) {
1003                 case FOURCC('c', 'b', 'c', '1'):
1004                 {
1005                     mode = kCryptoModeAesCbc;
1006                     break;
1007                 }
1008                 case FOURCC('c', 'b', 'c', 's'):
1009                 {
1010                     mode = kCryptoModeAesCbc;
1011                     mLastTrack->subsample_encryption = true;
1012                     break;
1013                 }
1014                 case FOURCC('c', 'e', 'n', 'c'):
1015                 {
1016                     mode = kCryptoModeAesCtr;
1017                     break;
1018                 }
1019                 case FOURCC('c', 'e', 'n', 's'):
1020                 {
1021                     mode = kCryptoModeAesCtr;
1022                     mLastTrack->subsample_encryption = true;
1023                     break;
1024                 }
1025             }
1026             if (mode != kCryptoModeUnencrypted) {
1027                 mLastTrack->meta.setInt32(kKeyCryptoMode, mode);
1028             }
1029             break;
1030         }
1031 
1032 
1033         case FOURCC('e', 'l', 's', 't'):
1034         {
1035             *offset += chunk_size;
1036 
1037             if (!mLastTrack) {
1038                 return ERROR_MALFORMED;
1039             }
1040 
1041             // See 14496-12 8.6.6
1042             uint8_t version;
1043             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1044                 return ERROR_IO;
1045             }
1046 
1047             uint32_t entry_count;
1048             if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1049                 return ERROR_IO;
1050             }
1051 
1052             if (entry_count != 1) {
1053                 // we only support a single entry at the moment, for gapless playback
1054                 ALOGW("ignoring edit list with %d entries", entry_count);
1055             } else {
1056                 off64_t entriesoffset = data_offset + 8;
1057                 uint64_t segment_duration;
1058                 int64_t media_time;
1059 
1060                 if (version == 1) {
1061                     if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1062                             !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1063                         return ERROR_IO;
1064                     }
1065                 } else if (version == 0) {
1066                     uint32_t sd;
1067                     int32_t mt;
1068                     if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1069                             !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1070                         return ERROR_IO;
1071                     }
1072                     segment_duration = sd;
1073                     media_time = mt;
1074                 } else {
1075                     return ERROR_IO;
1076                 }
1077 
1078                 // save these for later, because the elst atom might precede
1079                 // the atoms that actually give us the duration and sample rate
1080                 // needed to calculate the padding and delay values
1081                 mLastTrack->has_elst = true;
1082                 mLastTrack->elst_media_time = media_time;
1083                 mLastTrack->elst_segment_duration = segment_duration;
1084             }
1085             break;
1086         }
1087 
1088         case FOURCC('f', 'r', 'm', 'a'):
1089         {
1090             *offset += chunk_size;
1091 
1092             uint32_t original_fourcc;
1093             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1094                 return ERROR_IO;
1095             }
1096             original_fourcc = ntohl(original_fourcc);
1097             ALOGV("read original format: %d", original_fourcc);
1098 
1099             if (mLastTrack == NULL) {
1100                 return ERROR_MALFORMED;
1101             }
1102 
1103             mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1104             uint32_t num_channels = 0;
1105             uint32_t sample_rate = 0;
1106             if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1107                 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1108                 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1109             }
1110             break;
1111         }
1112 
1113         case FOURCC('t', 'e', 'n', 'c'):
1114         {
1115             *offset += chunk_size;
1116 
1117             if (chunk_size < 32) {
1118                 return ERROR_MALFORMED;
1119             }
1120 
1121             // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1122             // default IV size, 16 bytes default KeyID
1123             // (ISO 23001-7)
1124 
1125             uint8_t version;
1126             if (mDataSource->readAt(data_offset, &version, sizeof(version))
1127                     < (ssize_t)sizeof(version)) {
1128                 return ERROR_IO;
1129             }
1130 
1131             uint8_t buf[4];
1132             memset(buf, 0, 4);
1133             if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1134                 return ERROR_IO;
1135             }
1136 
1137             if (mLastTrack == NULL) {
1138                 return ERROR_MALFORMED;
1139             }
1140 
1141             uint8_t defaultEncryptedByteBlock = 0;
1142             uint8_t defaultSkipByteBlock = 0;
1143             uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1144             if (version == 1) {
1145                 uint32_t pattern = buf[2];
1146                 defaultEncryptedByteBlock = pattern >> 4;
1147                 defaultSkipByteBlock = pattern & 0xf;
1148                 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1149                     // use (1,0) to mean "encrypt everything"
1150                     defaultEncryptedByteBlock = 1;
1151                 }
1152             } else if (mLastTrack->subsample_encryption) {
1153                 ALOGW("subsample_encryption should be version 1");
1154             } else if (defaultAlgorithmId > 1) {
1155                 // only 0 (clear) and 1 (AES-128) are valid
1156                 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1157                 defaultAlgorithmId = 1;
1158             }
1159 
1160             memset(buf, 0, 4);
1161             if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1162                 return ERROR_IO;
1163             }
1164             uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1165 
1166             if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1167                 // unencrypted data (algorithm id 0) must have an IV size of 0
1168                 return ERROR_MALFORMED;
1169             } else if (defaultIVSize != 0 &&
1170                     defaultIVSize != 8 &&
1171                     defaultIVSize != 16) {
1172                 return ERROR_MALFORMED;
1173             }
1174 
1175             uint8_t defaultKeyId[16];
1176 
1177             if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1178                 return ERROR_IO;
1179             }
1180 
1181             sp<ABuffer> defaultConstantIv;
1182             if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1183 
1184                 uint8_t ivlength;
1185                 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1186                         < (ssize_t)sizeof(ivlength)) {
1187                     return ERROR_IO;
1188                 }
1189 
1190                 if (ivlength != 8 && ivlength != 16) {
1191                     ALOGW("unsupported IV length: %u", ivlength);
1192                     return ERROR_MALFORMED;
1193                 }
1194 
1195                 defaultConstantIv = new ABuffer(ivlength);
1196                 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1197                         < (ssize_t)ivlength) {
1198                     return ERROR_IO;
1199                 }
1200 
1201                 defaultConstantIv->setRange(0, ivlength);
1202             }
1203 
1204             int32_t tmpAlgorithmId;
1205             if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) {
1206                 mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId);
1207             }
1208 
1209             mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1210             mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1211             mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock);
1212             mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock);
1213             if (defaultConstantIv != NULL) {
1214                 mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size());
1215             }
1216             break;
1217         }
1218 
1219         case FOURCC('t', 'k', 'h', 'd'):
1220         {
1221             *offset += chunk_size;
1222 
1223             status_t err;
1224             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1225                 return err;
1226             }
1227 
1228             break;
1229         }
1230 
1231         case FOURCC('t', 'r', 'e', 'f'):
1232         {
1233             off64_t stop_offset = *offset + chunk_size;
1234             *offset = data_offset;
1235             while (*offset < stop_offset) {
1236                 status_t err = parseChunk(offset, depth + 1);
1237                 if (err != OK) {
1238                     return err;
1239                 }
1240             }
1241             if (*offset != stop_offset) {
1242                 return ERROR_MALFORMED;
1243             }
1244             break;
1245         }
1246 
1247         case FOURCC('t', 'h', 'm', 'b'):
1248         {
1249             *offset += chunk_size;
1250 
1251             if (mLastTrack != NULL) {
1252                 // Skip thumbnail track for now since we don't have an
1253                 // API to retrieve it yet.
1254                 // The thumbnail track can't be accessed by negative index or time,
1255                 // because each timed sample has its own corresponding thumbnail
1256                 // in the thumbnail track. We'll need a dedicated API to retrieve
1257                 // thumbnail at time instead.
1258                 mLastTrack->skipTrack = true;
1259             }
1260 
1261             break;
1262         }
1263 
1264         case FOURCC('p', 's', 's', 'h'):
1265         {
1266             *offset += chunk_size;
1267 
1268             PsshInfo pssh;
1269 
1270             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1271                 return ERROR_IO;
1272             }
1273 
1274             uint32_t psshdatalen = 0;
1275             if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1276                 return ERROR_IO;
1277             }
1278             pssh.datalen = ntohl(psshdatalen);
1279             ALOGV("pssh data size: %d", pssh.datalen);
1280             if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1281                 // pssh data length exceeds size of containing box
1282                 return ERROR_MALFORMED;
1283             }
1284 
1285             pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1286             if (pssh.data == NULL) {
1287                 return ERROR_MALFORMED;
1288             }
1289             ALOGV("allocated pssh @ %p", pssh.data);
1290             ssize_t requested = (ssize_t) pssh.datalen;
1291             if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1292                 delete[] pssh.data;
1293                 return ERROR_IO;
1294             }
1295             mPssh.push_back(pssh);
1296 
1297             break;
1298         }
1299 
1300         case FOURCC('m', 'd', 'h', 'd'):
1301         {
1302             *offset += chunk_size;
1303 
1304             if (chunk_data_size < 4 || mLastTrack == NULL) {
1305                 return ERROR_MALFORMED;
1306             }
1307 
1308             uint8_t version;
1309             if (mDataSource->readAt(
1310                         data_offset, &version, sizeof(version))
1311                     < (ssize_t)sizeof(version)) {
1312                 return ERROR_IO;
1313             }
1314 
1315             off64_t timescale_offset;
1316 
1317             if (version == 1) {
1318                 timescale_offset = data_offset + 4 + 16;
1319             } else if (version == 0) {
1320                 timescale_offset = data_offset + 4 + 8;
1321             } else {
1322                 return ERROR_IO;
1323             }
1324 
1325             uint32_t timescale;
1326             if (mDataSource->readAt(
1327                         timescale_offset, &timescale, sizeof(timescale))
1328                     < (ssize_t)sizeof(timescale)) {
1329                 return ERROR_IO;
1330             }
1331 
1332             if (!timescale) {
1333                 ALOGE("timescale should not be ZERO.");
1334                 return ERROR_MALFORMED;
1335             }
1336 
1337             mLastTrack->timescale = ntohl(timescale);
1338 
1339             // 14496-12 says all ones means indeterminate, but some files seem to use
1340             // 0 instead. We treat both the same.
1341             int64_t duration = 0;
1342             if (version == 1) {
1343                 if (mDataSource->readAt(
1344                             timescale_offset + 4, &duration, sizeof(duration))
1345                         < (ssize_t)sizeof(duration)) {
1346                     return ERROR_IO;
1347                 }
1348                 if (duration != -1) {
1349                     duration = ntoh64(duration);
1350                 }
1351             } else {
1352                 uint32_t duration32;
1353                 if (mDataSource->readAt(
1354                             timescale_offset + 4, &duration32, sizeof(duration32))
1355                         < (ssize_t)sizeof(duration32)) {
1356                     return ERROR_IO;
1357                 }
1358                 if (duration32 != 0xffffffff) {
1359                     duration = ntohl(duration32);
1360                 }
1361             }
1362             if (duration != 0 && mLastTrack->timescale != 0) {
1363                 mLastTrack->meta.setInt64(
1364                         kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1365             }
1366 
1367             uint8_t lang[2];
1368             off64_t lang_offset;
1369             if (version == 1) {
1370                 lang_offset = timescale_offset + 4 + 8;
1371             } else if (version == 0) {
1372                 lang_offset = timescale_offset + 4 + 4;
1373             } else {
1374                 return ERROR_IO;
1375             }
1376 
1377             if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1378                     < (ssize_t)sizeof(lang)) {
1379                 return ERROR_IO;
1380             }
1381 
1382             // To get the ISO-639-2/T three character language code
1383             // 1 bit pad followed by 3 5-bits characters. Each character
1384             // is packed as the difference between its ASCII value and 0x60.
1385             char lang_code[4];
1386             lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1387             lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1388             lang_code[2] = (lang[1] & 0x1f) + 0x60;
1389             lang_code[3] = '\0';
1390 
1391             mLastTrack->meta.setCString(
1392                     kKeyMediaLanguage, lang_code);
1393 
1394             break;
1395         }
1396 
1397         case FOURCC('s', 't', 's', 'd'):
1398         {
1399             uint8_t buffer[8];
1400             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1401                 return ERROR_MALFORMED;
1402             }
1403 
1404             if (mDataSource->readAt(
1405                         data_offset, buffer, 8) < 8) {
1406                 return ERROR_IO;
1407             }
1408 
1409             if (U32_AT(buffer) != 0) {
1410                 // Should be version 0, flags 0.
1411                 return ERROR_MALFORMED;
1412             }
1413 
1414             uint32_t entry_count = U32_AT(&buffer[4]);
1415 
1416             if (entry_count > 1) {
1417                 // For 3GPP timed text, there could be multiple tx3g boxes containing
1418                 // multiple text display formats. These formats will be used to
1419                 // display the timed text.
1420                 // For encrypted files, there may also be more than one entry.
1421                 const char *mime;
1422 
1423                 if (mLastTrack == NULL)
1424                     return ERROR_MALFORMED;
1425 
1426                 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1427                 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1428                         strcasecmp(mime, "application/octet-stream")) {
1429                     // For now we only support a single type of media per track.
1430                     mLastTrack->skipTrack = true;
1431                     *offset += chunk_size;
1432                     break;
1433                 }
1434             }
1435             off64_t stop_offset = *offset + chunk_size;
1436             *offset = data_offset + 8;
1437             for (uint32_t i = 0; i < entry_count; ++i) {
1438                 status_t err = parseChunk(offset, depth + 1);
1439                 if (err != OK) {
1440                     return err;
1441                 }
1442             }
1443 
1444             if (*offset != stop_offset) {
1445                 return ERROR_MALFORMED;
1446             }
1447             break;
1448         }
1449         case FOURCC('m', 'e', 't', 't'):
1450         {
1451             *offset += chunk_size;
1452 
1453             if (mLastTrack == NULL)
1454                 return ERROR_MALFORMED;
1455 
1456             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1457             if (buffer.get() == NULL) {
1458                 return NO_MEMORY;
1459             }
1460 
1461             if (mDataSource->readAt(
1462                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1463                 return ERROR_IO;
1464             }
1465 
1466             String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1467             mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string());
1468 
1469             break;
1470         }
1471 
1472         case FOURCC('m', 'p', '4', 'a'):
1473         case FOURCC('e', 'n', 'c', 'a'):
1474         case FOURCC('s', 'a', 'm', 'r'):
1475         case FOURCC('s', 'a', 'w', 'b'):
1476         {
1477             if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
1478                     && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
1479                 // Ignore mp4a embedded in QT wave atom
1480                 *offset += chunk_size;
1481                 break;
1482             }
1483 
1484             uint8_t buffer[8 + 20];
1485             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1486                 // Basic AudioSampleEntry size.
1487                 return ERROR_MALFORMED;
1488             }
1489 
1490             if (mDataSource->readAt(
1491                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1492                 return ERROR_IO;
1493             }
1494 
1495             uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1496             uint16_t version = U16_AT(&buffer[8]);
1497             uint32_t num_channels = U16_AT(&buffer[16]);
1498 
1499             uint16_t sample_size = U16_AT(&buffer[18]);
1500             uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1501 
1502             if (mLastTrack == NULL)
1503                 return ERROR_MALFORMED;
1504 
1505             off64_t stop_offset = *offset + chunk_size;
1506             *offset = data_offset + sizeof(buffer);
1507 
1508             if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
1509                 if (version == 1) {
1510                     if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1511                         return ERROR_IO;
1512                     }
1513 
1514 #if 0
1515                     U32_AT(buffer);  // samples per packet
1516                     U32_AT(&buffer[4]);  // bytes per packet
1517                     U32_AT(&buffer[8]);  // bytes per frame
1518                     U32_AT(&buffer[12]);  // bytes per sample
1519 #endif
1520                     *offset += 16;
1521                 } else if (version == 2) {
1522                     uint8_t v2buffer[36];
1523                     if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1524                         return ERROR_IO;
1525                     }
1526 
1527 #if 0
1528                     U32_AT(v2buffer);  // size of struct only
1529                     sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1530                     num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1531                     U32_AT(&v2buffer[16]);  // always 0x7f000000
1532                     sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1533                     U32_AT(&v2buffer[24]);  // format specifc flags
1534                     U32_AT(&v2buffer[28]);  // const bytes per audio packet
1535                     U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1536 #endif
1537                     *offset += 36;
1538                 }
1539             }
1540 
1541             if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1542                 // if the chunk type is enca, we'll get the type from the frma box later
1543                 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1544                 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1545             }
1546             ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1547                    chunk, num_channels, sample_size, sample_rate);
1548             mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1549             mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1550 
1551             while (*offset < stop_offset) {
1552                 status_t err = parseChunk(offset, depth + 1);
1553                 if (err != OK) {
1554                     return err;
1555                 }
1556             }
1557 
1558             if (*offset != stop_offset) {
1559                 return ERROR_MALFORMED;
1560             }
1561             break;
1562         }
1563 
1564         case FOURCC('m', 'p', '4', 'v'):
1565         case FOURCC('e', 'n', 'c', 'v'):
1566         case FOURCC('s', '2', '6', '3'):
1567         case FOURCC('H', '2', '6', '3'):
1568         case FOURCC('h', '2', '6', '3'):
1569         case FOURCC('a', 'v', 'c', '1'):
1570         case FOURCC('h', 'v', 'c', '1'):
1571         case FOURCC('h', 'e', 'v', '1'):
1572         {
1573             uint8_t buffer[78];
1574             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1575                 // Basic VideoSampleEntry size.
1576                 return ERROR_MALFORMED;
1577             }
1578 
1579             if (mDataSource->readAt(
1580                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1581                 return ERROR_IO;
1582             }
1583 
1584             uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1585             uint16_t width = U16_AT(&buffer[6 + 18]);
1586             uint16_t height = U16_AT(&buffer[6 + 20]);
1587 
1588             // The video sample is not standard-compliant if it has invalid dimension.
1589             // Use some default width and height value, and
1590             // let the decoder figure out the actual width and height (and thus
1591             // be prepared for INFO_FORMAT_CHANGED event).
1592             if (width == 0)  width  = 352;
1593             if (height == 0) height = 288;
1594 
1595             // printf("*** coding='%s' width=%d height=%d\n",
1596             //        chunk, width, height);
1597 
1598             if (mLastTrack == NULL)
1599                 return ERROR_MALFORMED;
1600 
1601             if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1602                 // if the chunk type is encv, we'll get the type from the frma box later
1603                 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1604             }
1605             mLastTrack->meta.setInt32(kKeyWidth, width);
1606             mLastTrack->meta.setInt32(kKeyHeight, height);
1607 
1608             off64_t stop_offset = *offset + chunk_size;
1609             *offset = data_offset + sizeof(buffer);
1610             while (*offset < stop_offset) {
1611                 status_t err = parseChunk(offset, depth + 1);
1612                 if (err != OK) {
1613                     return err;
1614                 }
1615             }
1616 
1617             if (*offset != stop_offset) {
1618                 return ERROR_MALFORMED;
1619             }
1620             break;
1621         }
1622 
1623         case FOURCC('s', 't', 'c', 'o'):
1624         case FOURCC('c', 'o', '6', '4'):
1625         {
1626             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1627                 return ERROR_MALFORMED;
1628             }
1629 
1630             status_t err =
1631                 mLastTrack->sampleTable->setChunkOffsetParams(
1632                         chunk_type, data_offset, chunk_data_size);
1633 
1634             *offset += chunk_size;
1635 
1636             if (err != OK) {
1637                 return err;
1638             }
1639 
1640             break;
1641         }
1642 
1643         case FOURCC('s', 't', 's', 'c'):
1644         {
1645             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1646                 return ERROR_MALFORMED;
1647 
1648             status_t err =
1649                 mLastTrack->sampleTable->setSampleToChunkParams(
1650                         data_offset, chunk_data_size);
1651 
1652             *offset += chunk_size;
1653 
1654             if (err != OK) {
1655                 return err;
1656             }
1657 
1658             break;
1659         }
1660 
1661         case FOURCC('s', 't', 's', 'z'):
1662         case FOURCC('s', 't', 'z', '2'):
1663         {
1664             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1665                 return ERROR_MALFORMED;
1666             }
1667 
1668             status_t err =
1669                 mLastTrack->sampleTable->setSampleSizeParams(
1670                         chunk_type, data_offset, chunk_data_size);
1671 
1672             *offset += chunk_size;
1673 
1674             if (err != OK) {
1675                 return err;
1676             }
1677 
1678             size_t max_size;
1679             err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1680 
1681             if (err != OK) {
1682                 return err;
1683             }
1684 
1685             if (max_size != 0) {
1686                 // Assume that a given buffer only contains at most 10 chunks,
1687                 // each chunk originally prefixed with a 2 byte length will
1688                 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1689                 // and thus will grow by 2 bytes per chunk.
1690                 if (max_size > SIZE_MAX - 10 * 2) {
1691                     ALOGE("max sample size too big: %zu", max_size);
1692                     return ERROR_MALFORMED;
1693                 }
1694                 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1695             } else {
1696                 // No size was specified. Pick a conservatively large size.
1697                 uint32_t width, height;
1698                 if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) ||
1699                     !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) {
1700                     ALOGE("No width or height, assuming worst case 1080p");
1701                     width = 1920;
1702                     height = 1080;
1703                 } else {
1704                     // A resolution was specified, check that it's not too big. The values below
1705                     // were chosen so that the calculations below don't cause overflows, they're
1706                     // not indicating that resolutions up to 32kx32k are actually supported.
1707                     if (width > 32768 || height > 32768) {
1708                         ALOGE("can't support %u x %u video", width, height);
1709                         return ERROR_MALFORMED;
1710                     }
1711                 }
1712 
1713                 const char *mime;
1714                 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1715                 if (!strncmp(mime, "audio/", 6)) {
1716                     // for audio, use 128KB
1717                     max_size = 1024 * 128;
1718                 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1719                         || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1720                     // AVC & HEVC requires compression ratio of at least 2, and uses
1721                     // macroblocks
1722                     max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1723                 } else {
1724                     // For all other formats there is no minimum compression
1725                     // ratio. Use compression ratio of 1.
1726                     max_size = width * height * 3 / 2;
1727                 }
1728                 // HACK: allow 10% overhead
1729                 // TODO: read sample size from traf atom for fragmented MPEG4.
1730                 max_size += max_size / 10;
1731                 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size);
1732             }
1733 
1734             // NOTE: setting another piece of metadata invalidates any pointers (such as the
1735             // mimetype) previously obtained, so don't cache them.
1736             const char *mime;
1737             CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1738             // Calculate average frame rate.
1739             if (!strncasecmp("video/", mime, 6)) {
1740                 size_t nSamples = mLastTrack->sampleTable->countSamples();
1741                 if (nSamples == 0) {
1742                     int32_t trackId;
1743                     if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
1744                         for (size_t i = 0; i < mTrex.size(); i++) {
1745                             Trex *t = &mTrex.editItemAt(i);
1746                             if (t->track_ID == (uint32_t) trackId) {
1747                                 if (t->default_sample_duration > 0) {
1748                                     int32_t frameRate =
1749                                             mLastTrack->timescale / t->default_sample_duration;
1750                                     mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1751                                 }
1752                                 break;
1753                             }
1754                         }
1755                     }
1756                 } else {
1757                     int64_t durationUs;
1758                     if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) {
1759                         if (durationUs > 0) {
1760                             int32_t frameRate = (nSamples * 1000000LL +
1761                                         (durationUs >> 1)) / durationUs;
1762                             mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1763                         }
1764                     }
1765                     ALOGV("setting frame count %zu", nSamples);
1766                     mLastTrack->meta.setInt32(kKeyFrameCount, nSamples);
1767                 }
1768             }
1769 
1770             break;
1771         }
1772 
1773         case FOURCC('s', 't', 't', 's'):
1774         {
1775             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1776                 return ERROR_MALFORMED;
1777 
1778             *offset += chunk_size;
1779 
1780             status_t err =
1781                 mLastTrack->sampleTable->setTimeToSampleParams(
1782                         data_offset, chunk_data_size);
1783 
1784             if (err != OK) {
1785                 return err;
1786             }
1787 
1788             break;
1789         }
1790 
1791         case FOURCC('c', 't', 't', 's'):
1792         {
1793             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1794                 return ERROR_MALFORMED;
1795 
1796             *offset += chunk_size;
1797 
1798             status_t err =
1799                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1800                         data_offset, chunk_data_size);
1801 
1802             if (err != OK) {
1803                 return err;
1804             }
1805 
1806             break;
1807         }
1808 
1809         case FOURCC('s', 't', 's', 's'):
1810         {
1811             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1812                 return ERROR_MALFORMED;
1813 
1814             *offset += chunk_size;
1815 
1816             status_t err =
1817                 mLastTrack->sampleTable->setSyncSampleParams(
1818                         data_offset, chunk_data_size);
1819 
1820             if (err != OK) {
1821                 return err;
1822             }
1823 
1824             break;
1825         }
1826 
1827         // \xA9xyz
1828         case FOURCC(0xA9, 'x', 'y', 'z'):
1829         {
1830             *offset += chunk_size;
1831 
1832             // Best case the total data length inside "\xA9xyz" box would
1833             // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
1834             // where "\x00\x05" is the text string length with value = 5,
1835             // "\0x15\xc7" is the language code = en, and "+0+0/" is a
1836             // location (string) value with longitude = 0 and latitude = 0.
1837             // Since some devices encountered in the wild omit the trailing
1838             // slash, we'll allow that.
1839             if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
1840                 return ERROR_MALFORMED;
1841             }
1842 
1843             uint16_t len;
1844             if (!mDataSource->getUInt16(data_offset, &len)) {
1845                 return ERROR_IO;
1846             }
1847 
1848             // allow "+0+0" without trailing slash
1849             if (len < 4 || len > chunk_data_size - 4) {
1850                 return ERROR_MALFORMED;
1851             }
1852             // The location string following the language code is formatted
1853             // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
1854             // Allocate 2 extra bytes, in case we need to add a trailing slash,
1855             // and to add a terminating 0.
1856             std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
1857             if (!buffer) {
1858                 return NO_MEMORY;
1859             }
1860 
1861             if (mDataSource->readAt(
1862                         data_offset + 4, &buffer[0], len) < len) {
1863                 return ERROR_IO;
1864             }
1865 
1866             len = strlen(&buffer[0]);
1867             if (len < 4) {
1868                 return ERROR_MALFORMED;
1869             }
1870             // Add a trailing slash if there wasn't one.
1871             if (buffer[len - 1] != '/') {
1872                 buffer[len] = '/';
1873             }
1874             mFileMetaData.setCString(kKeyLocation, &buffer[0]);
1875             break;
1876         }
1877 
1878         case FOURCC('e', 's', 'd', 's'):
1879         {
1880             *offset += chunk_size;
1881 
1882             if (chunk_data_size < 4) {
1883                 return ERROR_MALFORMED;
1884             }
1885 
1886             uint8_t buffer[256];
1887             if (chunk_data_size > (off64_t)sizeof(buffer)) {
1888                 return ERROR_BUFFER_TOO_SMALL;
1889             }
1890 
1891             if (mDataSource->readAt(
1892                         data_offset, buffer, chunk_data_size) < chunk_data_size) {
1893                 return ERROR_IO;
1894             }
1895 
1896             if (U32_AT(buffer) != 0) {
1897                 // Should be version 0, flags 0.
1898                 return ERROR_MALFORMED;
1899             }
1900 
1901             if (mLastTrack == NULL)
1902                 return ERROR_MALFORMED;
1903 
1904             mLastTrack->meta.setData(
1905                     kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1906 
1907             if (mPath.size() >= 2
1908                     && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1909                 // Information from the ESDS must be relied on for proper
1910                 // setup of sample rate and channel count for MPEG4 Audio.
1911                 // The generic header appears to only contain generic
1912                 // information...
1913 
1914                 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1915                         &buffer[4], chunk_data_size - 4);
1916 
1917                 if (err != OK) {
1918                     return err;
1919                 }
1920             }
1921             if (mPath.size() >= 2
1922                     && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1923                 // Check if the video is MPEG2
1924                 ESDS esds(&buffer[4], chunk_data_size - 4);
1925 
1926                 uint8_t objectTypeIndication;
1927                 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1928                     if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1929                         mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1930                     }
1931                 }
1932             }
1933             break;
1934         }
1935 
1936         case FOURCC('b', 't', 'r', 't'):
1937         {
1938             *offset += chunk_size;
1939             if (mLastTrack == NULL) {
1940                 return ERROR_MALFORMED;
1941             }
1942 
1943             uint8_t buffer[12];
1944             if (chunk_data_size != sizeof(buffer)) {
1945                 return ERROR_MALFORMED;
1946             }
1947 
1948             if (mDataSource->readAt(
1949                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
1950                 return ERROR_IO;
1951             }
1952 
1953             uint32_t maxBitrate = U32_AT(&buffer[4]);
1954             uint32_t avgBitrate = U32_AT(&buffer[8]);
1955             if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1956                 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1957             }
1958             if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1959                 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
1960             }
1961             break;
1962         }
1963 
1964         case FOURCC('a', 'v', 'c', 'C'):
1965         {
1966             *offset += chunk_size;
1967 
1968             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1969 
1970             if (buffer.get() == NULL) {
1971                 ALOGE("b/28471206");
1972                 return NO_MEMORY;
1973             }
1974 
1975             if (mDataSource->readAt(
1976                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1977                 return ERROR_IO;
1978             }
1979 
1980             if (mLastTrack == NULL)
1981                 return ERROR_MALFORMED;
1982 
1983             mLastTrack->meta.setData(
1984                     kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size);
1985 
1986             break;
1987         }
1988         case FOURCC('h', 'v', 'c', 'C'):
1989         {
1990             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1991 
1992             if (buffer.get() == NULL) {
1993                 ALOGE("b/28471206");
1994                 return NO_MEMORY;
1995             }
1996 
1997             if (mDataSource->readAt(
1998                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1999                 return ERROR_IO;
2000             }
2001 
2002             if (mLastTrack == NULL)
2003                 return ERROR_MALFORMED;
2004 
2005             mLastTrack->meta.setData(
2006                     kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size);
2007 
2008             *offset += chunk_size;
2009             break;
2010         }
2011 
2012         case FOURCC('d', '2', '6', '3'):
2013         {
2014             *offset += chunk_size;
2015             /*
2016              * d263 contains a fixed 7 bytes part:
2017              *   vendor - 4 bytes
2018              *   version - 1 byte
2019              *   level - 1 byte
2020              *   profile - 1 byte
2021              * optionally, "d263" box itself may contain a 16-byte
2022              * bit rate box (bitr)
2023              *   average bit rate - 4 bytes
2024              *   max bit rate - 4 bytes
2025              */
2026             char buffer[23];
2027             if (chunk_data_size != 7 &&
2028                 chunk_data_size != 23) {
2029                 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2030                 return ERROR_MALFORMED;
2031             }
2032 
2033             if (mDataSource->readAt(
2034                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2035                 return ERROR_IO;
2036             }
2037 
2038             if (mLastTrack == NULL)
2039                 return ERROR_MALFORMED;
2040 
2041             mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
2042 
2043             break;
2044         }
2045 
2046         case FOURCC('m', 'e', 't', 'a'):
2047         {
2048             off64_t stop_offset = *offset + chunk_size;
2049             *offset = data_offset;
2050             bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2051             if (!isParsingMetaKeys) {
2052                 uint8_t buffer[4];
2053                 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2054                     *offset = stop_offset;
2055                     return ERROR_MALFORMED;
2056                 }
2057 
2058                 if (mDataSource->readAt(
2059                             data_offset, buffer, 4) < 4) {
2060                     *offset = stop_offset;
2061                     return ERROR_IO;
2062                 }
2063 
2064                 if (U32_AT(buffer) != 0) {
2065                     // Should be version 0, flags 0.
2066 
2067                     // If it's not, let's assume this is one of those
2068                     // apparently malformed chunks that don't have flags
2069                     // and completely different semantics than what's
2070                     // in the MPEG4 specs and skip it.
2071                     *offset = stop_offset;
2072                     return OK;
2073                 }
2074                 *offset +=  sizeof(buffer);
2075             }
2076 
2077             while (*offset < stop_offset) {
2078                 status_t err = parseChunk(offset, depth + 1);
2079                 if (err != OK) {
2080                     return err;
2081                 }
2082             }
2083 
2084             if (*offset != stop_offset) {
2085                 return ERROR_MALFORMED;
2086             }
2087             break;
2088         }
2089 
2090         case FOURCC('i', 'l', 'o', 'c'):
2091         case FOURCC('i', 'i', 'n', 'f'):
2092         case FOURCC('i', 'p', 'r', 'p'):
2093         case FOURCC('p', 'i', 't', 'm'):
2094         case FOURCC('i', 'd', 'a', 't'):
2095         case FOURCC('i', 'r', 'e', 'f'):
2096         case FOURCC('i', 'p', 'r', 'o'):
2097         {
2098             if (mIsHeif) {
2099                 if (mItemTable == NULL) {
2100                     mItemTable = new ItemTable(mDataSource);
2101                 }
2102                 status_t err = mItemTable->parse(
2103                         chunk_type, data_offset, chunk_data_size);
2104                 if (err != OK) {
2105                     return err;
2106                 }
2107             }
2108             *offset += chunk_size;
2109             break;
2110         }
2111 
2112         case FOURCC('m', 'e', 'a', 'n'):
2113         case FOURCC('n', 'a', 'm', 'e'):
2114         case FOURCC('d', 'a', 't', 'a'):
2115         {
2116             *offset += chunk_size;
2117 
2118             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2119                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2120 
2121                 if (err != OK) {
2122                     return err;
2123                 }
2124             }
2125 
2126             break;
2127         }
2128 
2129         case FOURCC('m', 'v', 'h', 'd'):
2130         {
2131             *offset += chunk_size;
2132 
2133             if (depth != 1) {
2134                 ALOGE("mvhd: depth %d", depth);
2135                 return ERROR_MALFORMED;
2136             }
2137             if (chunk_data_size < 32) {
2138                 return ERROR_MALFORMED;
2139             }
2140 
2141             uint8_t header[32];
2142             if (mDataSource->readAt(
2143                         data_offset, header, sizeof(header))
2144                     < (ssize_t)sizeof(header)) {
2145                 return ERROR_IO;
2146             }
2147 
2148             uint64_t creationTime;
2149             uint64_t duration = 0;
2150             if (header[0] == 1) {
2151                 creationTime = U64_AT(&header[4]);
2152                 mHeaderTimescale = U32_AT(&header[20]);
2153                 duration = U64_AT(&header[24]);
2154                 if (duration == 0xffffffffffffffff) {
2155                     duration = 0;
2156                 }
2157             } else if (header[0] != 0) {
2158                 return ERROR_MALFORMED;
2159             } else {
2160                 creationTime = U32_AT(&header[4]);
2161                 mHeaderTimescale = U32_AT(&header[12]);
2162                 uint32_t d32 = U32_AT(&header[16]);
2163                 if (d32 == 0xffffffff) {
2164                     d32 = 0;
2165                 }
2166                 duration = d32;
2167             }
2168             if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2169                 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2170             }
2171 
2172             String8 s;
2173             if (convertTimeToDate(creationTime, &s)) {
2174                 mFileMetaData.setCString(kKeyDate, s.string());
2175             }
2176 
2177 
2178             break;
2179         }
2180 
2181         case FOURCC('m', 'e', 'h', 'd'):
2182         {
2183             *offset += chunk_size;
2184 
2185             if (chunk_data_size < 8) {
2186                 return ERROR_MALFORMED;
2187             }
2188 
2189             uint8_t flags[4];
2190             if (mDataSource->readAt(
2191                         data_offset, flags, sizeof(flags))
2192                     < (ssize_t)sizeof(flags)) {
2193                 return ERROR_IO;
2194             }
2195 
2196             uint64_t duration = 0;
2197             if (flags[0] == 1) {
2198                 // 64 bit
2199                 if (chunk_data_size < 12) {
2200                     return ERROR_MALFORMED;
2201                 }
2202                 mDataSource->getUInt64(data_offset + 4, &duration);
2203                 if (duration == 0xffffffffffffffff) {
2204                     duration = 0;
2205                 }
2206             } else if (flags[0] == 0) {
2207                 // 32 bit
2208                 uint32_t d32;
2209                 mDataSource->getUInt32(data_offset + 4, &d32);
2210                 if (d32 == 0xffffffff) {
2211                     d32 = 0;
2212                 }
2213                 duration = d32;
2214             } else {
2215                 return ERROR_MALFORMED;
2216             }
2217 
2218             if (duration != 0 && mHeaderTimescale != 0) {
2219                 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2220             }
2221 
2222             break;
2223         }
2224 
2225         case FOURCC('m', 'd', 'a', 't'):
2226         {
2227             mMdatFound = true;
2228 
2229             *offset += chunk_size;
2230             break;
2231         }
2232 
2233         case FOURCC('h', 'd', 'l', 'r'):
2234         {
2235             *offset += chunk_size;
2236 
2237             if (underQTMetaPath(mPath, 3)) {
2238                 break;
2239             }
2240 
2241             uint32_t buffer;
2242             if (mDataSource->readAt(
2243                         data_offset + 8, &buffer, 4) < 4) {
2244                 return ERROR_IO;
2245             }
2246 
2247             uint32_t type = ntohl(buffer);
2248             // For the 3GPP file format, the handler-type within the 'hdlr' box
2249             // shall be 'text'. We also want to support 'sbtl' handler type
2250             // for a practical reason as various MPEG4 containers use it.
2251             if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2252                 if (mLastTrack != NULL) {
2253                     mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2254                 }
2255             }
2256 
2257             break;
2258         }
2259 
2260         case FOURCC('k', 'e', 'y', 's'):
2261         {
2262             *offset += chunk_size;
2263 
2264             if (underQTMetaPath(mPath, 3)) {
2265                 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2266                 if (err != OK) {
2267                     return err;
2268                 }
2269             }
2270             break;
2271         }
2272 
2273         case FOURCC('t', 'r', 'e', 'x'):
2274         {
2275             *offset += chunk_size;
2276 
2277             if (chunk_data_size < 24) {
2278                 return ERROR_IO;
2279             }
2280             Trex trex;
2281             if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2282                 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2283                 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2284                 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2285                 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2286                 return ERROR_IO;
2287             }
2288             mTrex.add(trex);
2289             break;
2290         }
2291 
2292         case FOURCC('t', 'x', '3', 'g'):
2293         {
2294             if (mLastTrack == NULL)
2295                 return ERROR_MALFORMED;
2296 
2297             uint32_t type;
2298             const void *data;
2299             size_t size = 0;
2300             if (!mLastTrack->meta.findData(
2301                     kKeyTextFormatData, &type, &data, &size)) {
2302                 size = 0;
2303             }
2304 
2305             if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2306                 return ERROR_MALFORMED;
2307             }
2308 
2309             uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2310             if (buffer == NULL) {
2311                 return ERROR_MALFORMED;
2312             }
2313 
2314             if (size > 0) {
2315                 memcpy(buffer, data, size);
2316             }
2317 
2318             if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2319                     < chunk_size) {
2320                 delete[] buffer;
2321                 buffer = NULL;
2322 
2323                 // advance read pointer so we don't end up reading this again
2324                 *offset += chunk_size;
2325                 return ERROR_IO;
2326             }
2327 
2328             mLastTrack->meta.setData(
2329                     kKeyTextFormatData, 0, buffer, size + chunk_size);
2330 
2331             delete[] buffer;
2332 
2333             *offset += chunk_size;
2334             break;
2335         }
2336 
2337         case FOURCC('c', 'o', 'v', 'r'):
2338         {
2339             *offset += chunk_size;
2340 
2341             ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2342                   chunk_data_size, data_offset);
2343 
2344             if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2345                 return ERROR_MALFORMED;
2346             }
2347             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2348             if (buffer.get() == NULL) {
2349                 ALOGE("b/28471206");
2350                 return NO_MEMORY;
2351             }
2352             if (mDataSource->readAt(
2353                 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2354                 return ERROR_IO;
2355             }
2356             const int kSkipBytesOfDataBox = 16;
2357             if (chunk_data_size <= kSkipBytesOfDataBox) {
2358                 return ERROR_MALFORMED;
2359             }
2360 
2361             mFileMetaData.setData(
2362                 kKeyAlbumArt, MetaData::TYPE_NONE,
2363                 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2364 
2365             break;
2366         }
2367 
2368         case FOURCC('c', 'o', 'l', 'r'):
2369         {
2370             *offset += chunk_size;
2371             // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2372             // ignore otherwise
2373             if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2374                 status_t err = parseColorInfo(data_offset, chunk_data_size);
2375                 if (err != OK) {
2376                     return err;
2377                 }
2378             }
2379 
2380             break;
2381         }
2382 
2383         case FOURCC('t', 'i', 't', 'l'):
2384         case FOURCC('p', 'e', 'r', 'f'):
2385         case FOURCC('a', 'u', 't', 'h'):
2386         case FOURCC('g', 'n', 'r', 'e'):
2387         case FOURCC('a', 'l', 'b', 'm'):
2388         case FOURCC('y', 'r', 'r', 'c'):
2389         {
2390             *offset += chunk_size;
2391 
2392             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2393 
2394             if (err != OK) {
2395                 return err;
2396             }
2397 
2398             break;
2399         }
2400 
2401         case FOURCC('I', 'D', '3', '2'):
2402         {
2403             *offset += chunk_size;
2404 
2405             if (chunk_data_size < 6) {
2406                 return ERROR_MALFORMED;
2407             }
2408 
2409             parseID3v2MetaData(data_offset + 6);
2410 
2411             break;
2412         }
2413 
2414         case FOURCC('-', '-', '-', '-'):
2415         {
2416             mLastCommentMean.clear();
2417             mLastCommentName.clear();
2418             mLastCommentData.clear();
2419             *offset += chunk_size;
2420             break;
2421         }
2422 
2423         case FOURCC('s', 'i', 'd', 'x'):
2424         {
2425             status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2426             if (err != OK) {
2427                 return err;
2428             }
2429             *offset += chunk_size;
2430             return UNKNOWN_ERROR; // stop parsing after sidx
2431         }
2432 
2433         case FOURCC('a', 'c', '-', '3'):
2434         {
2435             *offset += chunk_size;
2436             return parseAC3SampleEntry(data_offset);
2437         }
2438 
2439         case FOURCC('f', 't', 'y', 'p'):
2440         {
2441             if (chunk_data_size < 8 || depth != 0) {
2442                 return ERROR_MALFORMED;
2443             }
2444 
2445             off64_t stop_offset = *offset + chunk_size;
2446             uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2447             std::set<uint32_t> brandSet;
2448             for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2449                 if (i == 1) {
2450                     // Skip this index, it refers to the minorVersion,
2451                     // not a brand.
2452                     continue;
2453                 }
2454 
2455                 uint32_t brand;
2456                 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2457                     return ERROR_MALFORMED;
2458                 }
2459 
2460                 brand = ntohl(brand);
2461                 brandSet.insert(brand);
2462             }
2463 
2464             if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
2465                 mIsQT = true;
2466             } else {
2467                 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
2468                  && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
2469                     ALOGV("identified HEIF image");
2470 
2471                     mIsHeif = true;
2472                     brandSet.erase(FOURCC('m', 'i', 'f', '1'));
2473                     brandSet.erase(FOURCC('h', 'e', 'i', 'c'));
2474                 }
2475 
2476                 if (!brandSet.empty()) {
2477                     // This means that the file should have moov box.
2478                     // It could be any iso files (mp4, heifs, etc.)
2479                     mHasMoovBox = true;
2480                     if (mIsHeif) {
2481                         ALOGV("identified HEIF image with other tracks");
2482                     }
2483                 }
2484             }
2485 
2486             *offset = stop_offset;
2487 
2488             break;
2489         }
2490 
2491         default:
2492         {
2493             // check if we're parsing 'ilst' for meta keys
2494             // if so, treat type as a number (key-id).
2495             if (underQTMetaPath(mPath, 3)) {
2496                 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2497                 if (err != OK) {
2498                     return err;
2499                 }
2500             }
2501 
2502             *offset += chunk_size;
2503             break;
2504         }
2505     }
2506 
2507     return OK;
2508 }
2509 
parseAC3SampleEntry(off64_t offset)2510 status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
2511     // skip 16 bytes:
2512     //  + 6-byte reserved,
2513     //  + 2-byte data reference index,
2514     //  + 8-byte reserved
2515     offset += 16;
2516     uint16_t channelCount;
2517     if (!mDataSource->getUInt16(offset, &channelCount)) {
2518         return ERROR_MALFORMED;
2519     }
2520     // skip 8 bytes:
2521     //  + 2-byte channelCount,
2522     //  + 2-byte sample size,
2523     //  + 4-byte reserved
2524     offset += 8;
2525     uint16_t sampleRate;
2526     if (!mDataSource->getUInt16(offset, &sampleRate)) {
2527         ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
2528         return ERROR_MALFORMED;
2529     }
2530 
2531     // skip 4 bytes:
2532     //  + 2-byte sampleRate,
2533     //  + 2-byte reserved
2534     offset += 4;
2535     return parseAC3SpecificBox(offset, sampleRate);
2536 }
2537 
parseAC3SpecificBox(off64_t offset,uint16_t sampleRate)2538 status_t MPEG4Extractor::parseAC3SpecificBox(
2539         off64_t offset, uint16_t sampleRate) {
2540     uint32_t size;
2541     // + 4-byte size
2542     // + 4-byte type
2543     // + 3-byte payload
2544     const uint32_t kAC3SpecificBoxSize = 11;
2545     if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
2546         ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
2547         return ERROR_MALFORMED;
2548     }
2549 
2550     offset += 4;
2551     uint32_t type;
2552     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) {
2553         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
2554         return ERROR_MALFORMED;
2555     }
2556 
2557     offset += 4;
2558     const uint32_t kAC3SpecificBoxPayloadSize = 3;
2559     uint8_t chunk[kAC3SpecificBoxPayloadSize];
2560     if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
2561         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
2562         return ERROR_MALFORMED;
2563     }
2564 
2565     ABitReader br(chunk, sizeof(chunk));
2566     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
2567     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
2568 
2569     unsigned fscod = br.getBits(2);
2570     if (fscod == 3) {
2571         ALOGE("Incorrect fscod (3) in AC3 header");
2572         return ERROR_MALFORMED;
2573     }
2574     unsigned boxSampleRate = sampleRateTable[fscod];
2575     if (boxSampleRate != sampleRate) {
2576         ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
2577             boxSampleRate, sampleRate);
2578         return ERROR_MALFORMED;
2579     }
2580 
2581     unsigned bsid = br.getBits(5);
2582     if (bsid > 8) {
2583         ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
2584         return ERROR_MALFORMED;
2585     }
2586 
2587     // skip
2588     unsigned bsmod __unused = br.getBits(3);
2589 
2590     unsigned acmod = br.getBits(3);
2591     unsigned lfeon = br.getBits(1);
2592     unsigned channelCount = channelCountTable[acmod] + lfeon;
2593 
2594     if (mLastTrack == NULL) {
2595         return ERROR_MALFORMED;
2596     }
2597     mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
2598     mLastTrack->meta.setInt32(kKeyChannelCount, channelCount);
2599     mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
2600     return OK;
2601 }
2602 
parseSegmentIndex(off64_t offset,size_t size)2603 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2604   ALOGV("MPEG4Extractor::parseSegmentIndex");
2605 
2606     if (size < 12) {
2607       return -EINVAL;
2608     }
2609 
2610     uint32_t flags;
2611     if (!mDataSource->getUInt32(offset, &flags)) {
2612         return ERROR_MALFORMED;
2613     }
2614 
2615     uint32_t version = flags >> 24;
2616     flags &= 0xffffff;
2617 
2618     ALOGV("sidx version %d", version);
2619 
2620     uint32_t referenceId;
2621     if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2622         return ERROR_MALFORMED;
2623     }
2624 
2625     uint32_t timeScale;
2626     if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2627         return ERROR_MALFORMED;
2628     }
2629     ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2630     if (timeScale == 0)
2631         return ERROR_MALFORMED;
2632 
2633     uint64_t earliestPresentationTime;
2634     uint64_t firstOffset;
2635 
2636     offset += 12;
2637     size -= 12;
2638 
2639     if (version == 0) {
2640         if (size < 8) {
2641             return -EINVAL;
2642         }
2643         uint32_t tmp;
2644         if (!mDataSource->getUInt32(offset, &tmp)) {
2645             return ERROR_MALFORMED;
2646         }
2647         earliestPresentationTime = tmp;
2648         if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2649             return ERROR_MALFORMED;
2650         }
2651         firstOffset = tmp;
2652         offset += 8;
2653         size -= 8;
2654     } else {
2655         if (size < 16) {
2656             return -EINVAL;
2657         }
2658         if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2659             return ERROR_MALFORMED;
2660         }
2661         if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2662             return ERROR_MALFORMED;
2663         }
2664         offset += 16;
2665         size -= 16;
2666     }
2667     ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2668 
2669     if (size < 4) {
2670         return -EINVAL;
2671     }
2672 
2673     uint16_t referenceCount;
2674     if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2675         return ERROR_MALFORMED;
2676     }
2677     offset += 4;
2678     size -= 4;
2679     ALOGV("refcount: %d", referenceCount);
2680 
2681     if (size < referenceCount * 12) {
2682         return -EINVAL;
2683     }
2684 
2685     uint64_t total_duration = 0;
2686     for (unsigned int i = 0; i < referenceCount; i++) {
2687         uint32_t d1, d2, d3;
2688 
2689         if (!mDataSource->getUInt32(offset, &d1) ||     // size
2690             !mDataSource->getUInt32(offset + 4, &d2) || // duration
2691             !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2692             return ERROR_MALFORMED;
2693         }
2694 
2695         if (d1 & 0x80000000) {
2696             ALOGW("sub-sidx boxes not supported yet");
2697         }
2698         bool sap = d3 & 0x80000000;
2699         uint32_t saptype = (d3 >> 28) & 7;
2700         if (!sap || (saptype != 1 && saptype != 2)) {
2701             // type 1 and 2 are sync samples
2702             ALOGW("not a stream access point, or unsupported type: %08x", d3);
2703         }
2704         total_duration += d2;
2705         offset += 12;
2706         ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2707         SidxEntry se;
2708         se.mSize = d1 & 0x7fffffff;
2709         se.mDurationUs = 1000000LL * d2 / timeScale;
2710         mSidxEntries.add(se);
2711     }
2712 
2713     uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2714 
2715     if (mLastTrack == NULL)
2716         return ERROR_MALFORMED;
2717 
2718     int64_t metaDuration;
2719     if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2720         mLastTrack->meta.setInt64(kKeyDuration, sidxDuration);
2721     }
2722     return OK;
2723 }
2724 
parseQTMetaKey(off64_t offset,size_t size)2725 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2726     if (size < 8) {
2727         return ERROR_MALFORMED;
2728     }
2729 
2730     uint32_t count;
2731     if (!mDataSource->getUInt32(offset + 4, &count)) {
2732         return ERROR_MALFORMED;
2733     }
2734 
2735     if (mMetaKeyMap.size() > 0) {
2736         ALOGW("'keys' atom seen again, discarding existing entries");
2737         mMetaKeyMap.clear();
2738     }
2739 
2740     off64_t keyOffset = offset + 8;
2741     off64_t stopOffset = offset + size;
2742     for (size_t i = 1; i <= count; i++) {
2743         if (keyOffset + 8 > stopOffset) {
2744             return ERROR_MALFORMED;
2745         }
2746 
2747         uint32_t keySize;
2748         if (!mDataSource->getUInt32(keyOffset, &keySize)
2749                 || keySize < 8
2750                 || keyOffset + keySize > stopOffset) {
2751             return ERROR_MALFORMED;
2752         }
2753 
2754         uint32_t type;
2755         if (!mDataSource->getUInt32(keyOffset + 4, &type)
2756                 || type != FOURCC('m', 'd', 't', 'a')) {
2757             return ERROR_MALFORMED;
2758         }
2759 
2760         keySize -= 8;
2761         keyOffset += 8;
2762 
2763         auto keyData = heapbuffer<uint8_t>(keySize);
2764         if (keyData.get() == NULL) {
2765             return ERROR_MALFORMED;
2766         }
2767         if (mDataSource->readAt(
2768                 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
2769             return ERROR_MALFORMED;
2770         }
2771 
2772         AString key((const char *)keyData.get(), keySize);
2773         mMetaKeyMap.add(i, key);
2774 
2775         keyOffset += keySize;
2776     }
2777     return OK;
2778 }
2779 
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)2780 status_t MPEG4Extractor::parseQTMetaVal(
2781         int32_t keyId, off64_t offset, size_t size) {
2782     ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2783     if (index < 0) {
2784         // corresponding key is not present, ignore
2785         return ERROR_MALFORMED;
2786     }
2787 
2788     if (size <= 16) {
2789         return ERROR_MALFORMED;
2790     }
2791     uint32_t dataSize;
2792     if (!mDataSource->getUInt32(offset, &dataSize)
2793             || dataSize > size || dataSize <= 16) {
2794         return ERROR_MALFORMED;
2795     }
2796     uint32_t atomFourCC;
2797     if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2798             || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2799         return ERROR_MALFORMED;
2800     }
2801     uint32_t dataType;
2802     if (!mDataSource->getUInt32(offset + 8, &dataType)
2803             || ((dataType & 0xff000000) != 0)) {
2804         // not well-known type
2805         return ERROR_MALFORMED;
2806     }
2807 
2808     dataSize -= 16;
2809     offset += 16;
2810 
2811     if (dataType == 23 && dataSize >= 4) {
2812         // BE Float32
2813         uint32_t val;
2814         if (!mDataSource->getUInt32(offset, &val)) {
2815             return ERROR_MALFORMED;
2816         }
2817         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2818             mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val);
2819         }
2820     } else if (dataType == 67 && dataSize >= 4) {
2821         // BE signed int32
2822         uint32_t val;
2823         if (!mDataSource->getUInt32(offset, &val)) {
2824             return ERROR_MALFORMED;
2825         }
2826         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
2827             mFileMetaData.setInt32(kKeyTemporalLayerCount, val);
2828         }
2829     } else {
2830         // add more keys if needed
2831         ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2832     }
2833 
2834     return OK;
2835 }
2836 
parseTrackHeader(off64_t data_offset,off64_t data_size)2837 status_t MPEG4Extractor::parseTrackHeader(
2838         off64_t data_offset, off64_t data_size) {
2839     if (data_size < 4) {
2840         return ERROR_MALFORMED;
2841     }
2842 
2843     uint8_t version;
2844     if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2845         return ERROR_IO;
2846     }
2847 
2848     size_t dynSize = (version == 1) ? 36 : 24;
2849 
2850     uint8_t buffer[36 + 60];
2851 
2852     if (data_size != (off64_t)dynSize + 60) {
2853         return ERROR_MALFORMED;
2854     }
2855 
2856     if (mDataSource->readAt(
2857                 data_offset, buffer, data_size) < (ssize_t)data_size) {
2858         return ERROR_IO;
2859     }
2860 
2861     uint64_t ctime __unused, mtime __unused, duration __unused;
2862     int32_t id;
2863 
2864     if (version == 1) {
2865         ctime = U64_AT(&buffer[4]);
2866         mtime = U64_AT(&buffer[12]);
2867         id = U32_AT(&buffer[20]);
2868         duration = U64_AT(&buffer[28]);
2869     } else if (version == 0) {
2870         ctime = U32_AT(&buffer[4]);
2871         mtime = U32_AT(&buffer[8]);
2872         id = U32_AT(&buffer[12]);
2873         duration = U32_AT(&buffer[20]);
2874     } else {
2875         return ERROR_UNSUPPORTED;
2876     }
2877 
2878     if (mLastTrack == NULL)
2879         return ERROR_MALFORMED;
2880 
2881     mLastTrack->meta.setInt32(kKeyTrackID, id);
2882 
2883     size_t matrixOffset = dynSize + 16;
2884     int32_t a00 = U32_AT(&buffer[matrixOffset]);
2885     int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2886     int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2887     int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2888 
2889 #if 0
2890     int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2891     int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2892 
2893     ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2894          a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2895     ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2896          a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2897 #endif
2898 
2899     uint32_t rotationDegrees;
2900 
2901     static const int32_t kFixedOne = 0x10000;
2902     if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2903         // Identity, no rotation
2904         rotationDegrees = 0;
2905     } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2906         rotationDegrees = 90;
2907     } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2908         rotationDegrees = 270;
2909     } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2910         rotationDegrees = 180;
2911     } else {
2912         ALOGW("We only support 0,90,180,270 degree rotation matrices");
2913         rotationDegrees = 0;
2914     }
2915 
2916     if (rotationDegrees != 0) {
2917         mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees);
2918     }
2919 
2920     // Handle presentation display size, which could be different
2921     // from the image size indicated by kKeyWidth and kKeyHeight.
2922     uint32_t width = U32_AT(&buffer[dynSize + 52]);
2923     uint32_t height = U32_AT(&buffer[dynSize + 56]);
2924     mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16);
2925     mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16);
2926 
2927     return OK;
2928 }
2929 
parseITunesMetaData(off64_t offset,size_t size)2930 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2931     if (size == 0) {
2932         return OK;
2933     }
2934 
2935     if (size < 4 || size == SIZE_MAX) {
2936         return ERROR_MALFORMED;
2937     }
2938 
2939     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2940     if (buffer == NULL) {
2941         return ERROR_MALFORMED;
2942     }
2943     if (mDataSource->readAt(
2944                 offset, buffer, size) != (ssize_t)size) {
2945         delete[] buffer;
2946         buffer = NULL;
2947 
2948         return ERROR_IO;
2949     }
2950 
2951     uint32_t flags = U32_AT(buffer);
2952 
2953     uint32_t metadataKey = 0;
2954     char chunk[5];
2955     MakeFourCCString(mPath[4], chunk);
2956     ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2957     switch ((int32_t)mPath[4]) {
2958         case FOURCC(0xa9, 'a', 'l', 'b'):
2959         {
2960             metadataKey = kKeyAlbum;
2961             break;
2962         }
2963         case FOURCC(0xa9, 'A', 'R', 'T'):
2964         {
2965             metadataKey = kKeyArtist;
2966             break;
2967         }
2968         case FOURCC('a', 'A', 'R', 'T'):
2969         {
2970             metadataKey = kKeyAlbumArtist;
2971             break;
2972         }
2973         case FOURCC(0xa9, 'd', 'a', 'y'):
2974         {
2975             metadataKey = kKeyYear;
2976             break;
2977         }
2978         case FOURCC(0xa9, 'n', 'a', 'm'):
2979         {
2980             metadataKey = kKeyTitle;
2981             break;
2982         }
2983         case FOURCC(0xa9, 'w', 'r', 't'):
2984         {
2985             metadataKey = kKeyWriter;
2986             break;
2987         }
2988         case FOURCC('c', 'o', 'v', 'r'):
2989         {
2990             metadataKey = kKeyAlbumArt;
2991             break;
2992         }
2993         case FOURCC('g', 'n', 'r', 'e'):
2994         {
2995             metadataKey = kKeyGenre;
2996             break;
2997         }
2998         case FOURCC(0xa9, 'g', 'e', 'n'):
2999         {
3000             metadataKey = kKeyGenre;
3001             break;
3002         }
3003         case FOURCC('c', 'p', 'i', 'l'):
3004         {
3005             if (size == 9 && flags == 21) {
3006                 char tmp[16];
3007                 sprintf(tmp, "%d",
3008                         (int)buffer[size - 1]);
3009 
3010                 mFileMetaData.setCString(kKeyCompilation, tmp);
3011             }
3012             break;
3013         }
3014         case FOURCC('t', 'r', 'k', 'n'):
3015         {
3016             if (size == 16 && flags == 0) {
3017                 char tmp[16];
3018                 uint16_t* pTrack = (uint16_t*)&buffer[10];
3019                 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3020                 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3021 
3022                 mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3023             }
3024             break;
3025         }
3026         case FOURCC('d', 'i', 's', 'k'):
3027         {
3028             if ((size == 14 || size == 16) && flags == 0) {
3029                 char tmp[16];
3030                 uint16_t* pDisc = (uint16_t*)&buffer[10];
3031                 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3032                 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3033 
3034                 mFileMetaData.setCString(kKeyDiscNumber, tmp);
3035             }
3036             break;
3037         }
3038         case FOURCC('-', '-', '-', '-'):
3039         {
3040             buffer[size] = '\0';
3041             switch (mPath[5]) {
3042                 case FOURCC('m', 'e', 'a', 'n'):
3043                     mLastCommentMean.setTo((const char *)buffer + 4);
3044                     break;
3045                 case FOURCC('n', 'a', 'm', 'e'):
3046                     mLastCommentName.setTo((const char *)buffer + 4);
3047                     break;
3048                 case FOURCC('d', 'a', 't', 'a'):
3049                     if (size < 8) {
3050                         delete[] buffer;
3051                         buffer = NULL;
3052                         ALOGE("b/24346430");
3053                         return ERROR_MALFORMED;
3054                     }
3055                     mLastCommentData.setTo((const char *)buffer + 8);
3056                     break;
3057             }
3058 
3059             // Once we have a set of mean/name/data info, go ahead and process
3060             // it to see if its something we are interested in.  Whether or not
3061             // were are interested in the specific tag, make sure to clear out
3062             // the set so we can be ready to process another tuple should one
3063             // show up later in the file.
3064             if ((mLastCommentMean.length() != 0) &&
3065                 (mLastCommentName.length() != 0) &&
3066                 (mLastCommentData.length() != 0)) {
3067 
3068                 if (mLastCommentMean == "com.apple.iTunes"
3069                         && mLastCommentName == "iTunSMPB") {
3070                     int32_t delay, padding;
3071                     if (sscanf(mLastCommentData,
3072                                " %*x %x %x %*x", &delay, &padding) == 2) {
3073                         if (mLastTrack == NULL) {
3074                             delete[] buffer;
3075                             return ERROR_MALFORMED;
3076                         }
3077 
3078                         mLastTrack->meta.setInt32(kKeyEncoderDelay, delay);
3079                         mLastTrack->meta.setInt32(kKeyEncoderPadding, padding);
3080                     }
3081                 }
3082 
3083                 mLastCommentMean.clear();
3084                 mLastCommentName.clear();
3085                 mLastCommentData.clear();
3086             }
3087             break;
3088         }
3089 
3090         default:
3091             break;
3092     }
3093 
3094     if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) {
3095         if (metadataKey == kKeyAlbumArt) {
3096             mFileMetaData.setData(
3097                     kKeyAlbumArt, MetaData::TYPE_NONE,
3098                     buffer + 8, size - 8);
3099         } else if (metadataKey == kKeyGenre) {
3100             if (flags == 0) {
3101                 // uint8_t genre code, iTunes genre codes are
3102                 // the standard id3 codes, except they start
3103                 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3104                 // We use standard id3 numbering, so subtract 1.
3105                 int genrecode = (int)buffer[size - 1];
3106                 genrecode--;
3107                 if (genrecode < 0) {
3108                     genrecode = 255; // reserved for 'unknown genre'
3109                 }
3110                 char genre[10];
3111                 sprintf(genre, "%d", genrecode);
3112 
3113                 mFileMetaData.setCString(metadataKey, genre);
3114             } else if (flags == 1) {
3115                 // custom genre string
3116                 buffer[size] = '\0';
3117 
3118                 mFileMetaData.setCString(
3119                         metadataKey, (const char *)buffer + 8);
3120             }
3121         } else {
3122             buffer[size] = '\0';
3123 
3124             mFileMetaData.setCString(
3125                     metadataKey, (const char *)buffer + 8);
3126         }
3127     }
3128 
3129     delete[] buffer;
3130     buffer = NULL;
3131 
3132     return OK;
3133 }
3134 
parseColorInfo(off64_t offset,size_t size)3135 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3136     if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3137         return ERROR_MALFORMED;
3138     }
3139 
3140     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3141     if (buffer == NULL) {
3142         return ERROR_MALFORMED;
3143     }
3144     if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3145         delete[] buffer;
3146         buffer = NULL;
3147 
3148         return ERROR_IO;
3149     }
3150 
3151     int32_t type = U32_AT(&buffer[0]);
3152     if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
3153             || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) {
3154         int32_t primaries = U16_AT(&buffer[4]);
3155         int32_t transfer = U16_AT(&buffer[6]);
3156         int32_t coeffs = U16_AT(&buffer[8]);
3157         bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
3158 
3159         ColorAspects aspects;
3160         ColorUtils::convertIsoColorAspectsToCodecAspects(
3161                 primaries, transfer, coeffs, fullRange, aspects);
3162 
3163         // only store the first color specification
3164         if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) {
3165             mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries);
3166             mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer);
3167             mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
3168             mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange);
3169         }
3170     }
3171 
3172     delete[] buffer;
3173     buffer = NULL;
3174 
3175     return OK;
3176 }
3177 
parse3GPPMetaData(off64_t offset,size_t size,int depth)3178 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3179     if (size < 4 || size == SIZE_MAX) {
3180         return ERROR_MALFORMED;
3181     }
3182 
3183     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3184     if (buffer == NULL) {
3185         return ERROR_MALFORMED;
3186     }
3187     if (mDataSource->readAt(
3188                 offset, buffer, size) != (ssize_t)size) {
3189         delete[] buffer;
3190         buffer = NULL;
3191 
3192         return ERROR_IO;
3193     }
3194 
3195     uint32_t metadataKey = 0;
3196     switch (mPath[depth]) {
3197         case FOURCC('t', 'i', 't', 'l'):
3198         {
3199             metadataKey = kKeyTitle;
3200             break;
3201         }
3202         case FOURCC('p', 'e', 'r', 'f'):
3203         {
3204             metadataKey = kKeyArtist;
3205             break;
3206         }
3207         case FOURCC('a', 'u', 't', 'h'):
3208         {
3209             metadataKey = kKeyWriter;
3210             break;
3211         }
3212         case FOURCC('g', 'n', 'r', 'e'):
3213         {
3214             metadataKey = kKeyGenre;
3215             break;
3216         }
3217         case FOURCC('a', 'l', 'b', 'm'):
3218         {
3219             if (buffer[size - 1] != '\0') {
3220               char tmp[4];
3221               sprintf(tmp, "%u", buffer[size - 1]);
3222 
3223               mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3224             }
3225 
3226             metadataKey = kKeyAlbum;
3227             break;
3228         }
3229         case FOURCC('y', 'r', 'r', 'c'):
3230         {
3231             if (size < 6) {
3232                 delete[] buffer;
3233                 buffer = NULL;
3234                 ALOGE("b/62133227");
3235                 android_errorWriteLog(0x534e4554, "62133227");
3236                 return ERROR_MALFORMED;
3237             }
3238             char tmp[5];
3239             uint16_t year = U16_AT(&buffer[4]);
3240 
3241             if (year < 10000) {
3242                 sprintf(tmp, "%u", year);
3243 
3244                 mFileMetaData.setCString(kKeyYear, tmp);
3245             }
3246             break;
3247         }
3248 
3249         default:
3250             break;
3251     }
3252 
3253     if (metadataKey > 0) {
3254         bool isUTF8 = true; // Common case
3255         char16_t *framedata = NULL;
3256         int len16 = 0; // Number of UTF-16 characters
3257 
3258         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3259         if (size < 6) {
3260             delete[] buffer;
3261             buffer = NULL;
3262             return ERROR_MALFORMED;
3263         }
3264 
3265         if (size - 6 >= 4) {
3266             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3267             framedata = (char16_t *)(buffer + 6);
3268             if (0xfffe == *framedata) {
3269                 // endianness marker (BOM) doesn't match host endianness
3270                 for (int i = 0; i < len16; i++) {
3271                     framedata[i] = bswap_16(framedata[i]);
3272                 }
3273                 // BOM is now swapped to 0xfeff, we will execute next block too
3274             }
3275 
3276             if (0xfeff == *framedata) {
3277                 // Remove the BOM
3278                 framedata++;
3279                 len16--;
3280                 isUTF8 = false;
3281             }
3282             // else normal non-zero-length UTF-8 string
3283             // we can't handle UTF-16 without BOM as there is no other
3284             // indication of encoding.
3285         }
3286 
3287         if (isUTF8) {
3288             buffer[size] = 0;
3289             mFileMetaData.setCString(metadataKey, (const char *)buffer + 6);
3290         } else {
3291             // Convert from UTF-16 string to UTF-8 string.
3292             String8 tmpUTF8str(framedata, len16);
3293             mFileMetaData.setCString(metadataKey, tmpUTF8str.string());
3294         }
3295     }
3296 
3297     delete[] buffer;
3298     buffer = NULL;
3299 
3300     return OK;
3301 }
3302 
parseID3v2MetaData(off64_t offset)3303 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
3304     ID3 id3(mDataSource, true /* ignorev1 */, offset);
3305 
3306     if (id3.isValid()) {
3307         struct Map {
3308             int key;
3309             const char *tag1;
3310             const char *tag2;
3311         };
3312         static const Map kMap[] = {
3313             { kKeyAlbum, "TALB", "TAL" },
3314             { kKeyArtist, "TPE1", "TP1" },
3315             { kKeyAlbumArtist, "TPE2", "TP2" },
3316             { kKeyComposer, "TCOM", "TCM" },
3317             { kKeyGenre, "TCON", "TCO" },
3318             { kKeyTitle, "TIT2", "TT2" },
3319             { kKeyYear, "TYE", "TYER" },
3320             { kKeyAuthor, "TXT", "TEXT" },
3321             { kKeyCDTrackNumber, "TRK", "TRCK" },
3322             { kKeyDiscNumber, "TPA", "TPOS" },
3323             { kKeyCompilation, "TCP", "TCMP" },
3324         };
3325         static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
3326 
3327         for (size_t i = 0; i < kNumMapEntries; ++i) {
3328             if (!mFileMetaData.hasData(kMap[i].key)) {
3329                 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
3330                 if (it->done()) {
3331                     delete it;
3332                     it = new ID3::Iterator(id3, kMap[i].tag2);
3333                 }
3334 
3335                 if (it->done()) {
3336                     delete it;
3337                     continue;
3338                 }
3339 
3340                 String8 s;
3341                 it->getString(&s);
3342                 delete it;
3343 
3344                 mFileMetaData.setCString(kMap[i].key, s);
3345             }
3346         }
3347 
3348         size_t dataSize;
3349         String8 mime;
3350         const void *data = id3.getAlbumArt(&dataSize, &mime);
3351 
3352         if (data) {
3353             mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
3354             mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string());
3355         }
3356     }
3357 }
3358 
getTrack(size_t index)3359 MediaTrack *MPEG4Extractor::getTrack(size_t index) {
3360     status_t err;
3361     if ((err = readMetaData()) != OK) {
3362         return NULL;
3363     }
3364 
3365     Track *track = mFirstTrack;
3366     while (index > 0) {
3367         if (track == NULL) {
3368             return NULL;
3369         }
3370 
3371         track = track->next;
3372         --index;
3373     }
3374 
3375     if (track == NULL) {
3376         return NULL;
3377     }
3378 
3379 
3380     Trex *trex = NULL;
3381     int32_t trackId;
3382     if (track->meta.findInt32(kKeyTrackID, &trackId)) {
3383         for (size_t i = 0; i < mTrex.size(); i++) {
3384             Trex *t = &mTrex.editItemAt(i);
3385             if (t->track_ID == (uint32_t) trackId) {
3386                 trex = t;
3387                 break;
3388             }
3389         }
3390     } else {
3391         ALOGE("b/21657957");
3392         return NULL;
3393     }
3394 
3395     ALOGV("getTrack called, pssh: %zu", mPssh.size());
3396 
3397     const char *mime;
3398     if (!track->meta.findCString(kKeyMIMEType, &mime)) {
3399         return NULL;
3400     }
3401 
3402     sp<ItemTable> itemTable;
3403     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3404         uint32_t type;
3405         const void *data;
3406         size_t size;
3407         if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) {
3408             return NULL;
3409         }
3410 
3411         const uint8_t *ptr = (const uint8_t *)data;
3412 
3413         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3414             return NULL;
3415         }
3416     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
3417             || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3418         uint32_t type;
3419         const void *data;
3420         size_t size;
3421         if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) {
3422             return NULL;
3423         }
3424 
3425         const uint8_t *ptr = (const uint8_t *)data;
3426 
3427         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3428             return NULL;
3429         }
3430         if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3431             itemTable = mItemTable;
3432         }
3433     }
3434 
3435     MPEG4Source *source =  new MPEG4Source(
3436             track->meta, mDataSource, track->timescale, track->sampleTable,
3437             mSidxEntries, trex, mMoofOffset, itemTable);
3438     if (source->init() != OK) {
3439         delete source;
3440         return NULL;
3441     }
3442     return source;
3443 }
3444 
3445 // static
verifyTrack(Track * track)3446 status_t MPEG4Extractor::verifyTrack(Track *track) {
3447     const char *mime;
3448     CHECK(track->meta.findCString(kKeyMIMEType, &mime));
3449 
3450     uint32_t type;
3451     const void *data;
3452     size_t size;
3453     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3454         if (!track->meta.findData(kKeyAVCC, &type, &data, &size)
3455                 || type != kTypeAVCC) {
3456             return ERROR_MALFORMED;
3457         }
3458     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3459         if (!track->meta.findData(kKeyHVCC, &type, &data, &size)
3460                     || type != kTypeHVCC) {
3461             return ERROR_MALFORMED;
3462         }
3463     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3464             || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3465             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3466         if (!track->meta.findData(kKeyESDS, &type, &data, &size)
3467                 || type != kTypeESDS) {
3468             return ERROR_MALFORMED;
3469         }
3470     }
3471 
3472     if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3473         // Make sure we have all the metadata we need.
3474         ALOGE("stbl atom missing/invalid.");
3475         return ERROR_MALFORMED;
3476     }
3477 
3478     if (track->timescale == 0) {
3479         ALOGE("timescale invalid.");
3480         return ERROR_MALFORMED;
3481     }
3482 
3483     return OK;
3484 }
3485 
// MPEG-4 audio object type (AOT) codes, as found in the first bits of the
// codec specific data parsed by updateAudioTrackInfoFromESDS_MPEG4Audio().
// Only the values referenced by this file are enabled; the remaining codes are
// kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,  /**< Main profile                              */
    AOT_AAC_LC              = 2,  /**< Low Complexity object                     */
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10, /**< (reserved)                                */
    // AOT_RSVD_11          = 11, /**< (reserved)                                */
    // AOT_TTSI             = 12, /**< TTSI Object                               */
    // AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object                     */
    // AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object                */
    // AOT_GEN_MIDI         = 15, /**< General MIDI object                       */
    // AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC           = 17, /**< Error Resilient(ER) AAC Low Complexity    */
    // AOT_RSVD_18          = 18, /**< (reserved)                                */
    // AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object        */
    AOT_ER_AAC_SCAL         = 20, /**< Error Resilient(ER) AAC Scalable object   */
    // AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object         */
    AOT_ER_BSAC             = 22, /**< Error Resilient(ER) BSAC object           */
    AOT_ER_AAC_LD           = 23, /**< Error Resilient(ER) AAC LowDelay object   */
    // AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object           */
    // AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object           */
    // AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object           */
    // AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object     */
    // AOT_RSVD_28          = 28, /**< might become SSC                          */
    AOT_PS                  = 29, /**< PS, Parametric Stereo (includes SBR)      */
    // AOT_MPEGS            = 30, /**< MPEG Surround                             */

    AOT_ESCAPE              = 31, /**< Signal AOT uses more than 5 bits          */

    // AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4                        */
    // AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4                        */
    // AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4                        */
    // AOT_RSVD_35          = 35, /**< might become DST                          */
    // AOT_RSVD_36          = 36, /**< might become ALS                          */
    // AOT_AAC_SLS          = 37, /**< AAC + SLS                                 */
    // AOT_SLS              = 38, /**< SLS                                       */
    // AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay                    */

    // AOT_USAC             = 42, /**< USAC                                      */
    // AOT_SAOC             = 43, /**< SAOC                                      */
    // AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround                   */

    // AOT_RSVD50           = 50, /**< Interim AOT for Rsvd50                    */
};
3537 
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)3538 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
3539         const void *esds_data, size_t esds_size) {
3540     ESDS esds(esds_data, esds_size);
3541 
3542     uint8_t objectTypeIndication;
3543     if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
3544         return ERROR_MALFORMED;
3545     }
3546 
3547     if (objectTypeIndication == 0xe1) {
3548         // This isn't MPEG4 audio at all, it's QCELP 14k...
3549         if (mLastTrack == NULL)
3550             return ERROR_MALFORMED;
3551 
3552         mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
3553         return OK;
3554     }
3555 
3556     if (objectTypeIndication  == 0x6b) {
3557         // The media subtype is MP3 audio
3558         // Our software MP3 audio decoder may not be able to handle
3559         // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
3560         ALOGE("MP3 track in MP4/3GPP file is not supported");
3561         return ERROR_UNSUPPORTED;
3562     }
3563 
3564     if (mLastTrack != NULL) {
3565         uint32_t maxBitrate = 0;
3566         uint32_t avgBitrate = 0;
3567         esds.getBitRate(&maxBitrate, &avgBitrate);
3568         if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
3569             mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
3570         }
3571         if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
3572             mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
3573         }
3574     }
3575 
3576     const uint8_t *csd;
3577     size_t csd_size;
3578     if (esds.getCodecSpecificInfo(
3579                 (const void **)&csd, &csd_size) != OK) {
3580         return ERROR_MALFORMED;
3581     }
3582 
3583     if (kUseHexDump) {
3584         printf("ESD of size %zu\n", csd_size);
3585         hexdump(csd, csd_size);
3586     }
3587 
3588     if (csd_size == 0) {
3589         // There's no further information, i.e. no codec specific data
3590         // Let's assume that the information provided in the mpeg4 headers
3591         // is accurate and hope for the best.
3592 
3593         return OK;
3594     }
3595 
3596     if (csd_size < 2) {
3597         return ERROR_MALFORMED;
3598     }
3599 
3600     static uint32_t kSamplingRate[] = {
3601         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
3602         16000, 12000, 11025, 8000, 7350
3603     };
3604 
3605     ABitReader br(csd, csd_size);
3606     uint32_t objectType = br.getBits(5);
3607 
3608     if (objectType == 31) {  // AAC-ELD => additional 6 bits
3609         objectType = 32 + br.getBits(6);
3610     }
3611 
3612     if (mLastTrack == NULL)
3613         return ERROR_MALFORMED;
3614 
3615     //keep AOT type
3616     mLastTrack->meta.setInt32(kKeyAACAOT, objectType);
3617 
3618     uint32_t freqIndex = br.getBits(4);
3619 
3620     int32_t sampleRate = 0;
3621     int32_t numChannels = 0;
3622     if (freqIndex == 15) {
3623         if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
3624         sampleRate = br.getBits(24);
3625         numChannels = br.getBits(4);
3626     } else {
3627         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3628         numChannels = br.getBits(4);
3629 
3630         if (freqIndex == 13 || freqIndex == 14) {
3631             return ERROR_MALFORMED;
3632         }
3633 
3634         sampleRate = kSamplingRate[freqIndex];
3635     }
3636 
3637     if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
3638         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3639         uint32_t extFreqIndex = br.getBits(4);
3640         int32_t extSampleRate __unused;
3641         if (extFreqIndex == 15) {
3642             if (csd_size < 8) {
3643                 return ERROR_MALFORMED;
3644             }
3645             if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
3646             extSampleRate = br.getBits(24);
3647         } else {
3648             if (extFreqIndex == 13 || extFreqIndex == 14) {
3649                 return ERROR_MALFORMED;
3650             }
3651             extSampleRate = kSamplingRate[extFreqIndex];
3652         }
3653         //TODO: save the extension sampling rate value in meta data =>
3654         //      mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate);
3655     }
3656 
3657     switch (numChannels) {
3658         // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
3659         case 0:
3660         case 1:// FC
3661         case 2:// FL FR
3662         case 3:// FC, FL FR
3663         case 4:// FC, FL FR, RC
3664         case 5:// FC, FL FR, SL SR
3665         case 6:// FC, FL FR, SL SR, LFE
3666             //numChannels already contains the right value
3667             break;
3668         case 11:// FC, FL FR, SL SR, RC, LFE
3669             numChannels = 7;
3670             break;
3671         case 7: // FC, FCL FCR, FL FR, SL SR, LFE
3672         case 12:// FC, FL  FR,  SL SR, RL RR, LFE
3673         case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
3674             numChannels = 8;
3675             break;
3676         default:
3677             return ERROR_UNSUPPORTED;
3678     }
3679 
3680     {
3681         if (objectType == AOT_SBR || objectType == AOT_PS) {
3682             if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3683             objectType = br.getBits(5);
3684 
3685             if (objectType == AOT_ESCAPE) {
3686                 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
3687                 objectType = 32 + br.getBits(6);
3688             }
3689         }
3690         if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
3691                 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
3692                 objectType == AOT_ER_BSAC) {
3693             if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
3694             const int32_t frameLengthFlag __unused = br.getBits(1);
3695 
3696             const int32_t dependsOnCoreCoder = br.getBits(1);
3697 
3698             if (dependsOnCoreCoder ) {
3699                 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
3700                 const int32_t coreCoderDelay __unused = br.getBits(14);
3701             }
3702 
3703             int32_t extensionFlag = -1;
3704             if (br.numBitsLeft() > 0) {
3705                 extensionFlag = br.getBits(1);
3706             } else {
3707                 switch (objectType) {
3708                 // 14496-3 4.5.1.1 extensionFlag
3709                 case AOT_AAC_LC:
3710                     extensionFlag = 0;
3711                     break;
3712                 case AOT_ER_AAC_LC:
3713                 case AOT_ER_AAC_SCAL:
3714                 case AOT_ER_BSAC:
3715                 case AOT_ER_AAC_LD:
3716                     extensionFlag = 1;
3717                     break;
3718                 default:
3719                     return ERROR_MALFORMED;
3720                     break;
3721                 }
3722                 ALOGW("csd missing extension flag; assuming %d for object type %u.",
3723                         extensionFlag, objectType);
3724             }
3725 
3726             if (numChannels == 0) {
3727                 int32_t channelsEffectiveNum = 0;
3728                 int32_t channelsNum = 0;
3729                 if (br.numBitsLeft() < 32) {
3730                     return ERROR_MALFORMED;
3731                 }
3732                 const int32_t ElementInstanceTag __unused = br.getBits(4);
3733                 const int32_t Profile __unused = br.getBits(2);
3734                 const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
3735                 const int32_t NumFrontChannelElements = br.getBits(4);
3736                 const int32_t NumSideChannelElements = br.getBits(4);
3737                 const int32_t NumBackChannelElements = br.getBits(4);
3738                 const int32_t NumLfeChannelElements = br.getBits(2);
3739                 const int32_t NumAssocDataElements __unused = br.getBits(3);
3740                 const int32_t NumValidCcElements __unused = br.getBits(4);
3741 
3742                 const int32_t MonoMixdownPresent = br.getBits(1);
3743 
3744                 if (MonoMixdownPresent != 0) {
3745                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3746                     const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
3747                 }
3748 
3749                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3750                 const int32_t StereoMixdownPresent = br.getBits(1);
3751                 if (StereoMixdownPresent != 0) {
3752                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3753                     const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
3754                 }
3755 
3756                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3757                 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
3758                 if (MatrixMixdownIndexPresent != 0) {
3759                     if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
3760                     const int32_t MatrixMixdownIndex __unused = br.getBits(2);
3761                     const int32_t PseudoSurroundEnable __unused = br.getBits(1);
3762                 }
3763 
3764                 int i;
3765                 for (i=0; i < NumFrontChannelElements; i++) {
3766                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3767                     const int32_t FrontElementIsCpe = br.getBits(1);
3768                     const int32_t FrontElementTagSelect __unused = br.getBits(4);
3769                     channelsNum += FrontElementIsCpe ? 2 : 1;
3770                 }
3771 
3772                 for (i=0; i < NumSideChannelElements; i++) {
3773                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3774                     const int32_t SideElementIsCpe = br.getBits(1);
3775                     const int32_t SideElementTagSelect __unused = br.getBits(4);
3776                     channelsNum += SideElementIsCpe ? 2 : 1;
3777                 }
3778 
3779                 for (i=0; i < NumBackChannelElements; i++) {
3780                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3781                     const int32_t BackElementIsCpe = br.getBits(1);
3782                     const int32_t BackElementTagSelect __unused = br.getBits(4);
3783                     channelsNum += BackElementIsCpe ? 2 : 1;
3784                 }
3785                 channelsEffectiveNum = channelsNum;
3786 
3787                 for (i=0; i < NumLfeChannelElements; i++) {
3788                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3789                     const int32_t LfeElementTagSelect __unused = br.getBits(4);
3790                     channelsNum += 1;
3791                 }
3792                 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
3793                 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
3794                 numChannels = channelsNum;
3795             }
3796         }
3797     }
3798 
3799     if (numChannels == 0) {
3800         return ERROR_UNSUPPORTED;
3801     }
3802 
3803     if (mLastTrack == NULL)
3804         return ERROR_MALFORMED;
3805 
3806     int32_t prevSampleRate;
3807     CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate));
3808 
3809     if (prevSampleRate != sampleRate) {
3810         ALOGV("mpeg4 audio sample rate different from previous setting. "
3811              "was: %d, now: %d", prevSampleRate, sampleRate);
3812     }
3813 
3814     mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
3815 
3816     int32_t prevChannelCount;
3817     CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount));
3818 
3819     if (prevChannelCount != numChannels) {
3820         ALOGV("mpeg4 audio channel count different from previous setting. "
3821              "was: %d, now: %d", prevChannelCount, numChannels);
3822     }
3823 
3824     mLastTrack->meta.setInt32(kKeyChannelCount, numChannels);
3825 
3826     return OK;
3827 }
3828 
3829 ////////////////////////////////////////////////////////////////////////////////
3830 
MPEG4Source(MetaDataBase & format,DataSourceBase * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable)3831 MPEG4Source::MPEG4Source(
3832         MetaDataBase &format,
3833         DataSourceBase *dataSource,
3834         int32_t timeScale,
3835         const sp<SampleTable> &sampleTable,
3836         Vector<SidxEntry> &sidx,
3837         const Trex *trex,
3838         off64_t firstMoofOffset,
3839         const sp<ItemTable> &itemTable)
3840     : mFormat(format),
3841       mDataSource(dataSource),
3842       mTimescale(timeScale),
3843       mSampleTable(sampleTable),
3844       mCurrentSampleIndex(0),
3845       mCurrentFragmentIndex(0),
3846       mSegments(sidx),
3847       mTrex(trex),
3848       mFirstMoofOffset(firstMoofOffset),
3849       mCurrentMoofOffset(firstMoofOffset),
3850       mNextMoofOffset(-1),
3851       mCurrentTime(0),
3852       mDefaultEncryptedByteBlock(0),
3853       mDefaultSkipByteBlock(0),
3854       mCurrentSampleInfoAllocSize(0),
3855       mCurrentSampleInfoSizes(NULL),
3856       mCurrentSampleInfoOffsetsAllocSize(0),
3857       mCurrentSampleInfoOffsets(NULL),
3858       mIsAVC(false),
3859       mIsHEVC(false),
3860       mNALLengthSize(0),
3861       mStarted(false),
3862       mGroup(NULL),
3863       mBuffer(NULL),
3864       mWantsNALFragments(false),
3865       mSrcBuffer(NULL),
3866       mIsHeif(itemTable != NULL),
3867       mItemTable(itemTable) {
3868 
3869     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3870 
3871     mFormat.findInt32(kKeyCryptoMode, &mCryptoMode);
3872     mDefaultIVSize = 0;
3873     mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3874     uint32_t keytype;
3875     const void *key;
3876     size_t keysize;
3877     if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3878         CHECK(keysize <= 16);
3879         memset(mCryptoKey, 0, 16);
3880         memcpy(mCryptoKey, key, keysize);
3881     }
3882 
3883     mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock);
3884     mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock);
3885 
3886     const char *mime;
3887     bool success = mFormat.findCString(kKeyMIMEType, &mime);
3888     CHECK(success);
3889 
3890     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3891     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
3892               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
3893 
3894     if (mIsAVC) {
3895         uint32_t type;
3896         const void *data;
3897         size_t size;
3898         CHECK(format.findData(kKeyAVCC, &type, &data, &size));
3899 
3900         const uint8_t *ptr = (const uint8_t *)data;
3901 
3902         CHECK(size >= 7);
3903         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3904 
3905         // The number of bytes used to encode the length of a NAL unit.
3906         mNALLengthSize = 1 + (ptr[4] & 3);
3907     } else if (mIsHEVC) {
3908         uint32_t type;
3909         const void *data;
3910         size_t size;
3911         CHECK(format.findData(kKeyHVCC, &type, &data, &size));
3912 
3913         const uint8_t *ptr = (const uint8_t *)data;
3914 
3915         CHECK(size >= 22);
3916         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3917 
3918         mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3919     }
3920 
3921     CHECK(format.findInt32(kKeyTrackID, &mTrackId));
3922 
3923 }
3924 
init()3925 status_t MPEG4Source::init() {
3926     if (mFirstMoofOffset != 0) {
3927         off64_t offset = mFirstMoofOffset;
3928         return parseChunk(&offset);
3929     }
3930     return OK;
3931 }
3932 
~MPEG4Source()3933 MPEG4Source::~MPEG4Source() {
3934     if (mStarted) {
3935         stop();
3936     }
3937     free(mCurrentSampleInfoSizes);
3938     free(mCurrentSampleInfoOffsets);
3939 }
3940 
start(MetaDataBase * params)3941 status_t MPEG4Source::start(MetaDataBase *params) {
3942     Mutex::Autolock autoLock(mLock);
3943 
3944     CHECK(!mStarted);
3945 
3946     int32_t val;
3947     if (params && params->findInt32(kKeyWantsNALFragments, &val)
3948         && val != 0) {
3949         mWantsNALFragments = true;
3950     } else {
3951         mWantsNALFragments = false;
3952     }
3953 
3954     int32_t tmp;
3955     CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp));
3956     size_t max_size = tmp;
3957 
3958     // A somewhat arbitrary limit that should be sufficient for 8k video frames
3959     // If you see the message below for a valid input stream: increase the limit
3960     const size_t kMaxBufferSize = 64 * 1024 * 1024;
3961     if (max_size > kMaxBufferSize) {
3962         ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
3963         return ERROR_MALFORMED;
3964     }
3965     if (max_size == 0) {
3966         ALOGE("zero max input size");
3967         return ERROR_MALFORMED;
3968     }
3969 
3970     // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
3971     const size_t kInitialBuffers = 2;
3972     const size_t kMaxBuffers = 8;
3973     const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
3974     mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers);
3975     mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3976     if (mSrcBuffer == NULL) {
3977         // file probably specified a bad max size
3978         delete mGroup;
3979         mGroup = NULL;
3980         return ERROR_MALFORMED;
3981     }
3982 
3983     mStarted = true;
3984 
3985     return OK;
3986 }
3987 
stop()3988 status_t MPEG4Source::stop() {
3989     Mutex::Autolock autoLock(mLock);
3990 
3991     CHECK(mStarted);
3992 
3993     if (mBuffer != NULL) {
3994         mBuffer->release();
3995         mBuffer = NULL;
3996     }
3997 
3998     delete[] mSrcBuffer;
3999     mSrcBuffer = NULL;
4000 
4001     delete mGroup;
4002     mGroup = NULL;
4003 
4004     mStarted = false;
4005     mCurrentSampleIndex = 0;
4006 
4007     return OK;
4008 }
4009 
parseChunk(off64_t * offset)4010 status_t MPEG4Source::parseChunk(off64_t *offset) {
4011     uint32_t hdr[2];
4012     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4013         return ERROR_IO;
4014     }
4015     uint64_t chunk_size = ntohl(hdr[0]);
4016     uint32_t chunk_type = ntohl(hdr[1]);
4017     off64_t data_offset = *offset + 8;
4018 
4019     if (chunk_size == 1) {
4020         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4021             return ERROR_IO;
4022         }
4023         chunk_size = ntoh64(chunk_size);
4024         data_offset += 8;
4025 
4026         if (chunk_size < 16) {
4027             // The smallest valid chunk is 16 bytes long in this case.
4028             return ERROR_MALFORMED;
4029         }
4030     } else if (chunk_size < 8) {
4031         // The smallest valid chunk is 8 bytes long.
4032         return ERROR_MALFORMED;
4033     }
4034 
4035     char chunk[5];
4036     MakeFourCCString(chunk_type, chunk);
4037     ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4038 
4039     off64_t chunk_data_size = *offset + chunk_size - data_offset;
4040 
4041     switch(chunk_type) {
4042 
4043         case FOURCC('t', 'r', 'a', 'f'):
4044         case FOURCC('m', 'o', 'o', 'f'): {
4045             off64_t stop_offset = *offset + chunk_size;
4046             *offset = data_offset;
4047             while (*offset < stop_offset) {
4048                 status_t err = parseChunk(offset);
4049                 if (err != OK) {
4050                     return err;
4051                 }
4052             }
4053             if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4054                 // *offset points to the box following this moof. Find the next moof from there.
4055 
4056                 while (true) {
4057                     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4058                         // no more box to the end of file.
4059                         break;
4060                     }
4061                     chunk_size = ntohl(hdr[0]);
4062                     chunk_type = ntohl(hdr[1]);
4063                     if (chunk_size == 1) {
4064                         // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4065                         // which is defined in 4.2 Object Structure.
4066                         // When chunk_size==1, 8 bytes follows as "largesize".
4067                         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4068                             return ERROR_IO;
4069                         }
4070                         chunk_size = ntoh64(chunk_size);
4071                         if (chunk_size < 16) {
4072                             // The smallest valid chunk is 16 bytes long in this case.
4073                             return ERROR_MALFORMED;
4074                         }
4075                     } else if (chunk_size == 0) {
4076                         // next box extends to end of file.
4077                     } else if (chunk_size < 8) {
4078                         // The smallest valid chunk is 8 bytes long in this case.
4079                         return ERROR_MALFORMED;
4080                     }
4081 
4082                     if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4083                         mNextMoofOffset = *offset;
4084                         break;
4085                     } else if (chunk_size == 0) {
4086                         break;
4087                     }
4088                     *offset += chunk_size;
4089                 }
4090             }
4091             break;
4092         }
4093 
4094         case FOURCC('t', 'f', 'h', 'd'): {
4095                 status_t err;
4096                 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4097                     return err;
4098                 }
4099                 *offset += chunk_size;
4100                 break;
4101         }
4102 
4103         case FOURCC('t', 'r', 'u', 'n'): {
4104                 status_t err;
4105                 if (mLastParsedTrackId == mTrackId) {
4106                     if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4107                         return err;
4108                     }
4109                 }
4110 
4111                 *offset += chunk_size;
4112                 break;
4113         }
4114 
4115         case FOURCC('s', 'a', 'i', 'z'): {
4116             status_t err;
4117             if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4118                 return err;
4119             }
4120             *offset += chunk_size;
4121             break;
4122         }
4123         case FOURCC('s', 'a', 'i', 'o'): {
4124             status_t err;
4125             if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
4126                 return err;
4127             }
4128             *offset += chunk_size;
4129             break;
4130         }
4131 
4132         case FOURCC('s', 'e', 'n', 'c'): {
4133             status_t err;
4134             if ((err = parseSampleEncryption(data_offset)) != OK) {
4135                 return err;
4136             }
4137             *offset += chunk_size;
4138             break;
4139         }
4140 
4141         case FOURCC('m', 'd', 'a', 't'): {
4142             // parse DRM info if present
4143             ALOGV("MPEG4Source::parseChunk mdat");
4144             // if saiz/saoi was previously observed, do something with the sampleinfos
4145             *offset += chunk_size;
4146             break;
4147         }
4148 
4149         default: {
4150             *offset += chunk_size;
4151             break;
4152         }
4153     }
4154     return OK;
4155 }
4156 
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)4157 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4158         off64_t offset, off64_t /* size */) {
4159     ALOGV("parseSampleAuxiliaryInformationSizes");
4160     // 14496-12 8.7.12
4161     uint8_t version;
4162     if (mDataSource->readAt(
4163             offset, &version, sizeof(version))
4164             < (ssize_t)sizeof(version)) {
4165         return ERROR_IO;
4166     }
4167 
4168     if (version != 0) {
4169         return ERROR_UNSUPPORTED;
4170     }
4171     offset++;
4172 
4173     uint32_t flags;
4174     if (!mDataSource->getUInt24(offset, &flags)) {
4175         return ERROR_IO;
4176     }
4177     offset += 3;
4178 
4179     if (flags & 1) {
4180         uint32_t tmp;
4181         if (!mDataSource->getUInt32(offset, &tmp)) {
4182             return ERROR_MALFORMED;
4183         }
4184         mCurrentAuxInfoType = tmp;
4185         offset += 4;
4186         if (!mDataSource->getUInt32(offset, &tmp)) {
4187             return ERROR_MALFORMED;
4188         }
4189         mCurrentAuxInfoTypeParameter = tmp;
4190         offset += 4;
4191     }
4192 
4193     uint8_t defsize;
4194     if (mDataSource->readAt(offset, &defsize, 1) != 1) {
4195         return ERROR_MALFORMED;
4196     }
4197     mCurrentDefaultSampleInfoSize = defsize;
4198     offset++;
4199 
4200     uint32_t smplcnt;
4201     if (!mDataSource->getUInt32(offset, &smplcnt)) {
4202         return ERROR_MALFORMED;
4203     }
4204     mCurrentSampleInfoCount = smplcnt;
4205     offset += 4;
4206 
4207     if (mCurrentDefaultSampleInfoSize != 0) {
4208         ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
4209         return OK;
4210     }
4211     if (smplcnt > mCurrentSampleInfoAllocSize) {
4212         uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
4213         if (newPtr == NULL) {
4214             ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
4215             return NO_MEMORY;
4216         }
4217         mCurrentSampleInfoSizes = newPtr;
4218         mCurrentSampleInfoAllocSize = smplcnt;
4219     }
4220 
4221     mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
4222     return OK;
4223 }
4224 
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)4225 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
4226         off64_t offset, off64_t /* size */) {
4227     ALOGV("parseSampleAuxiliaryInformationOffsets");
4228     // 14496-12 8.7.13
4229     uint8_t version;
4230     if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
4231         return ERROR_IO;
4232     }
4233     offset++;
4234 
4235     uint32_t flags;
4236     if (!mDataSource->getUInt24(offset, &flags)) {
4237         return ERROR_IO;
4238     }
4239     offset += 3;
4240 
4241     uint32_t entrycount;
4242     if (!mDataSource->getUInt32(offset, &entrycount)) {
4243         return ERROR_IO;
4244     }
4245     offset += 4;
4246     if (entrycount == 0) {
4247         return OK;
4248     }
4249     if (entrycount > UINT32_MAX / 8) {
4250         return ERROR_MALFORMED;
4251     }
4252 
4253     if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
4254         uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
4255         if (newPtr == NULL) {
4256             ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
4257             return NO_MEMORY;
4258         }
4259         mCurrentSampleInfoOffsets = newPtr;
4260         mCurrentSampleInfoOffsetsAllocSize = entrycount;
4261     }
4262     mCurrentSampleInfoOffsetCount = entrycount;
4263 
4264     if (mCurrentSampleInfoOffsets == NULL) {
4265         return OK;
4266     }
4267 
4268     for (size_t i = 0; i < entrycount; i++) {
4269         if (version == 0) {
4270             uint32_t tmp;
4271             if (!mDataSource->getUInt32(offset, &tmp)) {
4272                 return ERROR_IO;
4273             }
4274             mCurrentSampleInfoOffsets[i] = tmp;
4275             offset += 4;
4276         } else {
4277             uint64_t tmp;
4278             if (!mDataSource->getUInt64(offset, &tmp)) {
4279                 return ERROR_IO;
4280             }
4281             mCurrentSampleInfoOffsets[i] = tmp;
4282             offset += 8;
4283         }
4284     }
4285 
4286     // parse clear/encrypted data
4287 
4288     off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
4289 
4290     drmoffset += mCurrentMoofOffset;
4291 
4292     return parseClearEncryptedSizes(drmoffset, false, 0);
4293 }
4294 
parseClearEncryptedSizes(off64_t offset,bool isSubsampleEncryption,uint32_t flags)4295 status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
4296 
4297     int ivlength;
4298     CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength));
4299 
4300     // only 0, 8 and 16 byte initialization vectors are supported
4301     if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
4302         ALOGW("unsupported IV length: %d", ivlength);
4303         return ERROR_MALFORMED;
4304     }
4305 
4306     uint32_t sampleCount = mCurrentSampleInfoCount;
4307     if (isSubsampleEncryption) {
4308         if (!mDataSource->getUInt32(offset, &sampleCount)) {
4309             return ERROR_IO;
4310         }
4311         offset += 4;
4312     }
4313 
4314     // read CencSampleAuxiliaryDataFormats
4315     for (size_t i = 0; i < sampleCount; i++) {
4316         if (i >= mCurrentSamples.size()) {
4317             ALOGW("too few samples");
4318             break;
4319         }
4320         Sample *smpl = &mCurrentSamples.editItemAt(i);
4321         if (!smpl->clearsizes.isEmpty()) {
4322             continue;
4323         }
4324 
4325         memset(smpl->iv, 0, 16);
4326         if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
4327             return ERROR_IO;
4328         }
4329 
4330         offset += ivlength;
4331 
4332         bool readSubsamples;
4333         if (isSubsampleEncryption) {
4334             readSubsamples = flags & 2;
4335         } else {
4336             int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
4337             if (smplinfosize == 0) {
4338                 smplinfosize = mCurrentSampleInfoSizes[i];
4339             }
4340             readSubsamples = smplinfosize > ivlength;
4341         }
4342 
4343         if (readSubsamples) {
4344             uint16_t numsubsamples;
4345             if (!mDataSource->getUInt16(offset, &numsubsamples)) {
4346                 return ERROR_IO;
4347             }
4348             offset += 2;
4349             for (size_t j = 0; j < numsubsamples; j++) {
4350                 uint16_t numclear;
4351                 uint32_t numencrypted;
4352                 if (!mDataSource->getUInt16(offset, &numclear)) {
4353                     return ERROR_IO;
4354                 }
4355                 offset += 2;
4356                 if (!mDataSource->getUInt32(offset, &numencrypted)) {
4357                     return ERROR_IO;
4358                 }
4359                 offset += 4;
4360                 smpl->clearsizes.add(numclear);
4361                 smpl->encryptedsizes.add(numencrypted);
4362             }
4363         } else {
4364             smpl->clearsizes.add(0);
4365             smpl->encryptedsizes.add(smpl->size);
4366         }
4367     }
4368 
4369     return OK;
4370 }
4371 
parseSampleEncryption(off64_t offset)4372 status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
4373     uint32_t flags;
4374     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4375         return ERROR_MALFORMED;
4376     }
4377     return parseClearEncryptedSizes(offset + 4, true, flags);
4378 }
4379 
parseTrackFragmentHeader(off64_t offset,off64_t size)4380 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
4381 
4382     if (size < 8) {
4383         return -EINVAL;
4384     }
4385 
4386     uint32_t flags;
4387     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4388         return ERROR_MALFORMED;
4389     }
4390 
4391     if (flags & 0xff000000) {
4392         return -EINVAL;
4393     }
4394 
4395     if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
4396         return ERROR_MALFORMED;
4397     }
4398 
4399     if (mLastParsedTrackId != mTrackId) {
4400         // this is not the right track, skip it
4401         return OK;
4402     }
4403 
4404     mTrackFragmentHeaderInfo.mFlags = flags;
4405     mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
4406     offset += 8;
4407     size -= 8;
4408 
4409     ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
4410 
4411     if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
4412         if (size < 8) {
4413             return -EINVAL;
4414         }
4415 
4416         if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
4417             return ERROR_MALFORMED;
4418         }
4419         offset += 8;
4420         size -= 8;
4421     }
4422 
4423     if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
4424         if (size < 4) {
4425             return -EINVAL;
4426         }
4427 
4428         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
4429             return ERROR_MALFORMED;
4430         }
4431         offset += 4;
4432         size -= 4;
4433     }
4434 
4435     if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4436         if (size < 4) {
4437             return -EINVAL;
4438         }
4439 
4440         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
4441             return ERROR_MALFORMED;
4442         }
4443         offset += 4;
4444         size -= 4;
4445     }
4446 
4447     if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4448         if (size < 4) {
4449             return -EINVAL;
4450         }
4451 
4452         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
4453             return ERROR_MALFORMED;
4454         }
4455         offset += 4;
4456         size -= 4;
4457     }
4458 
4459     if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4460         if (size < 4) {
4461             return -EINVAL;
4462         }
4463 
4464         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
4465             return ERROR_MALFORMED;
4466         }
4467         offset += 4;
4468         size -= 4;
4469     }
4470 
4471     if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
4472         mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
4473     }
4474 
4475     mTrackFragmentHeaderInfo.mDataOffset = 0;
4476     return OK;
4477 }
4478 
parseTrackFragmentRun(off64_t offset,off64_t size)4479 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
4480 
4481     ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4482     if (size < 8) {
4483         return -EINVAL;
4484     }
4485 
4486     enum {
4487         kDataOffsetPresent                  = 0x01,
4488         kFirstSampleFlagsPresent            = 0x04,
4489         kSampleDurationPresent              = 0x100,
4490         kSampleSizePresent                  = 0x200,
4491         kSampleFlagsPresent                 = 0x400,
4492         kSampleCompositionTimeOffsetPresent = 0x800,
4493     };
4494 
4495     uint32_t flags;
4496     if (!mDataSource->getUInt32(offset, &flags)) {
4497         return ERROR_MALFORMED;
4498     }
4499     // |version| only affects SampleCompositionTimeOffset field.
4500     // If version == 0, SampleCompositionTimeOffset is uint32_t;
4501     // Otherwise, SampleCompositionTimeOffset is int32_t.
4502     // Sample.compositionOffset is defined as int32_t.
4503     uint8_t version = flags >> 24;
4504     flags &= 0xffffff;
4505     ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
4506 
4507     if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4508         // These two shall not be used together.
4509         return -EINVAL;
4510     }
4511 
4512     uint32_t sampleCount;
4513     if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4514         return ERROR_MALFORMED;
4515     }
4516     offset += 8;
4517     size -= 8;
4518 
4519     uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4520 
4521     uint32_t firstSampleFlags = 0;
4522 
4523     if (flags & kDataOffsetPresent) {
4524         if (size < 4) {
4525             return -EINVAL;
4526         }
4527 
4528         int32_t dataOffsetDelta;
4529         if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4530             return ERROR_MALFORMED;
4531         }
4532 
4533         dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4534 
4535         offset += 4;
4536         size -= 4;
4537     }
4538 
4539     if (flags & kFirstSampleFlagsPresent) {
4540         if (size < 4) {
4541             return -EINVAL;
4542         }
4543 
4544         if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4545             return ERROR_MALFORMED;
4546         }
4547         offset += 4;
4548         size -= 4;
4549     }
4550 
4551     uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4552              sampleCtsOffset = 0;
4553 
4554     size_t bytesPerSample = 0;
4555     if (flags & kSampleDurationPresent) {
4556         bytesPerSample += 4;
4557     } else if (mTrackFragmentHeaderInfo.mFlags
4558             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4559         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4560     } else if (mTrex) {
4561         sampleDuration = mTrex->default_sample_duration;
4562     }
4563 
4564     if (flags & kSampleSizePresent) {
4565         bytesPerSample += 4;
4566     } else if (mTrackFragmentHeaderInfo.mFlags
4567             & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4568         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4569     } else {
4570         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4571     }
4572 
4573     if (flags & kSampleFlagsPresent) {
4574         bytesPerSample += 4;
4575     } else if (mTrackFragmentHeaderInfo.mFlags
4576             & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4577         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4578     } else {
4579         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4580     }
4581 
4582     if (flags & kSampleCompositionTimeOffsetPresent) {
4583         bytesPerSample += 4;
4584     } else {
4585         sampleCtsOffset = 0;
4586     }
4587 
4588     if (size < (off64_t)(sampleCount * bytesPerSample)) {
4589         return -EINVAL;
4590     }
4591 
4592     Sample tmp;
4593     for (uint32_t i = 0; i < sampleCount; ++i) {
4594         if (flags & kSampleDurationPresent) {
4595             if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4596                 return ERROR_MALFORMED;
4597             }
4598             offset += 4;
4599         }
4600 
4601         if (flags & kSampleSizePresent) {
4602             if (!mDataSource->getUInt32(offset, &sampleSize)) {
4603                 return ERROR_MALFORMED;
4604             }
4605             offset += 4;
4606         }
4607 
4608         if (flags & kSampleFlagsPresent) {
4609             if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4610                 return ERROR_MALFORMED;
4611             }
4612             offset += 4;
4613         }
4614 
4615         if (flags & kSampleCompositionTimeOffsetPresent) {
4616             if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4617                 return ERROR_MALFORMED;
4618             }
4619             offset += 4;
4620         }
4621 
4622         ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4623               " flags 0x%08x", i + 1,
4624                 dataOffset, sampleSize, sampleDuration,
4625                 (flags & kFirstSampleFlagsPresent) && i == 0
4626                     ? firstSampleFlags : sampleFlags);
4627         tmp.offset = dataOffset;
4628         tmp.size = sampleSize;
4629         tmp.duration = sampleDuration;
4630         tmp.compositionOffset = sampleCtsOffset;
4631         memset(tmp.iv, 0, sizeof(tmp.iv));
4632         mCurrentSamples.add(tmp);
4633 
4634         dataOffset += sampleSize;
4635     }
4636 
4637     mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4638 
4639     return OK;
4640 }
4641 
getFormat(MetaDataBase & meta)4642 status_t MPEG4Source::getFormat(MetaDataBase &meta) {
4643     Mutex::Autolock autoLock(mLock);
4644     meta = mFormat;
4645     return OK;
4646 }
4647 
parseNALSize(const uint8_t * data) const4648 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4649     switch (mNALLengthSize) {
4650         case 1:
4651             return *data;
4652         case 2:
4653             return U16_AT(data);
4654         case 3:
4655             return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4656         case 4:
4657             return U32_AT(data);
4658     }
4659 
4660     // This cannot happen, mNALLengthSize springs to life by adding 1 to
4661     // a 2-bit integer.
4662     CHECK(!"Should not be here.");
4663 
4664     return 0;
4665 }
4666 
// Reads the next unit of media data from this track into a buffer acquired
// from mGroup and returns it through |out|.
//
// When mFirstMoofOffset is set (> 0) the call is delegated to
// fragmentedRead(). Otherwise an optional seek request in |options| is applied
// first, and then one of three output forms is produced:
//   1. Track is neither AVC nor HEVC: the whole sample, unmodified.
//   2. AVC/HEVC with mWantsNALFragments: one NAL unit per call (without its
//      length prefix), returned as a clone of the sample buffer; the sample
//      buffer is kept in mBuffer until all of its NAL units were handed out.
//   3. AVC/HEVC otherwise: the whole sample with every NAL length prefix
//      replaced by a 00 00 00 01 start code, or the unmodified sample when the
//      format is flagged as DRM.
//
// Buffers carry kKeyTime and kKeyDuration (converted to microseconds using
// mTimescale), kKeyTargetTime after a SEEK_CLOSEST / SEEK_FRAME_INDEX seek,
// and kKeyIsSyncFrame for sync samples.
//
// Returns OK on success; WOULD_BLOCK for a non-blocking request when mGroup
// reports no buffers; ERROR_END_OF_STREAM when seeking out of range;
// ERROR_BUFFER_TOO_SMALL when the sample does not fit the acquired buffer;
// ERROR_IO on a short read; ERROR_MALFORMED for inconsistent NAL lengths;
// or the error reported by the sample/item table or the buffer group.
status_t MPEG4Source::read(
        MediaBufferBase **out, const ReadOptions *options) {
    Mutex::Autolock autoLock(mLock);

    CHECK(mStarted);

    // Non-blocking callers are told to retry when mGroup reports no buffers.
    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
        *out = nullptr;
        return WOULD_BLOCK;
    }

    if (mFirstMoofOffset > 0) {
        return fragmentedRead(out, options);
    }

    *out = NULL;

    // Stays negative unless a SEEK_CLOSEST / SEEK_FRAME_INDEX seek sets it.
    int64_t targetSampleTimeUs = -1;

    int64_t seekTimeUs;
    ReadOptions::SeekMode mode;
    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
        if (mIsHeif) {
            CHECK(mSampleTable == NULL);
            CHECK(mItemTable != NULL);
            int32_t imageIndex;
            if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
                return ERROR_MALFORMED;
            }

            // A negative seek time selects the thumbnail item instead of the
            // image item itself.
            status_t err;
            if (seekTimeUs >= 0) {
                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
            } else {
                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
            }
            if (err != OK) {
                return err;
            }
        } else {
            // Translate the requested seek mode into sample table lookup flags.
            uint32_t findFlags = 0;
            switch (mode) {
                case ReadOptions::SEEK_PREVIOUS_SYNC:
                    findFlags = SampleTable::kFlagBefore;
                    break;
                case ReadOptions::SEEK_NEXT_SYNC:
                    findFlags = SampleTable::kFlagAfter;
                    break;
                case ReadOptions::SEEK_CLOSEST_SYNC:
                case ReadOptions::SEEK_CLOSEST:
                    findFlags = SampleTable::kFlagClosest;
                    break;
                case ReadOptions::SEEK_FRAME_INDEX:
                    findFlags = SampleTable::kFlagFrameIndex;
                    break;
                default:
                    CHECK(!"Should not be here.");
                    break;
            }

            uint32_t sampleIndex;
            status_t err = mSampleTable->findSampleAtTime(
                    seekTimeUs, 1000000, mTimescale,
                    &sampleIndex, findFlags);

            if (mode == ReadOptions::SEEK_CLOSEST
                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                // We found the closest sample already, now we want the sync
                // sample preceding it (or the sample itself of course), even
                // if the subsequent sync sample is closer.
                findFlags = SampleTable::kFlagBefore;
            }

            uint32_t syncSampleIndex;
            if (err == OK) {
                err = mSampleTable->findSyncSampleNear(
                        sampleIndex, &syncSampleIndex, findFlags);
            }

            uint32_t sampleTime;
            if (err == OK) {
                err = mSampleTable->getMetaDataForSample(
                        sampleIndex, NULL, NULL, &sampleTime);
            }

            if (err != OK) {
                if (err == ERROR_OUT_OF_RANGE) {
                    // An attempt to seek past the end of the stream would
                    // normally cause this ERROR_OUT_OF_RANGE error. Propagating
                    // this all the way to the MediaPlayer would cause abnormal
                    // termination. Legacy behaviour appears to be to behave as if
                    // we had seeked to the end of stream, ending normally.
                    err = ERROR_END_OF_STREAM;
                }
                ALOGV("end of stream");
                return err;
            }

            // Remember the time of the sample that was actually asked for;
            // it is attached to the returned buffer as kKeyTargetTime.
            if (mode == ReadOptions::SEEK_CLOSEST
                || mode == ReadOptions::SEEK_FRAME_INDEX) {
                targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
            }

#if 0
            uint32_t syncSampleTime;
            CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
                        syncSampleIndex, NULL, NULL, &syncSampleTime));

            ALOGI("seek to time %lld us => sample at time %lld us, "
                 "sync sample at time %lld us",
                 seekTimeUs,
                 sampleTime * 1000000ll / mTimescale,
                 syncSampleTime * 1000000ll / mTimescale);
#endif

            // Reading resumes at the sync sample, not at the target sample.
            mCurrentSampleIndex = syncSampleIndex;
        }

        // Drop a partially consumed sample buffer so that a fresh sample is
        // fetched below.
        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }

        // fall through
    }

    off64_t offset = 0;
    size_t size = 0;
    // cts, stts and isSyncSample are only assigned (and only used) when a new
    // buffer is set up below.
    uint32_t cts, stts;
    bool isSyncSample;
    bool newBuffer = false;
    if (mBuffer == NULL) {
        newBuffer = true;

        // Look up location, size and timing of the current sample (or image).
        status_t err;
        if (!mIsHeif) {
            err = mSampleTable->getMetaDataForSample(
                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
        } else {
            // The item index is only passed when this call carries a seek
            // request; otherwise NULL is passed.
            err = mItemTable->getImageOffsetAndSize(
                    options && options->getSeekTo(&seekTimeUs, &mode) ?
                            &mCurrentSampleIndex : NULL, &offset, &size);

            cts = stts = 0;
            isSyncSample = 0;
            ALOGV("image offset %lld, size %zu", (long long)offset, size);
        }

        if (err != OK) {
            return err;
        }

        err = mGroup->acquire_buffer(&mBuffer);

        if (err != OK) {
            CHECK(mBuffer == NULL);
            return err;
        }
        if (size > mBuffer->size()) {
            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
            mBuffer->release();
            mBuffer = NULL;
            return ERROR_BUFFER_TOO_SMALL;
        }
    }

    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
        if (newBuffer) {
            // Read the complete sample straight into the output buffer.
            ssize_t num_bytes_read =
                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);

            if (num_bytes_read < (ssize_t)size) {
                mBuffer->release();
                mBuffer = NULL;

                return ERROR_IO;
            }

            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);
            mBuffer->meta_data().clear();
            mBuffer->meta_data().setInt64(
                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
            mBuffer->meta_data().setInt64(
                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

            if (targetSampleTimeUs >= 0) {
                mBuffer->meta_data().setInt64(
                        kKeyTargetTime, targetSampleTimeUs);
            }

            if (isSyncSample) {
                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
            }

            ++mCurrentSampleIndex;
        }

        // Anything that is not AVC/HEVC is returned as one whole sample.
        if (!mIsAVC && !mIsHEVC) {
            *out = mBuffer;
            mBuffer = NULL;

            return OK;
        }

        // Each NAL unit is split up into its constituent fragments and
        // each one of them returned in its own buffer.

        CHECK(mBuffer->range_length() >= mNALLengthSize);

        const uint8_t *src =
            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();

        size_t nal_size = parseNALSize(src);
        // This overflow is only logged here; the range check that follows
        // rejects any NAL size larger than the remaining data.
        if (mNALLengthSize > SIZE_MAX - nal_size) {
            ALOGE("b/24441553, b/24445122");
        }
        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
            ALOGE("incomplete NAL unit.");

            mBuffer->release();
            mBuffer = NULL;

            return ERROR_MALFORMED;
        }

        // The clone covers just this NAL unit's payload (length prefix
        // excluded).
        MediaBufferBase *clone = mBuffer->clone();
        CHECK(clone != NULL);
        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);

        // Advance the sample buffer's range past the NAL unit just handed out.
        CHECK(mBuffer != NULL);
        mBuffer->set_range(
                mBuffer->range_offset() + mNALLengthSize + nal_size,
                mBuffer->range_length() - mNALLengthSize - nal_size);

        // Once the sample is fully consumed, the next call starts a new one.
        if (mBuffer->range_length() == 0) {
            mBuffer->release();
            mBuffer = NULL;
        }

        *out = clone;

        return OK;
    } else {
        // Whole NAL units are returned but each fragment is prefixed by
        // the start code (0x00 00 00 01).
        ssize_t num_bytes_read = 0;
        int32_t drm = 0;
        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
        // DRM samples are read straight into the output buffer and returned
        // unmodified; other samples are staged in mSrcBuffer for rewriting.
        if (usesDRM) {
            num_bytes_read =
                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
        } else {
            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
        }

        if (num_bytes_read < (ssize_t)size) {
            mBuffer->release();
            mBuffer = NULL;

            return ERROR_IO;
        }

        if (usesDRM) {
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);

        } else {
            uint8_t *dstData = (uint8_t *)mBuffer->data();
            size_t srcOffset = 0;
            size_t dstOffset = 0;

            // Walk the length-prefixed NAL units in mSrcBuffer and copy each
            // one to the output behind a 4-byte start code.
            while (srcOffset < size) {
                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
                size_t nalLength = 0;
                if (!isMalFormed) {
                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                    srcOffset += mNALLengthSize;
                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
                }

                if (isMalFormed) {
                    ALOGE("Video is malformed");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                // Empty NAL units are dropped; srcOffset already points past
                // their length field.
                if (nalLength == 0) {
                    continue;
                }

                // Guard against overflow of dstOffset + 4 + nalLength and
                // against writing past the end of the output buffer.
                if (dstOffset > SIZE_MAX - 4 ||
                        dstOffset + 4 > SIZE_MAX - nalLength ||
                        dstOffset + 4 + nalLength > mBuffer->size()) {
                    ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
                    android_errorWriteLog(0x534e4554, "27208621");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 1;
                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
                srcOffset += nalLength;
                dstOffset += nalLength;
            }
            CHECK_EQ(srcOffset, size);
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, dstOffset);
        }

        mBuffer->meta_data().clear();
        mBuffer->meta_data().setInt64(
                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
        mBuffer->meta_data().setInt64(
                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

        if (targetSampleTimeUs >= 0) {
            mBuffer->meta_data().setInt64(
                    kKeyTargetTime, targetSampleTimeUs);
        }

        if (mIsAVC) {
            uint32_t layerId = FindAVCLayerId(
                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
            mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
        }

        if (isSyncSample) {
            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
        }

        ++mCurrentSampleIndex;

        *out = mBuffer;
        mBuffer = NULL;

        return OK;
    }
}
5011 
fragmentedRead(MediaBufferBase ** out,const ReadOptions * options)5012 status_t MPEG4Source::fragmentedRead(
5013         MediaBufferBase **out, const ReadOptions *options) {
5014 
5015     ALOGV("MPEG4Source::fragmentedRead");
5016 
5017     CHECK(mStarted);
5018 
5019     *out = NULL;
5020 
5021     int64_t targetSampleTimeUs = -1;
5022 
5023     int64_t seekTimeUs;
5024     ReadOptions::SeekMode mode;
5025     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5026 
5027         int numSidxEntries = mSegments.size();
5028         if (numSidxEntries != 0) {
5029             int64_t totalTime = 0;
5030             off64_t totalOffset = mFirstMoofOffset;
5031             for (int i = 0; i < numSidxEntries; i++) {
5032                 const SidxEntry *se = &mSegments[i];
5033                 if (totalTime + se->mDurationUs > seekTimeUs) {
5034                     // The requested time is somewhere in this segment
5035                     if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
5036                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
5037                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
5038                         // requested next sync, or closest sync and it was closer to the end of
5039                         // this segment
5040                         totalTime += se->mDurationUs;
5041                         totalOffset += se->mSize;
5042                     }
5043                     break;
5044                 }
5045                 totalTime += se->mDurationUs;
5046                 totalOffset += se->mSize;
5047             }
5048             mCurrentMoofOffset = totalOffset;
5049             mNextMoofOffset = -1;
5050             mCurrentSamples.clear();
5051             mCurrentSampleIndex = 0;
5052             status_t err = parseChunk(&totalOffset);
5053             if (err != OK) {
5054                 return err;
5055             }
5056             mCurrentTime = totalTime * mTimescale / 1000000ll;
5057         } else {
5058             // without sidx boxes, we can only seek to 0
5059             mCurrentMoofOffset = mFirstMoofOffset;
5060             mNextMoofOffset = -1;
5061             mCurrentSamples.clear();
5062             mCurrentSampleIndex = 0;
5063             off64_t tmp = mCurrentMoofOffset;
5064             status_t err = parseChunk(&tmp);
5065             if (err != OK) {
5066                 return err;
5067             }
5068             mCurrentTime = 0;
5069         }
5070 
5071         if (mBuffer != NULL) {
5072             mBuffer->release();
5073             mBuffer = NULL;
5074         }
5075 
5076         // fall through
5077     }
5078 
5079     off64_t offset = 0;
5080     size_t size = 0;
5081     uint32_t cts = 0;
5082     bool isSyncSample = false;
5083     bool newBuffer = false;
5084     if (mBuffer == NULL) {
5085         newBuffer = true;
5086 
5087         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5088             // move to next fragment if there is one
5089             if (mNextMoofOffset <= mCurrentMoofOffset) {
5090                 return ERROR_END_OF_STREAM;
5091             }
5092             off64_t nextMoof = mNextMoofOffset;
5093             mCurrentMoofOffset = nextMoof;
5094             mCurrentSamples.clear();
5095             mCurrentSampleIndex = 0;
5096             status_t err = parseChunk(&nextMoof);
5097             if (err != OK) {
5098                 return err;
5099             }
5100             if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5101                 return ERROR_END_OF_STREAM;
5102             }
5103         }
5104 
5105         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5106         offset = smpl->offset;
5107         size = smpl->size;
5108         cts = mCurrentTime + smpl->compositionOffset;
5109         mCurrentTime += smpl->duration;
5110         isSyncSample = (mCurrentSampleIndex == 0); // XXX
5111 
5112         status_t err = mGroup->acquire_buffer(&mBuffer);
5113 
5114         if (err != OK) {
5115             CHECK(mBuffer == NULL);
5116             ALOGV("acquire_buffer returned %d", err);
5117             return err;
5118         }
5119         if (size > mBuffer->size()) {
5120             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5121             mBuffer->release();
5122             mBuffer = NULL;
5123             return ERROR_BUFFER_TOO_SMALL;
5124         }
5125     }
5126 
5127     const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5128     MetaDataBase &bufmeta = mBuffer->meta_data();
5129     bufmeta.clear();
5130     if (smpl->encryptedsizes.size()) {
5131         // store clear/encrypted lengths in metadata
5132         bufmeta.setData(kKeyPlainSizes, 0,
5133                 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
5134         bufmeta.setData(kKeyEncryptedSizes, 0,
5135                 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
5136         bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
5137         bufmeta.setInt32(kKeyCryptoMode, mCryptoMode);
5138         bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16);
5139         bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock);
5140         bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock);
5141 
5142         uint32_t type = 0;
5143         const void *iv = NULL;
5144         size_t ivlength = 0;
5145         if (!mFormat.findData(
5146                 kKeyCryptoIV, &type, &iv, &ivlength)) {
5147             iv = smpl->iv;
5148             ivlength = 16; // use 16 or the actual size?
5149         }
5150         bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength);
5151 
5152     }
5153 
5154     if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
5155         if (newBuffer) {
5156             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
5157                 mBuffer->release();
5158                 mBuffer = NULL;
5159 
5160                 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
5161                 return ERROR_MALFORMED;
5162             }
5163 
5164             ssize_t num_bytes_read =
5165                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5166 
5167             if (num_bytes_read < (ssize_t)size) {
5168                 mBuffer->release();
5169                 mBuffer = NULL;
5170 
5171                 ALOGE("i/o error");
5172                 return ERROR_IO;
5173             }
5174 
5175             CHECK(mBuffer != NULL);
5176             mBuffer->set_range(0, size);
5177             mBuffer->meta_data().setInt64(
5178                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5179             mBuffer->meta_data().setInt64(
5180                     kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5181 
5182             if (targetSampleTimeUs >= 0) {
5183                 mBuffer->meta_data().setInt64(
5184                         kKeyTargetTime, targetSampleTimeUs);
5185             }
5186 
5187             if (mIsAVC) {
5188                 uint32_t layerId = FindAVCLayerId(
5189                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5190                 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
5191             }
5192 
5193             if (isSyncSample) {
5194                 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5195             }
5196 
5197             ++mCurrentSampleIndex;
5198         }
5199 
5200         if (!mIsAVC && !mIsHEVC) {
5201             *out = mBuffer;
5202             mBuffer = NULL;
5203 
5204             return OK;
5205         }
5206 
5207         // Each NAL unit is split up into its constituent fragments and
5208         // each one of them returned in its own buffer.
5209 
5210         CHECK(mBuffer->range_length() >= mNALLengthSize);
5211 
5212         const uint8_t *src =
5213             (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
5214 
5215         size_t nal_size = parseNALSize(src);
5216         if (mNALLengthSize > SIZE_MAX - nal_size) {
5217             ALOGE("b/24441553, b/24445122");
5218         }
5219 
5220         if (mBuffer->range_length() - mNALLengthSize < nal_size) {
5221             ALOGE("incomplete NAL unit.");
5222 
5223             mBuffer->release();
5224             mBuffer = NULL;
5225 
5226             return ERROR_MALFORMED;
5227         }
5228 
5229         MediaBufferBase *clone = mBuffer->clone();
5230         CHECK(clone != NULL);
5231         clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
5232 
5233         CHECK(mBuffer != NULL);
5234         mBuffer->set_range(
5235                 mBuffer->range_offset() + mNALLengthSize + nal_size,
5236                 mBuffer->range_length() - mNALLengthSize - nal_size);
5237 
5238         if (mBuffer->range_length() == 0) {
5239             mBuffer->release();
5240             mBuffer = NULL;
5241         }
5242 
5243         *out = clone;
5244 
5245         return OK;
5246     } else {
5247         ALOGV("whole NAL");
5248         // Whole NAL units are returned but each fragment is prefixed by
5249         // the start code (0x00 00 00 01).
5250         ssize_t num_bytes_read = 0;
5251         int32_t drm = 0;
5252         bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
5253         void *data = NULL;
5254         bool isMalFormed = false;
5255         if (usesDRM) {
5256             if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
5257                 isMalFormed = true;
5258             } else {
5259                 data = mBuffer->data();
5260             }
5261         } else {
5262             int32_t max_size;
5263             if (!mFormat.findInt32(kKeyMaxInputSize, &max_size)
5264                     || !isInRange((size_t)0u, (size_t)max_size, size)) {
5265                 isMalFormed = true;
5266             } else {
5267                 data = mSrcBuffer;
5268             }
5269         }
5270 
5271         if (isMalFormed || data == NULL) {
5272             ALOGE("isMalFormed size %zu", size);
5273             if (mBuffer != NULL) {
5274                 mBuffer->release();
5275                 mBuffer = NULL;
5276             }
5277             return ERROR_MALFORMED;
5278         }
5279         num_bytes_read = mDataSource->readAt(offset, data, size);
5280 
5281         if (num_bytes_read < (ssize_t)size) {
5282             mBuffer->release();
5283             mBuffer = NULL;
5284 
5285             ALOGE("i/o error");
5286             return ERROR_IO;
5287         }
5288 
5289         if (usesDRM) {
5290             CHECK(mBuffer != NULL);
5291             mBuffer->set_range(0, size);
5292 
5293         } else {
5294             uint8_t *dstData = (uint8_t *)mBuffer->data();
5295             size_t srcOffset = 0;
5296             size_t dstOffset = 0;
5297 
5298             while (srcOffset < size) {
5299                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5300                 size_t nalLength = 0;
5301                 if (!isMalFormed) {
5302                     nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5303                     srcOffset += mNALLengthSize;
5304                     isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
5305                             || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
5306                             || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
5307                 }
5308 
5309                 if (isMalFormed) {
5310                     ALOGE("Video is malformed; nalLength %zu", nalLength);
5311                     mBuffer->release();
5312                     mBuffer = NULL;
5313                     return ERROR_MALFORMED;
5314                 }
5315 
5316                 if (nalLength == 0) {
5317                     continue;
5318                 }
5319 
5320                 if (dstOffset > SIZE_MAX - 4 ||
5321                         dstOffset + 4 > SIZE_MAX - nalLength ||
5322                         dstOffset + 4 + nalLength > mBuffer->size()) {
5323                     ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
5324                     android_errorWriteLog(0x534e4554, "26365349");
5325                     mBuffer->release();
5326                     mBuffer = NULL;
5327                     return ERROR_MALFORMED;
5328                 }
5329 
5330                 dstData[dstOffset++] = 0;
5331                 dstData[dstOffset++] = 0;
5332                 dstData[dstOffset++] = 0;
5333                 dstData[dstOffset++] = 1;
5334                 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5335                 srcOffset += nalLength;
5336                 dstOffset += nalLength;
5337             }
5338             CHECK_EQ(srcOffset, size);
5339             CHECK(mBuffer != NULL);
5340             mBuffer->set_range(0, dstOffset);
5341         }
5342 
5343         mBuffer->meta_data().setInt64(
5344                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5345         mBuffer->meta_data().setInt64(
5346                 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5347 
5348         if (targetSampleTimeUs >= 0) {
5349             mBuffer->meta_data().setInt64(
5350                     kKeyTargetTime, targetSampleTimeUs);
5351         }
5352 
5353         if (isSyncSample) {
5354             mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5355         }
5356 
5357         ++mCurrentSampleIndex;
5358 
5359         *out = mBuffer;
5360         mBuffer = NULL;
5361 
5362         return OK;
5363     }
5364 }
5365 
findTrackByMimePrefix(const char * mimePrefix)5366 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
5367         const char *mimePrefix) {
5368     for (Track *track = mFirstTrack; track != NULL; track = track->next) {
5369         const char *mime;
5370         if (track->meta.findCString(kKeyMIMEType, &mime)
5371                 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
5372             return track;
5373         }
5374     }
5375 
5376     return NULL;
5377 }
5378 
LegacySniffMPEG4(DataSourceBase * source,float * confidence)5379 static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) {
5380     uint8_t header[8];
5381 
5382     ssize_t n = source->readAt(4, header, sizeof(header));
5383     if (n < (ssize_t)sizeof(header)) {
5384         return false;
5385     }
5386 
5387     if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
5388         || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
5389         || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
5390         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
5391         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
5392         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
5393         || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
5394         || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
5395         *confidence = 0.4;
5396 
5397         return true;
5398     }
5399 
5400     return false;
5401 }
5402 
isCompatibleBrand(uint32_t fourcc)5403 static bool isCompatibleBrand(uint32_t fourcc) {
5404     static const uint32_t kCompatibleBrands[] = {
5405         FOURCC('i', 's', 'o', 'm'),
5406         FOURCC('i', 's', 'o', '2'),
5407         FOURCC('a', 'v', 'c', '1'),
5408         FOURCC('h', 'v', 'c', '1'),
5409         FOURCC('h', 'e', 'v', '1'),
5410         FOURCC('3', 'g', 'p', '4'),
5411         FOURCC('m', 'p', '4', '1'),
5412         FOURCC('m', 'p', '4', '2'),
5413         FOURCC('d', 'a', 's', 'h'),
5414 
5415         // Won't promise that the following file types can be played.
5416         // Just give these file types a chance.
5417         FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
5418         FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
5419 
5420         FOURCC('3', 'g', '2', 'a'),  // 3GPP2
5421         FOURCC('3', 'g', '2', 'b'),
5422         FOURCC('m', 'i', 'f', '1'),  // HEIF image
5423         FOURCC('h', 'e', 'i', 'c'),  // HEIF image
5424         FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
5425         FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
5426     };
5427 
5428     for (size_t i = 0;
5429          i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
5430          ++i) {
5431         if (kCompatibleBrands[i] == fourcc) {
5432             return true;
5433         }
5434     }
5435 
5436     return false;
5437 }
5438 
5439 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
5440 // compatible brand is present.
5441 // Also try to identify where this file's metadata ends
5442 // (end of the 'moov' atom) and report it to the caller as part of
5443 // the metadata.
BetterSniffMPEG4(DataSourceBase * source,float * confidence)5444 static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) {
5445     // We scan up to 128 bytes to identify this file as an MP4.
5446     static const off64_t kMaxScanOffset = 128ll;
5447 
5448     off64_t offset = 0ll;
5449     bool foundGoodFileType = false;
5450     off64_t moovAtomEndOffset = -1ll;
5451     bool done = false;
5452 
5453     while (!done && offset < kMaxScanOffset) {
5454         uint32_t hdr[2];
5455         if (source->readAt(offset, hdr, 8) < 8) {
5456             return false;
5457         }
5458 
5459         uint64_t chunkSize = ntohl(hdr[0]);
5460         uint32_t chunkType = ntohl(hdr[1]);
5461         off64_t chunkDataOffset = offset + 8;
5462 
5463         if (chunkSize == 1) {
5464             if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
5465                 return false;
5466             }
5467 
5468             chunkSize = ntoh64(chunkSize);
5469             chunkDataOffset += 8;
5470 
5471             if (chunkSize < 16) {
5472                 // The smallest valid chunk is 16 bytes long in this case.
5473                 return false;
5474             }
5475 
5476         } else if (chunkSize < 8) {
5477             // The smallest valid chunk is 8 bytes long.
5478             return false;
5479         }
5480 
5481         // (data_offset - offset) is either 8 or 16
5482         off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
5483         if (chunkDataSize < 0) {
5484             ALOGE("b/23540914");
5485             return false;
5486         }
5487 
5488         char chunkstring[5];
5489         MakeFourCCString(chunkType, chunkstring);
5490         ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
5491         switch (chunkType) {
5492             case FOURCC('f', 't', 'y', 'p'):
5493             {
5494                 if (chunkDataSize < 8) {
5495                     return false;
5496                 }
5497 
5498                 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
5499                 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
5500                     if (i == 1) {
5501                         // Skip this index, it refers to the minorVersion,
5502                         // not a brand.
5503                         continue;
5504                     }
5505 
5506                     uint32_t brand;
5507                     if (source->readAt(
5508                                 chunkDataOffset + 4 * i, &brand, 4) < 4) {
5509                         return false;
5510                     }
5511 
5512                     brand = ntohl(brand);
5513 
5514                     if (isCompatibleBrand(brand)) {
5515                         foundGoodFileType = true;
5516                         break;
5517                     }
5518                 }
5519 
5520                 if (!foundGoodFileType) {
5521                     return false;
5522                 }
5523 
5524                 break;
5525             }
5526 
5527             case FOURCC('m', 'o', 'o', 'v'):
5528             {
5529                 moovAtomEndOffset = offset + chunkSize;
5530 
5531                 done = true;
5532                 break;
5533             }
5534 
5535             default:
5536                 break;
5537         }
5538 
5539         offset += chunkSize;
5540     }
5541 
5542     if (!foundGoodFileType) {
5543         return false;
5544     }
5545 
5546     *confidence = 0.4f;
5547 
5548     return true;
5549 }
5550 
CreateExtractor(DataSourceBase * source,void *)5551 static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) {
5552     return new MPEG4Extractor(source);
5553 }
5554 
Sniff(DataSourceBase * source,float * confidence,void **,MediaExtractor::FreeMetaFunc *)5555 static MediaExtractor::CreatorFunc Sniff(
5556         DataSourceBase *source, float *confidence, void **,
5557         MediaExtractor::FreeMetaFunc *) {
5558     if (BetterSniffMPEG4(source, confidence)) {
5559         return CreateExtractor;
5560     }
5561 
5562     if (LegacySniffMPEG4(source, confidence)) {
5563         ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5564         return CreateExtractor;
5565     }
5566 
5567     return NULL;
5568 }
5569 
5570 extern "C" {
5571 // This is the only symbol that needs to be exported
5572 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()5573 MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
5574     return {
5575         MediaExtractor::EXTRACTORDEF_VERSION,
5576         UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
5577         1, // version
5578         "MP4 Extractor",
5579         Sniff
5580     };
5581 }
5582 
5583 } // extern "C"
5584 
5585 }  // namespace android
5586