1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19 
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <memory>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include <log/log.h>
28 #include <utils/Log.h>
29 
30 #include "AC4Parser.h"
31 #include "MPEG4Extractor.h"
32 #include "SampleTable.h"
33 #include "ItemTable.h"
34 #include "include/ESDS.h"
35 
36 #include <media/DataSourceBase.h>
37 #include <media/ExtractorUtils.h>
38 #include <media/stagefright/foundation/ABitReader.h>
39 #include <media/stagefright/foundation/ABuffer.h>
40 #include <media/stagefright/foundation/ADebug.h>
41 #include <media/stagefright/foundation/AMessage.h>
42 #include <media/stagefright/foundation/AudioPresentationInfo.h>
43 #include <media/stagefright/foundation/AUtils.h>
44 #include <media/stagefright/foundation/ByteUtils.h>
45 #include <media/stagefright/foundation/ColorUtils.h>
46 #include <media/stagefright/foundation/avc_utils.h>
47 #include <media/stagefright/foundation/hexdump.h>
48 #include <media/stagefright/foundation/OpusHeader.h>
49 #include <media/stagefright/MediaBufferGroup.h>
50 #include <media/stagefright/MediaDefs.h>
51 #include <media/stagefright/MetaDataBase.h>
52 #include <utils/String8.h>
53 
54 #include <byteswap.h>
55 #include "include/ID3.h"
56 
57 #ifndef UINT32_MAX
58 #define UINT32_MAX       (4294967295U)
59 #endif
60 
61 #define ALAC_SPECIFIC_INFO_SIZE (36)
62 
63 namespace android {
64 
// Size limits used by the extractor code in this file.
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the MPEG2 stream header read to this size)
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (64 MiB; parseChunk() does not apply this limit to 'mdat')
    kMaxAtomSize = 64 * 1024 * 1024,
};
73 
// Per-track source object derived from MediaTrackHelper. Only the declaration
// is visible in this part of the file; apart from supportsNonBlockingRead()
// the member functions are defined elsewhere.
class MPEG4Source : public MediaTrackHelper {
static const size_t  kMaxPcmFrameSize = 8192;
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(AMediaFormat *format,
                DataSourceHelper *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable,
                uint64_t elstShiftStartTicks);
    virtual status_t init();

    virtual media_status_t start();
    virtual media_status_t stop();

    virtual media_status_t getFormat(AMediaFormat *);

    virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
    // Always reports true.
    bool supportsNonBlockingRead() override { return true; }
    virtual media_status_t fragmentedRead(
            MediaBufferHelper **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // Track-level inputs and read-position state.
    // NOTE(review): presumably these mirror the constructor arguments of the
    // same name; the constructor body is not visible here - confirm.
    AMediaFormat *mFormat;
    DataSourceHelper *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference, so the referenced vector must outlive this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime; // in media timescale ticks
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    // Encryption parameters and sample auxiliary information bookkeeping.
    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    // Codec classification flags for this track.
    bool mIsAVC;
    bool mIsHEVC;
    bool mIsAC4;
    bool mIsPcm;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferHelper *mBuffer;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    bool mIsAudio;
    sp<ItemTable> mItemTable;

    // Start offset from composition time to presentation time.
    // Support shift only for video tracks through mElstShiftStartTicks for now.
    uint64_t mElstShiftStartTicks;

    // Parsing helpers; their definitions are outside this view.
    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
    status_t parseSampleEncryption(off64_t offset);
    // returns -1 for invalid layer ID
    int32_t parseHEVCLayerId(const uint8_t *data, size_t size);

    // Values collected from a track fragment header.
    // NOTE(review): the flag names look like the 'tfhd' box flags - confirm
    // against parseTrackFragmentHeader().
    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Description of one sample: location, size, timing and per-sample
    // encryption data (IV plus clear/encrypted size lists).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are declared private; no definitions are visible here,
    // presumably to disallow copying.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
201 
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track.
// To cover all tracks the wrapping may be applied multiple times, i.e.
// each CachedRangedDataSource caches the sampletable metadata for a single track.
208 
// DataSourceHelper wrapper that keeps one byte range of the wrapped source in
// a malloc'ed buffer; see the member function definitions below for details.
class CachedRangedDataSource : public DataSourceHelper {
public:
    // Wraps "source"; ownership is not taken at construction time.
    explicit CachedRangedDataSource(DataSourceHelper *source);
    // Frees the cache and deletes the wrapped source only if ownership was
    // assumed through setCachedRange().
    virtual ~CachedRangedDataSource();

    // Served from the cache when the request lies inside the cached range,
    // otherwise forwarded to the wrapped source.
    ssize_t readAt(off64_t offset, void *data, size_t size) override;
    // getSize() and flags() forward to the wrapped source.
    status_t getSize(off64_t *size) override;
    uint32_t flags() override;

    // Replaces the cached range with [offset, offset + size) read from the
    // wrapped source. Returns OK on success, -ENOMEM if the buffer cannot be
    // allocated, ERROR_IO on a short read. Only on success is
    // "assumeSourceOwnershipOnSuccess" stored as the ownership flag.
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceHelper *mSource;   // wrapped source
    bool mOwnsDataSource;        // if true, the destructor deletes mSource
    off64_t mCachedOffset;       // start of the cached range in the source
    size_t mCachedSize;          // length of the cached range in bytes
    uint8_t *mCache;             // malloc'ed copy of the range, NULL if none

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Copy operations are declared private; no definitions are visible here,
    // presumably to disallow copying.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
235 
CachedRangedDataSource(DataSourceHelper * source)236 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
237     : DataSourceHelper(source),
238       mSource(source),
239       mOwnsDataSource(false),
240       mCachedOffset(0),
241       mCachedSize(0),
242       mCache(NULL) {
243 }
244 
~CachedRangedDataSource()245 CachedRangedDataSource::~CachedRangedDataSource() {
246     clearCache();
247     if (mOwnsDataSource) {
248         delete mSource;
249     }
250 }
251 
clearCache()252 void CachedRangedDataSource::clearCache() {
253     if (mCache) {
254         free(mCache);
255         mCache = NULL;
256     }
257 
258     mCachedOffset = 0;
259     mCachedSize = 0;
260 }
261 
readAt(off64_t offset,void * data,size_t size)262 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
263     Mutex::Autolock autoLock(mLock);
264 
265     if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
266         memcpy(data, &mCache[offset - mCachedOffset], size);
267         return size;
268     }
269 
270     return mSource->readAt(offset, data, size);
271 }
272 
getSize(off64_t * size)273 status_t CachedRangedDataSource::getSize(off64_t *size) {
274     return mSource->getSize(size);
275 }
276 
flags()277 uint32_t CachedRangedDataSource::flags() {
278     return mSource->flags();
279 }
280 
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)281 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
282         size_t size,
283         bool assumeSourceOwnershipOnSuccess) {
284     Mutex::Autolock autoLock(mLock);
285 
286     clearCache();
287 
288     mCache = (uint8_t *)malloc(size);
289 
290     if (mCache == NULL) {
291         return -ENOMEM;
292     }
293 
294     mCachedOffset = offset;
295     mCachedSize = size;
296 
297     ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
298 
299     if (err < (ssize_t)size) {
300         clearCache();
301 
302         return ERROR_IO;
303     }
304     mOwnsDataSource = assumeSourceOwnershipOnSuccess;
305     return OK;
306 }
307 
308 ////////////////////////////////////////////////////////////////////////////////
309 
// Debug switch: when true, MPEG4Extractor::parseChunk() prints each chunk's
// type and size with printf and hexdumps up to the first 256 bytes of it.
static const bool kUseHexDump = false;
311 
FourCC2MIME(uint32_t fourcc)312 static const char *FourCC2MIME(uint32_t fourcc) {
313     switch (fourcc) {
314         case FOURCC("mp4a"):
315             return MEDIA_MIMETYPE_AUDIO_AAC;
316 
317         case FOURCC("samr"):
318             return MEDIA_MIMETYPE_AUDIO_AMR_NB;
319 
320         case FOURCC("sawb"):
321             return MEDIA_MIMETYPE_AUDIO_AMR_WB;
322 
323         case FOURCC("ec-3"):
324             return MEDIA_MIMETYPE_AUDIO_EAC3;
325 
326         case FOURCC("mp4v"):
327             return MEDIA_MIMETYPE_VIDEO_MPEG4;
328 
329         case FOURCC("s263"):
330         case FOURCC("h263"):
331         case FOURCC("H263"):
332             return MEDIA_MIMETYPE_VIDEO_H263;
333 
334         case FOURCC("avc1"):
335             return MEDIA_MIMETYPE_VIDEO_AVC;
336 
337         case FOURCC("hvc1"):
338         case FOURCC("hev1"):
339             return MEDIA_MIMETYPE_VIDEO_HEVC;
340         case FOURCC("ac-4"):
341             return MEDIA_MIMETYPE_AUDIO_AC4;
342         case FOURCC("Opus"):
343             return MEDIA_MIMETYPE_AUDIO_OPUS;
344 
345         case FOURCC("twos"):
346         case FOURCC("sowt"):
347             return MEDIA_MIMETYPE_AUDIO_RAW;
348         case FOURCC("alac"):
349             return MEDIA_MIMETYPE_AUDIO_ALAC;
350         case FOURCC("fLaC"):
351             return MEDIA_MIMETYPE_AUDIO_FLAC;
352         case FOURCC("av01"):
353             return MEDIA_MIMETYPE_VIDEO_AV1;
354         case FOURCC(".mp3"):
355         case 0x6D730055: // "ms U" mp3 audio
356             return MEDIA_MIMETYPE_AUDIO_MPEG;
357         default:
358             ALOGW("Unknown fourcc: %c%c%c%c",
359                    (fourcc >> 24) & 0xff,
360                    (fourcc >> 16) & 0xff,
361                    (fourcc >> 8) & 0xff,
362                    fourcc & 0xff
363                    );
364             return "application/octet-stream";
365     }
366 }
367 
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)368 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
369     if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
370         // AMR NB audio is always mono, 8kHz
371         *channels = 1;
372         *rate = 8000;
373         return true;
374     } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
375         // AMR WB audio is always mono, 16kHz
376         *channels = 1;
377         *rate = 16000;
378         return true;
379     }
380     return false;
381 }
382 
MPEG4Extractor(DataSourceHelper * source,const char * mime)383 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
384     : mMoofOffset(0),
385       mMoofFound(false),
386       mMdatFound(false),
387       mDataSource(source),
388       mInitCheck(NO_INIT),
389       mHeaderTimescale(0),
390       mIsQT(false),
391       mIsHeif(false),
392       mHasMoovBox(false),
393       mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
394       mFirstTrack(NULL),
395       mLastTrack(NULL) {
396     ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
397     mFileMetaData = AMediaFormat_new();
398 }
399 
~MPEG4Extractor()400 MPEG4Extractor::~MPEG4Extractor() {
401     Track *track = mFirstTrack;
402     while (track) {
403         Track *next = track->next;
404 
405         delete track;
406         track = next;
407     }
408     mFirstTrack = mLastTrack = NULL;
409 
410     for (size_t i = 0; i < mPssh.size(); i++) {
411         delete [] mPssh[i].data;
412     }
413     mPssh.clear();
414 
415     delete mDataSource;
416     AMediaFormat_delete(mFileMetaData);
417 }
418 
flags() const419 uint32_t MPEG4Extractor::flags() const {
420     return CAN_PAUSE |
421             ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
422                     (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
423 }
424 
getMetaData(AMediaFormat * meta)425 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
426     status_t err;
427     if ((err = readMetaData()) != OK) {
428         return AMEDIA_ERROR_UNKNOWN;
429     }
430     AMediaFormat_copy(meta, mFileMetaData);
431     return AMEDIA_OK;
432 }
433 
countTracks()434 size_t MPEG4Extractor::countTracks() {
435     status_t err;
436     if ((err = readMetaData()) != OK) {
437         ALOGV("MPEG4Extractor::countTracks: no tracks");
438         return 0;
439     }
440 
441     size_t n = 0;
442     Track *track = mFirstTrack;
443     while (track) {
444         ++n;
445         track = track->next;
446     }
447 
448     ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
449     return n;
450 }
451 
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)452 media_status_t MPEG4Extractor::getTrackMetaData(
453         AMediaFormat *meta,
454         size_t index, uint32_t flags) {
455     status_t err;
456     if ((err = readMetaData()) != OK) {
457         return AMEDIA_ERROR_UNKNOWN;
458     }
459 
460     Track *track = mFirstTrack;
461     while (index > 0) {
462         if (track == NULL) {
463             return AMEDIA_ERROR_UNKNOWN;
464         }
465 
466         track = track->next;
467         --index;
468     }
469 
470     if (track == NULL) {
471         return AMEDIA_ERROR_UNKNOWN;
472     }
473 
474     [=] {
475         int64_t duration;
476         int32_t samplerate;
477         // Only for audio track.
478         if (track->has_elst && mHeaderTimescale != 0 &&
479                 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
480                 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
481 
482             // Elst has to be processed only the first time this function is called.
483             track->has_elst = false;
484 
485             if (track->elst_segment_duration > INT64_MAX) {
486                 return;
487             }
488             int64_t segment_duration = track->elst_segment_duration;
489             int64_t media_time = track->elst_media_time;
490             int64_t halfscale = track->timescale / 2;
491 
492             ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
493                   ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
494                   segment_duration, media_time,
495                   halfscale, mHeaderTimescale, track->timescale);
496 
497             if ((uint32_t)samplerate != track->timescale){
498                 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
499                     samplerate);
500             }
501             // Both delay and paddingsamples have to be set inorder for either to be
502             // effective in the lower layers.
503             int64_t delay = 0;
504             if (media_time > 0) { // Gapless playback
505                 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
506                 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
507                         __builtin_add_overflow(delay, halfscale, &delay) ||
508                         (delay /= track->timescale, false) ||
509                         delay > INT32_MAX ||
510                         delay < INT32_MIN) {
511                     ALOGW("ignoring edit list with bogus values");
512                     return;
513                 }
514             }
515             ALOGV("delay = %" PRId64, delay);
516             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
517 
518             int64_t paddingsamples = 0;
519             if (segment_duration > 0) {
520                 int64_t scaled_duration;
521                 // scaled_duration = duration * mHeaderTimescale;
522                 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
523                     return;
524                 }
525                 ALOGV("scaled_duration = %" PRId64, scaled_duration);
526 
527                 int64_t segment_end;
528                 int64_t padding;
529                 int64_t segment_duration_e6;
530                 int64_t media_time_scaled_e6;
531                 int64_t media_time_scaled;
532                 // padding = scaled_duration - ((segment_duration * 1000000) +
533                 //                  ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
534                 // segment_duration is based on timescale in movie header box(mdhd)
535                 // media_time is based on timescale track header/media timescale
536                 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
537                     __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
538                     __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
539                     return;
540                 }
541                 media_time_scaled_e6 /= track->timescale;
542                 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
543                     || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
544                     return;
545                 }
546                 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
547                 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
548                 // might be slightly shorter than the segment duration, which would make the
549                 // padding negative. Clamp to zero.
550                 if (padding > 0) {
551                     int64_t halfscale_mht = mHeaderTimescale / 2;
552                     int64_t halfscale_e6;
553                     int64_t timescale_e6;
554                     // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
555                     //                / (mHeaderTimescale * 1000000);
556                     if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
557                             __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
558                             __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
559                             __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
560                             (paddingsamples /= timescale_e6, false) ||
561                             paddingsamples > INT32_MAX) {
562                         return;
563                     }
564                 }
565             }
566             ALOGV("paddingsamples = %" PRId64, paddingsamples);
567             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
568         }
569     }();
570 
571     if ((flags & kIncludeExtensiveMetaData)
572             && !track->includes_expensive_metadata) {
573         track->includes_expensive_metadata = true;
574 
575         const char *mime;
576         CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
577         if (!strncasecmp("video/", mime, 6)) {
578             // MPEG2 tracks do not provide CSD, so read the stream header
579             if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
580                 off64_t offset;
581                 size_t size;
582                 if (track->sampleTable->getMetaDataForSample(
583                             0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
584                     if (size > kMaxTrackHeaderSize) {
585                         size = kMaxTrackHeaderSize;
586                     }
587                     uint8_t header[kMaxTrackHeaderSize];
588                     if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
589                         AMediaFormat_setBuffer(track->meta,
590                                 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
591                     }
592                 }
593             }
594 
595             if (mMoofOffset > 0) {
596                 int64_t duration;
597                 if (AMediaFormat_getInt64(track->meta,
598                         AMEDIAFORMAT_KEY_DURATION, &duration)) {
599                     // nothing fancy, just pick a frame near 1/4th of the duration
600                     AMediaFormat_setInt64(track->meta,
601                             AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
602                 }
603             } else {
604                 uint32_t sampleIndex;
605                 uint64_t sampleTime;
606                 if (track->timescale != 0 &&
607                         track->sampleTable->findThumbnailSample(&sampleIndex) == OK
608                         && track->sampleTable->getMetaDataForSample(
609                             sampleIndex, NULL /* offset */, NULL /* size */,
610                             &sampleTime) == OK) {
611                         AMediaFormat_setInt64(track->meta,
612                                 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
613                                 ((int64_t)sampleTime * 1000000) / track->timescale);
614                 }
615             }
616         }
617     }
618 
619     AMediaFormat_copy(meta, track->meta);
620     return AMEDIA_OK;
621 }
622 
readMetaData()623 status_t MPEG4Extractor::readMetaData() {
624     if (mInitCheck != NO_INIT) {
625         return mInitCheck;
626     }
627 
628     off64_t offset = 0;
629     status_t err;
630     bool sawMoovOrSidx = false;
631 
632     while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
633              (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
634                      (mItemTable != NULL) && mItemTable->isValid()))) {
635         off64_t orig_offset = offset;
636         err = parseChunk(&offset, 0);
637 
638         if (err != OK && err != UNKNOWN_ERROR) {
639             break;
640         } else if (offset <= orig_offset) {
641             // only continue parsing if the offset was advanced,
642             // otherwise we might end up in an infinite loop
643             ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
644             err = ERROR_MALFORMED;
645             break;
646         } else if (err == UNKNOWN_ERROR) {
647             sawMoovOrSidx = true;
648         }
649     }
650 
651     if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
652         off64_t exifOffset;
653         size_t exifSize;
654         if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
655             AMediaFormat_setInt64(mFileMetaData,
656                     AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
657             AMediaFormat_setInt64(mFileMetaData,
658                     AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
659         }
660         for (uint32_t imageIndex = 0;
661                 imageIndex < mItemTable->countImages(); imageIndex++) {
662             AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
663             if (meta == NULL) {
664                 ALOGE("heif image %u has no meta!", imageIndex);
665                 continue;
666             }
667             // Some heif files advertise image sequence brands (eg. 'hevc') in
668             // ftyp box, but don't have any valid tracks in them. Instead of
669             // reporting the entire file as malformed, we override the error
670             // to allow still images to be extracted.
671             if (err != OK) {
672                 ALOGW("Extracting still images only");
673                 err = OK;
674             }
675             mInitCheck = OK;
676 
677             ALOGV("adding HEIF image track %u", imageIndex);
678             Track *track = new Track;
679             if (mLastTrack != NULL) {
680                 mLastTrack->next = track;
681             } else {
682                 mFirstTrack = track;
683             }
684             mLastTrack = track;
685 
686             track->meta = meta;
687             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
688             track->timescale = 1000000;
689         }
690     }
691 
692     if (mInitCheck == OK) {
693         if (findTrackByMimePrefix("video/") != NULL) {
694             AMediaFormat_setString(mFileMetaData,
695                     AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
696         } else if (findTrackByMimePrefix("audio/") != NULL) {
697             AMediaFormat_setString(mFileMetaData,
698                     AMEDIAFORMAT_KEY_MIME, "audio/mp4");
699         } else if (findTrackByMimePrefix(
700                 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
701             AMediaFormat_setString(mFileMetaData,
702                     AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
703         } else {
704             AMediaFormat_setString(mFileMetaData,
705                     AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
706         }
707     } else {
708         mInitCheck = err;
709     }
710 
711     CHECK_NE(err, (status_t)NO_INIT);
712 
713     // copy pssh data into file metadata
714     uint64_t psshsize = 0;
715     for (size_t i = 0; i < mPssh.size(); i++) {
716         psshsize += 20 + mPssh[i].datalen;
717     }
718     if (psshsize > 0 && psshsize <= UINT32_MAX) {
719         char *buf = (char*)malloc(psshsize);
720         if (!buf) {
721             ALOGE("b/28471206");
722             return NO_MEMORY;
723         }
724         char *ptr = buf;
725         for (size_t i = 0; i < mPssh.size(); i++) {
726             memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
727             memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
728             ptr += (20 + mPssh[i].datalen);
729         }
730         AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
731         free(buf);
732     }
733 
734     return mInitCheck;
735 }
736 
737 struct PathAdder {
PathAdderandroid::PathAdder738     PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
739         : mPath(path) {
740         mPath->push(chunkType);
741     }
742 
~PathAdderandroid::PathAdder743     ~PathAdder() {
744         mPath->pop();
745     }
746 
747 private:
748     Vector<uint32_t> *mPath;
749 
750     PathAdder(const PathAdder &);
751     PathAdder &operator=(const PathAdder &);
752 };
753 
underMetaDataPath(const Vector<uint32_t> & path)754 static bool underMetaDataPath(const Vector<uint32_t> &path) {
755     return path.size() >= 5
756         && path[0] == FOURCC("moov")
757         && path[1] == FOURCC("udta")
758         && path[2] == FOURCC("meta")
759         && path[3] == FOURCC("ilst");
760 }
761 
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)762 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
763     return path.size() >= 2
764             && path[0] == FOURCC("moov")
765             && path[1] == FOURCC("meta")
766             && (depth == 2
767             || (depth == 3
768                     && (path[2] == FOURCC("hdlr")
769                     ||  path[2] == FOURCC("ilst")
770                     ||  path[2] == FOURCC("keys"))));
771 }
772 
773 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)774 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
775     // delta between mpeg4 time and unix epoch time
776     static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
777     if (time_1904 < INT64_MIN + delta) {
778         return false;
779     }
780     time_t time_1970 = time_1904 - delta;
781 
782     char tmp[32];
783     struct tm* tm = gmtime(&time_1970);
784     if (tm != NULL &&
785             strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
786         s->setTo(tmp);
787         return true;
788     }
789     return false;
790 }
791 
parseChunk(off64_t * offset,int depth)792 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
793     ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
794 
795     if (*offset < 0) {
796         ALOGE("b/23540914");
797         return ERROR_MALFORMED;
798     }
799     if (depth > 100) {
800         ALOGE("b/27456299");
801         return ERROR_MALFORMED;
802     }
803     uint32_t hdr[2];
804     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
805         return ERROR_IO;
806     }
807     uint64_t chunk_size = ntohl(hdr[0]);
808     int32_t chunk_type = ntohl(hdr[1]);
809     off64_t data_offset = *offset + 8;
810 
811     if (chunk_size == 1) {
812         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
813             return ERROR_IO;
814         }
815         chunk_size = ntoh64(chunk_size);
816         data_offset += 8;
817 
818         if (chunk_size < 16) {
819             // The smallest valid chunk is 16 bytes long in this case.
820             return ERROR_MALFORMED;
821         }
822     } else if (chunk_size == 0) {
823         if (depth == 0) {
824             // atom extends to end of file
825             off64_t sourceSize;
826             if (mDataSource->getSize(&sourceSize) == OK) {
827                 chunk_size = (sourceSize - *offset);
828             } else {
829                 // XXX could we just pick a "sufficiently large" value here?
830                 ALOGE("atom size is 0, and data source has no size");
831                 return ERROR_MALFORMED;
832             }
833         } else {
834             // not allowed for non-toplevel atoms, skip it
835             *offset += 4;
836             return OK;
837         }
838     } else if (chunk_size < 8) {
839         // The smallest valid chunk is 8 bytes long.
840         ALOGE("invalid chunk size: %" PRIu64, chunk_size);
841         return ERROR_MALFORMED;
842     }
843 
844     char chunk[5];
845     MakeFourCCString(chunk_type, chunk);
846     ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
847 
848     if (kUseHexDump) {
849         static const char kWhitespace[] = "                                        ";
850         const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
851         printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
852 
853         char buffer[256];
854         size_t n = chunk_size;
855         if (n > sizeof(buffer)) {
856             n = sizeof(buffer);
857         }
858         if (mDataSource->readAt(*offset, buffer, n)
859                 < (ssize_t)n) {
860             return ERROR_IO;
861         }
862 
863         hexdump(buffer, n);
864     }
865 
866     PathAdder autoAdder(&mPath, chunk_type);
867 
868     // (data_offset - *offset) is either 8 or 16
869     off64_t chunk_data_size = chunk_size - (data_offset - *offset);
870     if (chunk_data_size < 0) {
871         ALOGE("b/23540914");
872         return ERROR_MALFORMED;
873     }
874     if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
875         char errMsg[100];
876         sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
877         ALOGE("%s (b/28615448)", errMsg);
878         android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
879         return ERROR_MALFORMED;
880     }
881 
882     if (chunk_type != FOURCC("cprt")
883             && chunk_type != FOURCC("covr")
884             && mPath.size() == 5 && underMetaDataPath(mPath)) {
885         off64_t stop_offset = *offset + chunk_size;
886         *offset = data_offset;
887         while (*offset < stop_offset) {
888             status_t err = parseChunk(offset, depth + 1);
889             if (err != OK) {
890                 return err;
891             }
892         }
893 
894         if (*offset != stop_offset) {
895             return ERROR_MALFORMED;
896         }
897 
898         return OK;
899     }
900 
901     switch(chunk_type) {
902         case FOURCC("moov"):
903         case FOURCC("trak"):
904         case FOURCC("mdia"):
905         case FOURCC("minf"):
906         case FOURCC("dinf"):
907         case FOURCC("stbl"):
908         case FOURCC("mvex"):
909         case FOURCC("moof"):
910         case FOURCC("traf"):
911         case FOURCC("mfra"):
912         case FOURCC("udta"):
913         case FOURCC("ilst"):
914         case FOURCC("sinf"):
915         case FOURCC("schi"):
916         case FOURCC("edts"):
917         case FOURCC("wave"):
918         {
919             if (chunk_type == FOURCC("moov") && depth != 0) {
920                 ALOGE("moov: depth %d", depth);
921                 return ERROR_MALFORMED;
922             }
923 
924             if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
925                 ALOGE("duplicate moov");
926                 return ERROR_MALFORMED;
927             }
928 
929             if (chunk_type == FOURCC("moof") && !mMoofFound) {
930                 // store the offset of the first segment
931                 mMoofFound = true;
932                 mMoofOffset = *offset;
933             }
934 
935             if (chunk_type == FOURCC("stbl")) {
936                 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
937 
938                 if (mDataSource->flags()
939                         & (DataSourceBase::kWantsPrefetching
940                             | DataSourceBase::kIsCachingDataSource)) {
941                     CachedRangedDataSource *cachedSource =
942                         new CachedRangedDataSource(mDataSource);
943 
944                     if (cachedSource->setCachedRange(
945                             *offset, chunk_size,
946                             true /* assume ownership on success */) == OK) {
947                         mDataSource = cachedSource;
948                     } else {
949                         delete cachedSource;
950                     }
951                 }
952 
953                 if (mLastTrack == NULL) {
954                     return ERROR_MALFORMED;
955                 }
956 
957                 mLastTrack->sampleTable = new SampleTable(mDataSource);
958             }
959 
960             bool isTrack = false;
961             if (chunk_type == FOURCC("trak")) {
962                 if (depth != 1) {
963                     ALOGE("trak: depth %d", depth);
964                     return ERROR_MALFORMED;
965                 }
966                 isTrack = true;
967 
968                 ALOGV("adding new track");
969                 Track *track = new Track;
970                 if (mLastTrack) {
971                     mLastTrack->next = track;
972                 } else {
973                     mFirstTrack = track;
974                 }
975                 mLastTrack = track;
976 
977                 track->meta = AMediaFormat_new();
978                 AMediaFormat_setString(track->meta,
979                         AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
980             }
981 
982             off64_t stop_offset = *offset + chunk_size;
983             *offset = data_offset;
984             while (*offset < stop_offset) {
985 
986                 // pass udata terminate
987                 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
988                     // handle the case that udta terminates with terminate code x00000000
989                     // note that 0 terminator is optional and we just handle this case.
990                     uint32_t terminate_code = 1;
991                     mDataSource->readAt(*offset, &terminate_code, 4);
992                     if (0 == terminate_code) {
993                         *offset += 4;
994                         ALOGD("Terminal code for udta");
995                         continue;
996                     } else {
997                         ALOGW("invalid udta Terminal code");
998                     }
999                 }
1000 
1001                 status_t err = parseChunk(offset, depth + 1);
1002                 if (err != OK) {
1003                     if (isTrack) {
1004                         mLastTrack->skipTrack = true;
1005                         break;
1006                     }
1007                     return err;
1008                 }
1009             }
1010 
1011             if (*offset != stop_offset) {
1012                 return ERROR_MALFORMED;
1013             }
1014 
1015             if (isTrack) {
1016                 int32_t trackId;
1017                 // There must be exactly one track header per track.
1018 
1019                 if (!AMediaFormat_getInt32(mLastTrack->meta,
1020                         AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1021                     mLastTrack->skipTrack = true;
1022                 }
1023 
1024                 status_t err = verifyTrack(mLastTrack);
1025                 if (err != OK) {
1026                     mLastTrack->skipTrack = true;
1027                 }
1028 
1029 
1030                 if (mLastTrack->skipTrack) {
1031                     ALOGV("skipping this track...");
1032                     Track *cur = mFirstTrack;
1033 
1034                     if (cur == mLastTrack) {
1035                         delete cur;
1036                         mFirstTrack = mLastTrack = NULL;
1037                     } else {
1038                         while (cur && cur->next != mLastTrack) {
1039                             cur = cur->next;
1040                         }
1041                         if (cur) {
1042                             cur->next = NULL;
1043                         }
1044                         delete mLastTrack;
1045                         mLastTrack = cur;
1046                     }
1047 
1048                     return OK;
1049                 }
1050 
1051                 // place things we built elsewhere into their final locations
1052 
1053                 // put aggregated tx3g data into the metadata
1054                 if (mLastTrack->mTx3gFilled > 0) {
1055                     ALOGV("Putting %zu bytes of tx3g data into meta data",
1056                           mLastTrack->mTx3gFilled);
1057                     AMediaFormat_setBuffer(mLastTrack->meta,
1058                         AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1059                         mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1060                     // drop it now to reduce our footprint
1061                     free(mLastTrack->mTx3gBuffer);
1062                     mLastTrack->mTx3gBuffer = NULL;
1063                 }
1064 
1065             } else if (chunk_type == FOURCC("moov")) {
1066                 mInitCheck = OK;
1067 
1068                 return UNKNOWN_ERROR;  // Return a dummy error.
1069             }
1070             break;
1071         }
1072 
1073         case FOURCC("schm"):
1074         {
1075 
1076             *offset += chunk_size;
1077             if (!mLastTrack) {
1078                 return ERROR_MALFORMED;
1079             }
1080 
1081             uint32_t scheme_type;
1082             if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1083                 return ERROR_IO;
1084             }
1085             scheme_type = ntohl(scheme_type);
1086             int32_t mode = kCryptoModeUnencrypted;
1087             switch(scheme_type) {
1088                 case FOURCC("cbc1"):
1089                 {
1090                     mode = kCryptoModeAesCbc;
1091                     break;
1092                 }
1093                 case FOURCC("cbcs"):
1094                 {
1095                     mode = kCryptoModeAesCbc;
1096                     mLastTrack->subsample_encryption = true;
1097                     break;
1098                 }
1099                 case FOURCC("cenc"):
1100                 {
1101                     mode = kCryptoModeAesCtr;
1102                     break;
1103                 }
1104                 case FOURCC("cens"):
1105                 {
1106                     mode = kCryptoModeAesCtr;
1107                     mLastTrack->subsample_encryption = true;
1108                     break;
1109                 }
1110             }
1111             if (mode != kCryptoModeUnencrypted) {
1112                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1113             }
1114             break;
1115         }
1116 
1117 
1118         case FOURCC("elst"):
1119         {
1120             *offset += chunk_size;
1121 
1122             if (!mLastTrack) {
1123                 return ERROR_MALFORMED;
1124             }
1125 
1126             // See 14496-12 8.6.6
1127             uint8_t version;
1128             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1129                 return ERROR_IO;
1130             }
1131 
1132             uint32_t entry_count;
1133             if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1134                 return ERROR_IO;
1135             }
1136 
1137             if (entry_count != 1) {
1138                 // we only support a single entry at the moment, for gapless playback
1139                 // or start offset
1140                 ALOGW("ignoring edit list with %d entries", entry_count);
1141             } else {
1142                 off64_t entriesoffset = data_offset + 8;
1143                 uint64_t segment_duration;
1144                 int64_t media_time;
1145 
1146                 if (version == 1) {
1147                     if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1148                             !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1149                         return ERROR_IO;
1150                     }
1151                 } else if (version == 0) {
1152                     uint32_t sd;
1153                     int32_t mt;
1154                     if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1155                             !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1156                         return ERROR_IO;
1157                     }
1158                     segment_duration = sd;
1159                     media_time = mt;
1160                 } else {
1161                     return ERROR_IO;
1162                 }
1163 
1164                 // save these for later, because the elst atom might precede
1165                 // the atoms that actually gives us the duration and sample rate
1166                 // needed to calculate the padding and delay values
1167                 mLastTrack->has_elst = true;
1168                 mLastTrack->elst_media_time = media_time;
1169                 mLastTrack->elst_segment_duration = segment_duration;
1170             }
1171             break;
1172         }
1173 
1174         case FOURCC("frma"):
1175         {
1176             *offset += chunk_size;
1177 
1178             uint32_t original_fourcc;
1179             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1180                 return ERROR_IO;
1181             }
1182             original_fourcc = ntohl(original_fourcc);
1183             ALOGV("read original format: %d", original_fourcc);
1184 
1185             if (mLastTrack == NULL) {
1186                 return ERROR_MALFORMED;
1187             }
1188 
1189             AMediaFormat_setString(mLastTrack->meta,
1190                     AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1191             uint32_t num_channels = 0;
1192             uint32_t sample_rate = 0;
1193             if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1194                 AMediaFormat_setInt32(mLastTrack->meta,
1195                         AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1196                 AMediaFormat_setInt32(mLastTrack->meta,
1197                         AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1198             }
1199 
1200             if (!mIsQT && original_fourcc == FOURCC("alac")) {
1201                 off64_t tmpOffset = *offset;
1202                 status_t err = parseALACSampleEntry(&tmpOffset);
1203                 if (err != OK) {
1204                     ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1205                     return err;
1206                 }
1207                 *offset = tmpOffset + 8;
1208             }
1209 
1210             break;
1211         }
1212 
1213         case FOURCC("tenc"):
1214         {
1215             *offset += chunk_size;
1216 
1217             if (chunk_size < 32) {
1218                 return ERROR_MALFORMED;
1219             }
1220 
1221             // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1222             // default IV size, 16 bytes default KeyID
1223             // (ISO 23001-7)
1224 
1225             uint8_t version;
1226             if (mDataSource->readAt(data_offset, &version, sizeof(version))
1227                     < (ssize_t)sizeof(version)) {
1228                 return ERROR_IO;
1229             }
1230 
1231             uint8_t buf[4];
1232             memset(buf, 0, 4);
1233             if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1234                 return ERROR_IO;
1235             }
1236 
1237             if (mLastTrack == NULL) {
1238                 return ERROR_MALFORMED;
1239             }
1240 
1241             uint8_t defaultEncryptedByteBlock = 0;
1242             uint8_t defaultSkipByteBlock = 0;
1243             uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1244             if (version == 1) {
1245                 uint32_t pattern = buf[2];
1246                 defaultEncryptedByteBlock = pattern >> 4;
1247                 defaultSkipByteBlock = pattern & 0xf;
1248                 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1249                     // use (1,0) to mean "encrypt everything"
1250                     defaultEncryptedByteBlock = 1;
1251                 }
1252             } else if (mLastTrack->subsample_encryption) {
1253                 ALOGW("subsample_encryption should be version 1");
1254             } else if (defaultAlgorithmId > 1) {
1255                 // only 0 (clear) and 1 (AES-128) are valid
1256                 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1257                 defaultAlgorithmId = 1;
1258             }
1259 
1260             memset(buf, 0, 4);
1261             if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1262                 return ERROR_IO;
1263             }
1264             uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1265 
1266             if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
                // unencrypted data must have an IV size of 0
1268                 return ERROR_MALFORMED;
1269             } else if (defaultIVSize != 0 &&
1270                     defaultIVSize != 8 &&
1271                     defaultIVSize != 16) {
1272                 return ERROR_MALFORMED;
1273             }
1274 
1275             uint8_t defaultKeyId[16];
1276 
1277             if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1278                 return ERROR_IO;
1279             }
1280 
1281             sp<ABuffer> defaultConstantIv;
1282             if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1283 
1284                 uint8_t ivlength;
1285                 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1286                         < (ssize_t)sizeof(ivlength)) {
1287                     return ERROR_IO;
1288                 }
1289 
1290                 if (ivlength != 8 && ivlength != 16) {
1291                     ALOGW("unsupported IV length: %u", ivlength);
1292                     return ERROR_MALFORMED;
1293                 }
1294 
1295                 defaultConstantIv = new ABuffer(ivlength);
1296                 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1297                         < (ssize_t)ivlength) {
1298                     return ERROR_IO;
1299                 }
1300 
1301                 defaultConstantIv->setRange(0, ivlength);
1302             }
1303 
1304             int32_t tmpAlgorithmId;
1305             if (!AMediaFormat_getInt32(mLastTrack->meta,
1306                     AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1307                 AMediaFormat_setInt32(mLastTrack->meta,
1308                         AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1309             }
1310 
1311             AMediaFormat_setInt32(mLastTrack->meta,
1312                     AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1313             AMediaFormat_setBuffer(mLastTrack->meta,
1314                     AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1315             AMediaFormat_setInt32(mLastTrack->meta,
1316                     AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1317             AMediaFormat_setInt32(mLastTrack->meta,
1318                     AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1319             if (defaultConstantIv != NULL) {
1320                 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1321                         defaultConstantIv->data(), defaultConstantIv->size());
1322             }
1323             break;
1324         }
1325 
1326         case FOURCC("tkhd"):
1327         {
1328             *offset += chunk_size;
1329 
1330             status_t err;
1331             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1332                 return err;
1333             }
1334 
1335             break;
1336         }
1337 
1338         case FOURCC("tref"):
1339         {
1340             off64_t stop_offset = *offset + chunk_size;
1341             *offset = data_offset;
1342             while (*offset < stop_offset) {
1343                 status_t err = parseChunk(offset, depth + 1);
1344                 if (err != OK) {
1345                     return err;
1346                 }
1347             }
1348             if (*offset != stop_offset) {
1349                 return ERROR_MALFORMED;
1350             }
1351             break;
1352         }
1353 
1354         case FOURCC("thmb"):
1355         {
1356             *offset += chunk_size;
1357 
1358             if (mLastTrack != NULL) {
1359                 // Skip thumbnail track for now since we don't have an
1360                 // API to retrieve it yet.
1361                 // The thumbnail track can't be accessed by negative index or time,
1362                 // because each timed sample has its own corresponding thumbnail
1363                 // in the thumbnail track. We'll need a dedicated API to retrieve
1364                 // thumbnail at time instead.
1365                 mLastTrack->skipTrack = true;
1366             }
1367 
1368             break;
1369         }
1370 
1371         case FOURCC("pssh"):
1372         {
1373             *offset += chunk_size;
1374 
1375             PsshInfo pssh;
1376 
1377             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1378                 return ERROR_IO;
1379             }
1380 
1381             uint32_t psshdatalen = 0;
1382             if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1383                 return ERROR_IO;
1384             }
1385             pssh.datalen = ntohl(psshdatalen);
1386             ALOGV("pssh data size: %d", pssh.datalen);
1387             if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1388                 // pssh data length exceeds size of containing box
1389                 return ERROR_MALFORMED;
1390             }
1391 
1392             pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1393             if (pssh.data == NULL) {
1394                 return ERROR_MALFORMED;
1395             }
1396             ALOGV("allocated pssh @ %p", pssh.data);
1397             ssize_t requested = (ssize_t) pssh.datalen;
1398             if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1399                 delete[] pssh.data;
1400                 return ERROR_IO;
1401             }
1402             mPssh.push_back(pssh);
1403 
1404             break;
1405         }
1406 
1407         case FOURCC("mdhd"):
1408         {
1409             *offset += chunk_size;
1410 
1411             if (chunk_data_size < 4 || mLastTrack == NULL) {
1412                 return ERROR_MALFORMED;
1413             }
1414 
1415             uint8_t version;
1416             if (mDataSource->readAt(
1417                         data_offset, &version, sizeof(version))
1418                     < (ssize_t)sizeof(version)) {
1419                 return ERROR_IO;
1420             }
1421 
1422             off64_t timescale_offset;
1423 
1424             if (version == 1) {
1425                 timescale_offset = data_offset + 4 + 16;
1426             } else if (version == 0) {
1427                 timescale_offset = data_offset + 4 + 8;
1428             } else {
1429                 return ERROR_IO;
1430             }
1431 
1432             uint32_t timescale;
1433             if (mDataSource->readAt(
1434                         timescale_offset, &timescale, sizeof(timescale))
1435                     < (ssize_t)sizeof(timescale)) {
1436                 return ERROR_IO;
1437             }
1438 
1439             if (!timescale) {
1440                 ALOGE("timescale should not be ZERO.");
1441                 return ERROR_MALFORMED;
1442             }
1443 
1444             mLastTrack->timescale = ntohl(timescale);
1445 
1446             // 14496-12 says all ones means indeterminate, but some files seem to use
1447             // 0 instead. We treat both the same.
1448             int64_t duration = 0;
1449             if (version == 1) {
1450                 if (mDataSource->readAt(
1451                             timescale_offset + 4, &duration, sizeof(duration))
1452                         < (ssize_t)sizeof(duration)) {
1453                     return ERROR_IO;
1454                 }
1455                 if (duration != -1) {
1456                     duration = ntoh64(duration);
1457                 }
1458             } else {
1459                 uint32_t duration32;
1460                 if (mDataSource->readAt(
1461                             timescale_offset + 4, &duration32, sizeof(duration32))
1462                         < (ssize_t)sizeof(duration32)) {
1463                     return ERROR_IO;
1464                 }
1465                 if (duration32 != 0xffffffff) {
1466                     duration = ntohl(duration32);
1467                 }
1468             }
1469             if (duration != 0 && mLastTrack->timescale != 0) {
1470                 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1471                 if (durationUs < 0 || durationUs > INT64_MAX) {
1472                     ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1473                           (long long) duration, (long long) mLastTrack->timescale);
1474                     return ERROR_MALFORMED;
1475                 }
1476                 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1477             }
1478 
1479             uint8_t lang[2];
1480             off64_t lang_offset;
1481             if (version == 1) {
1482                 lang_offset = timescale_offset + 4 + 8;
1483             } else if (version == 0) {
1484                 lang_offset = timescale_offset + 4 + 4;
1485             } else {
1486                 return ERROR_IO;
1487             }
1488 
1489             if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1490                     < (ssize_t)sizeof(lang)) {
1491                 return ERROR_IO;
1492             }
1493 
1494             // To get the ISO-639-2/T three character language code
1495             // 1 bit pad followed by 3 5-bits characters. Each character
1496             // is packed as the difference between its ASCII value and 0x60.
1497             char lang_code[4];
1498             lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1499             lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1500             lang_code[2] = (lang[1] & 0x1f) + 0x60;
1501             lang_code[3] = '\0';
1502 
1503             AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1504 
1505             break;
1506         }
1507 
1508         case FOURCC("stsd"):
1509         {
1510             uint8_t buffer[8];
1511             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1512                 return ERROR_MALFORMED;
1513             }
1514 
1515             if (mDataSource->readAt(
1516                         data_offset, buffer, 8) < 8) {
1517                 return ERROR_IO;
1518             }
1519 
1520             if (U32_AT(buffer) != 0) {
1521                 // Should be version 0, flags 0.
1522                 return ERROR_MALFORMED;
1523             }
1524 
1525             uint32_t entry_count = U32_AT(&buffer[4]);
1526 
1527             if (entry_count > 1) {
                // For 3GPP timed text, there could be multiple tx3g boxes containing
1529                 // multiple text display formats. These formats will be used to
1530                 // display the timed text.
1531                 // For encrypted files, there may also be more than one entry.
1532                 const char *mime;
1533 
1534                 if (mLastTrack == NULL)
1535                     return ERROR_MALFORMED;
1536 
1537                 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1538                 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1539                         strcasecmp(mime, "application/octet-stream")) {
1540                     // For now we only support a single type of media per track.
1541                     mLastTrack->skipTrack = true;
1542                     *offset += chunk_size;
1543                     break;
1544                 }
1545             }
1546             off64_t stop_offset = *offset + chunk_size;
1547             *offset = data_offset + 8;
1548             for (uint32_t i = 0; i < entry_count; ++i) {
1549                 status_t err = parseChunk(offset, depth + 1);
1550                 if (err != OK) {
1551                     return err;
1552                 }
1553             }
1554 
1555             if (*offset != stop_offset) {
1556                 return ERROR_MALFORMED;
1557             }
1558             break;
1559         }
1560         case FOURCC("mett"):
1561         {
1562             *offset += chunk_size;
1563 
1564             if (mLastTrack == NULL)
1565                 return ERROR_MALFORMED;
1566 
1567             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1568             if (buffer.get() == NULL) {
1569                 return NO_MEMORY;
1570             }
1571 
1572             if (mDataSource->readAt(
1573                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1574                 return ERROR_IO;
1575             }
1576 
1577             // Prior to API 29, the metadata track was not compliant with ISO/IEC
1578             // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1579             // metatrack. As of API 29 and onwards, a change was made to metadata track to
1580             // make it compliant with the standard. The workaround is to write the
1581             // null-terminated mime_format string twice. This allows compliant parsers to
1582             // read the missing reserved, data_reference_index, and content_encoding fields
1583             // from the first mime_type string. The actual mime_format field would then be
1584             // read correctly from the second string. The non-compliant Android frameworks
1585             // from API 28 and earlier would still be able to read the mime_format correctly
1586             // as it would only read the first null-terminated mime_format string. To enable
1587             // reading metadata tracks generated from both the non-compliant and compliant
1588             // formats, a check needs to be done to see which format is used.
1589             int null_pos = 0;
1590             const unsigned char *str = buffer.get();
1591             while (null_pos < chunk_data_size) {
1592               if (*(str + null_pos) == '\0') {
1593                 break;
1594               }
1595               ++null_pos;
1596             }
1597 
1598             if (null_pos == chunk_data_size - 1) {
              // This is not a standard compliant metadata track.
1600               String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1601               AMediaFormat_setString(mLastTrack->meta,
1602                   AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1603             } else {
1604               // This is a standard compliant metadata track.
1605               String8 contentEncoding((const char *)(buffer.get() + 8));
1606               String8 mimeFormat((const char *)(buffer.get() + 8 + contentEncoding.size() + 1),
1607                   chunk_data_size - 8 - contentEncoding.size() - 1);
1608               AMediaFormat_setString(mLastTrack->meta,
1609                   AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1610             }
1611             break;
1612         }
1613 
1614         case FOURCC("mp4a"):
1615         case FOURCC("enca"):
1616         case FOURCC("samr"):
1617         case FOURCC("sawb"):
1618         case FOURCC("Opus"):
1619         case FOURCC("twos"):
1620         case FOURCC("sowt"):
1621         case FOURCC("alac"):
1622         case FOURCC("fLaC"):
1623         case FOURCC(".mp3"):
1624         case 0x6D730055: // "ms U" mp3 audio
1625         {
1626             if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1627 
1628                 if (chunk_type == FOURCC("alac")) {
1629                     off64_t offsetTmp = *offset;
1630                     status_t err = parseALACSampleEntry(&offsetTmp);
1631                     if (err != OK) {
1632                         ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1633                         return err;
1634                     }
1635                 }
1636 
1637                 // Ignore all atoms embedded in QT wave atom
1638                 ALOGV("Ignore all atoms embedded in QT wave atom");
1639                 *offset += chunk_size;
1640                 break;
1641             }
1642 
1643             uint8_t buffer[8 + 20];
1644             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1645                 // Basic AudioSampleEntry size.
1646                 return ERROR_MALFORMED;
1647             }
1648 
1649             if (mDataSource->readAt(
1650                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1651                 return ERROR_IO;
1652             }
1653 
1654             uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1655             uint16_t version = U16_AT(&buffer[8]);
1656             uint32_t num_channels = U16_AT(&buffer[16]);
1657 
1658             uint16_t sample_size = U16_AT(&buffer[18]);
1659             uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1660 
1661             if (mLastTrack == NULL)
1662                 return ERROR_MALFORMED;
1663 
1664             off64_t stop_offset = *offset + chunk_size;
1665             *offset = data_offset + sizeof(buffer);
1666 
1667             if (mIsQT) {
1668                 if (version == 1) {
1669                     if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1670                         return ERROR_IO;
1671                     }
1672 
1673 #if 0
1674                     U32_AT(buffer);  // samples per packet
1675                     U32_AT(&buffer[4]);  // bytes per packet
1676                     U32_AT(&buffer[8]);  // bytes per frame
1677                     U32_AT(&buffer[12]);  // bytes per sample
1678 #endif
1679                     *offset += 16;
1680                 } else if (version == 2) {
1681                     uint8_t v2buffer[36];
1682                     if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1683                         return ERROR_IO;
1684                     }
1685 
1686 #if 0
1687                     U32_AT(v2buffer);  // size of struct only
1688                     sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1689                     num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1690                     U32_AT(&v2buffer[16]);  // always 0x7f000000
1691                     sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1692                     U32_AT(&v2buffer[24]);  // format specifc flags
1693                     U32_AT(&v2buffer[28]);  // const bytes per audio packet
1694                     U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1695 #endif
1696                     *offset += 36;
1697                 }
1698             }
1699 
1700             if (chunk_type != FOURCC("enca")) {
1701                 // if the chunk type is enca, we'll get the type from the frma box later
1702                 AMediaFormat_setString(mLastTrack->meta,
1703                         AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1704                 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1705 
1706                 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1707                     AMediaFormat_setInt32(mLastTrack->meta,
1708                             AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1709                     if (chunk_type == FOURCC("twos")) {
1710                         AMediaFormat_setInt32(mLastTrack->meta,
1711                                 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1712                     }
1713                 }
1714             }
1715             ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1716                    chunk, num_channels, sample_size, sample_rate);
1717             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1718             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1719 
1720             if (chunk_type == FOURCC("Opus")) {
1721                 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1722                 data_offset += sizeof(buffer);
1723                 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1724 
1725                 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1726                     opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1727                     return ERROR_MALFORMED;
1728                 }
1729                 // Read Opus Header
1730                 if (mDataSource->readAt(
1731                         data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1732                     return ERROR_IO;
1733                 }
1734 
1735                 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1736                 // http://wiki.xiph.org/OggOpus#ID_Header
1737                 strncpy((char *)opusInfo, "OpusHead", 8);
1738 
1739                 // Version shall be 0 as per mp4 Opus Specific Box
1740                 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1741                 if (opusInfo[8]) {
1742                     return ERROR_MALFORMED;
1743                 }
1744                 // Force version to 1 as per OpusHead definition
1745                 // (http://wiki.xiph.org/OggOpus#ID_Header)
1746                 opusInfo[8] = 1;
1747 
1748                 // Read Opus Specific Box values
1749                 size_t opusOffset = 10;
1750                 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1751                 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1752                 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1753 
1754                 // Convert Opus Specific Box values. ParseOpusHeader expects
1755                 // the values in LE, however MP4 stores these values as BE
1756                 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1757                 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1758                 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1759                 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1760 
1761                 static const int64_t kSeekPreRollNs = 80000000;  // Fixed 80 msec
1762                 static const int32_t kOpusSampleRate = 48000;
1763                 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1764 
1765                 AMediaFormat_setBuffer(mLastTrack->meta,
1766                             AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1767                 AMediaFormat_setBuffer(mLastTrack->meta,
1768                         AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1769                 AMediaFormat_setBuffer(mLastTrack->meta,
1770                         AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1771 
1772                 data_offset += opusInfoSize;
1773                 *offset = data_offset;
1774                 CHECK_EQ(*offset, stop_offset);
1775             }
1776 
1777             if (!mIsQT && chunk_type == FOURCC("alac")) {
1778                 data_offset += sizeof(buffer);
1779 
1780                 status_t err = parseALACSampleEntry(&data_offset);
1781                 if (err != OK) {
1782                     ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1783                     return err;
1784                 }
1785                 *offset = data_offset;
1786                 CHECK_EQ(*offset, stop_offset);
1787             }
1788 
1789             if (chunk_type == FOURCC("fLaC")) {
1790 
1791                 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1792                 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1793                 uint8_t flacInfo[4 + 4 + 34];
1794                 // skipping dFla, version
1795                 data_offset += sizeof(buffer) + 12;
1796                 size_t flacOffset = 4;
1797                 // Add fLaC header mime type to CSD
1798                 strncpy((char *)flacInfo, "fLaC", 4);
1799                 if (mDataSource->readAt(
1800                         data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1801                         (ssize_t)sizeof(flacInfo) - flacOffset) {
1802                     return ERROR_IO;
1803                 }
1804                 data_offset += sizeof(flacInfo) - flacOffset;
1805 
1806                 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1807                                        sizeof(flacInfo));
1808                 *offset = data_offset;
1809                 CHECK_EQ(*offset, stop_offset);
1810             }
1811 
1812             while (*offset < stop_offset) {
1813                 status_t err = parseChunk(offset, depth + 1);
1814                 if (err != OK) {
1815                     return err;
1816                 }
1817             }
1818 
1819             if (*offset != stop_offset) {
1820                 return ERROR_MALFORMED;
1821             }
1822             break;
1823         }
1824 
1825         case FOURCC("mp4v"):
1826         case FOURCC("encv"):
1827         case FOURCC("s263"):
1828         case FOURCC("H263"):
1829         case FOURCC("h263"):
1830         case FOURCC("avc1"):
1831         case FOURCC("hvc1"):
1832         case FOURCC("hev1"):
1833         case FOURCC("av01"):
1834         {
1835             uint8_t buffer[78];
1836             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1837                 // Basic VideoSampleEntry size.
1838                 return ERROR_MALFORMED;
1839             }
1840 
1841             if (mDataSource->readAt(
1842                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1843                 return ERROR_IO;
1844             }
1845 
1846             uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1847             uint16_t width = U16_AT(&buffer[6 + 18]);
1848             uint16_t height = U16_AT(&buffer[6 + 20]);
1849 
1850             // The video sample is not standard-compliant if it has invalid dimension.
1851             // Use some default width and height value, and
1852             // let the decoder figure out the actual width and height (and thus
1853             // be prepared for INFO_FORMAT_CHANGED event).
1854             if (width == 0)  width  = 352;
1855             if (height == 0) height = 288;
1856 
1857             // printf("*** coding='%s' width=%d height=%d\n",
1858             //        chunk, width, height);
1859 
1860             if (mLastTrack == NULL)
1861                 return ERROR_MALFORMED;
1862 
1863             if (chunk_type != FOURCC("encv")) {
1864                 // if the chunk type is encv, we'll get the type from the frma box later
1865                 AMediaFormat_setString(mLastTrack->meta,
1866                         AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1867             }
1868             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
1869             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
1870 
1871             off64_t stop_offset = *offset + chunk_size;
1872             *offset = data_offset + sizeof(buffer);
1873             while (*offset < stop_offset) {
1874                 status_t err = parseChunk(offset, depth + 1);
1875                 if (err != OK) {
1876                     return err;
1877                 }
1878             }
1879 
1880             if (*offset != stop_offset) {
1881                 return ERROR_MALFORMED;
1882             }
1883             break;
1884         }
1885 
1886         case FOURCC("stco"):
1887         case FOURCC("co64"):
1888         {
1889             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1890                 return ERROR_MALFORMED;
1891             }
1892 
1893             status_t err =
1894                 mLastTrack->sampleTable->setChunkOffsetParams(
1895                         chunk_type, data_offset, chunk_data_size);
1896 
1897             *offset += chunk_size;
1898 
1899             if (err != OK) {
1900                 return err;
1901             }
1902 
1903             break;
1904         }
1905 
1906         case FOURCC("stsc"):
1907         {
1908             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1909                 return ERROR_MALFORMED;
1910 
1911             status_t err =
1912                 mLastTrack->sampleTable->setSampleToChunkParams(
1913                         data_offset, chunk_data_size);
1914 
1915             *offset += chunk_size;
1916 
1917             if (err != OK) {
1918                 return err;
1919             }
1920 
1921             break;
1922         }
1923 
1924         case FOURCC("stsz"):
1925         case FOURCC("stz2"):
1926         {
1927             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1928                 return ERROR_MALFORMED;
1929             }
1930 
1931             status_t err =
1932                 mLastTrack->sampleTable->setSampleSizeParams(
1933                         chunk_type, data_offset, chunk_data_size);
1934 
1935             *offset += chunk_size;
1936 
1937             if (err != OK) {
1938                 return err;
1939             }
1940 
1941             adjustRawDefaultFrameSize();
1942 
1943             size_t max_size;
1944             err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1945 
1946             if (err != OK) {
1947                 return err;
1948             }
1949 
1950             if (max_size != 0) {
1951                 // Assume that a given buffer only contains at most 10 chunks,
1952                 // each chunk originally prefixed with a 2 byte length will
1953                 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1954                 // and thus will grow by 2 bytes per chunk.
1955                 if (max_size > SIZE_MAX - 10 * 2) {
1956                     ALOGE("max sample size too big: %zu", max_size);
1957                     return ERROR_MALFORMED;
1958                 }
1959                 AMediaFormat_setInt32(mLastTrack->meta,
1960                         AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
1961             } else {
1962                 // No size was specified. Pick a conservatively large size.
1963                 uint32_t width, height;
1964                 if (!AMediaFormat_getInt32(mLastTrack->meta,
1965                         AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
1966                     !AMediaFormat_getInt32(mLastTrack->meta,
1967                             AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
1968                     ALOGE("No width or height, assuming worst case 1080p");
1969                     width = 1920;
1970                     height = 1080;
1971                 } else {
1972                     // A resolution was specified, check that it's not too big. The values below
1973                     // were chosen so that the calculations below don't cause overflows, they're
1974                     // not indicating that resolutions up to 32kx32k are actually supported.
1975                     if (width > 32768 || height > 32768) {
1976                         ALOGE("can't support %u x %u video", width, height);
1977                         return ERROR_MALFORMED;
1978                     }
1979                 }
1980 
1981                 const char *mime;
1982                 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1983                 if (!strncmp(mime, "audio/", 6)) {
1984                     // for audio, use 128KB
1985                     max_size = 1024 * 128;
1986                 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1987                         || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1988                     // AVC & HEVC require a compression ratio of at least 2, and use
1989                     // macroblocks
1990                     max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1991                 } else {
1992                     // For all other formats there is no minimum compression
1993                     // ratio. Use compression ratio of 1.
1994                     max_size = width * height * 3 / 2;
1995                 }
1996                 // HACK: allow 10% overhead
1997                 // TODO: read sample size from traf atom for fragmented MPEG4.
1998                 max_size += max_size / 10;
1999                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2000             }
2001 
2002             // NOTE: setting another piece of metadata invalidates any pointers (such as the
2003             // mimetype) previously obtained, so don't cache them.
2004             const char *mime;
2005             CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2006             // Calculate average frame rate.
2007             if (!strncasecmp("video/", mime, 6)) {
2008                 size_t nSamples = mLastTrack->sampleTable->countSamples();
2009                 if (nSamples == 0) {
2010                     int32_t trackId;
2011                     if (AMediaFormat_getInt32(mLastTrack->meta,
2012                             AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2013                         for (size_t i = 0; i < mTrex.size(); i++) {
2014                             Trex *t = &mTrex.editItemAt(i);
2015                             if (t->track_ID == (uint32_t) trackId) {
2016                                 if (t->default_sample_duration > 0) {
2017                                     int32_t frameRate =
2018                                             mLastTrack->timescale / t->default_sample_duration;
2019                                     AMediaFormat_setInt32(mLastTrack->meta,
2020                                             AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2021                                 }
2022                                 break;
2023                             }
2024                         }
2025                     }
2026                 } else {
2027                     int64_t durationUs;
2028                     if (AMediaFormat_getInt64(mLastTrack->meta,
2029                             AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2030                         if (durationUs > 0) {
2031                             int32_t frameRate = (nSamples * 1000000LL +
2032                                         (durationUs >> 1)) / durationUs;
2033                             AMediaFormat_setInt32(mLastTrack->meta,
2034                                     AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2035                         }
2036                     }
2037                     ALOGV("setting frame count %zu", nSamples);
2038                     AMediaFormat_setInt32(mLastTrack->meta,
2039                             AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2040                 }
2041             }
2042 
2043             break;
2044         }
2045 
2046         case FOURCC("stts"):
2047         {
2048             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2049                 return ERROR_MALFORMED;
2050 
2051             *offset += chunk_size;
2052 
2053             if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2054                 char chunk[5];
2055                 MakeFourCCString(mPath[depth - 1], chunk);
2056                 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2057                 break;
2058             }
2059 
2060             status_t err =
2061                 mLastTrack->sampleTable->setTimeToSampleParams(
2062                         data_offset, chunk_data_size);
2063 
2064             if (err != OK) {
2065                 return err;
2066             }
2067 
2068             break;
2069         }
2070 
2071         case FOURCC("ctts"):
2072         {
2073             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2074                 return ERROR_MALFORMED;
2075 
2076             *offset += chunk_size;
2077 
2078             status_t err =
2079                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2080                         data_offset, chunk_data_size);
2081 
2082             if (err != OK) {
2083                 return err;
2084             }
2085 
2086             break;
2087         }
2088 
2089         case FOURCC("stss"):
2090         {
2091             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2092                 return ERROR_MALFORMED;
2093 
2094             *offset += chunk_size;
2095 
2096             status_t err =
2097                 mLastTrack->sampleTable->setSyncSampleParams(
2098                         data_offset, chunk_data_size);
2099 
2100             if (err != OK) {
2101                 return err;
2102             }
2103 
2104             break;
2105         }
2106 
2107         // \xA9xyz
2108         case FOURCC("\251xyz"):
2109         {
2110             *offset += chunk_size;
2111 
2112             // Best case the total data length inside "\xA9xyz" box would
2113             // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2114             // where "\x00\x05" is the text string length with value = 5,
2115             // "\x15\xc7" is the language code = en, and "+0+0/" is a
2116             // location (string) value with longitude = 0 and latitude = 0.
2117             // Since some devices encountered in the wild omit the trailing
2118             // slash, we'll allow that.
2119             if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2120                 return ERROR_MALFORMED;
2121             }
2122 
2123             uint16_t len;
2124             if (!mDataSource->getUInt16(data_offset, &len)) {
2125                 return ERROR_IO;
2126             }
2127 
2128             // allow "+0+0" without trailing slash
2129             if (len < 4 || len > chunk_data_size - 4) {
2130                 return ERROR_MALFORMED;
2131             }
2132             // The location string following the language code is formatted
2133             // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2134             // Allocate 2 extra bytes, in case we need to add a trailing slash,
2135             // and to add a terminating 0.
2136             std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2137             if (!buffer) {
2138                 return NO_MEMORY;
2139             }
2140 
2141             if (mDataSource->readAt(
2142                         data_offset + 4, &buffer[0], len) < len) {
2143                 return ERROR_IO;
2144             }
2145 
2146             len = strlen(&buffer[0]);
2147             if (len < 4) {
2148                 return ERROR_MALFORMED;
2149             }
2150             // Add a trailing slash if there wasn't one.
2151             if (buffer[len - 1] != '/') {
2152                 buffer[len] = '/';
2153             }
2154             AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2155             break;
2156         }
2157 
2158         case FOURCC("esds"):
2159         {
2160             *offset += chunk_size;
2161 
2162             if (chunk_data_size < 4) {
2163                 return ERROR_MALFORMED;
2164             }
2165 
2166             auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2167             uint8_t *buffer = tmp.get();
2168             if (buffer == NULL) {
2169                 return -ENOMEM;
2170             }
2171 
2172             if (mDataSource->readAt(
2173                         data_offset, buffer, chunk_data_size) < chunk_data_size) {
2174                 return ERROR_IO;
2175             }
2176 
2177             if (U32_AT(buffer) != 0) {
2178                 // Should be version 0, flags 0.
2179                 return ERROR_MALFORMED;
2180             }
2181 
2182             if (mLastTrack == NULL)
2183                 return ERROR_MALFORMED;
2184 
2185             AMediaFormat_setBuffer(mLastTrack->meta,
2186                     AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2187 
2188             if (mPath.size() >= 2
2189                     && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2190                 // Information from the ESDS must be relied on for proper
2191                 // setup of sample rate and channel count for MPEG4 Audio.
2192                 // The generic header appears to only contain generic
2193                 // information...
2194 
2195                 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2196                         &buffer[4], chunk_data_size - 4);
2197 
2198                 if (err != OK) {
2199                     return err;
2200                 }
2201             }
2202             if (mPath.size() >= 2
2203                     && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2204                 // Check if the video is MPEG2
2205                 ESDS esds(&buffer[4], chunk_data_size - 4);
2206 
2207                 uint8_t objectTypeIndication;
2208                 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2209                     if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2210                         AMediaFormat_setString(mLastTrack->meta,
2211                                 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2212                     }
2213                 }
2214             }
2215             break;
2216         }
2217 
2218         case FOURCC("btrt"):
2219         {
2220             *offset += chunk_size;
2221             if (mLastTrack == NULL) {
2222                 return ERROR_MALFORMED;
2223             }
2224 
2225             uint8_t buffer[12];
2226             if (chunk_data_size != sizeof(buffer)) {
2227                 return ERROR_MALFORMED;
2228             }
2229 
2230             if (mDataSource->readAt(
2231                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2232                 return ERROR_IO;
2233             }
2234 
2235             uint32_t maxBitrate = U32_AT(&buffer[4]);
2236             uint32_t avgBitrate = U32_AT(&buffer[8]);
2237             if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2238                 AMediaFormat_setInt32(mLastTrack->meta,
2239                         AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2240             }
2241             if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2242                 AMediaFormat_setInt32(mLastTrack->meta,
2243                         AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2244             }
2245             break;
2246         }
2247 
2248         case FOURCC("avcC"):
2249         {
2250             *offset += chunk_size;
2251 
2252             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2253 
2254             if (buffer.get() == NULL) {
2255                 ALOGE("b/28471206");
2256                 return NO_MEMORY;
2257             }
2258 
2259             if (mDataSource->readAt(
2260                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2261                 return ERROR_IO;
2262             }
2263 
2264             if (mLastTrack == NULL)
2265                 return ERROR_MALFORMED;
2266 
2267             AMediaFormat_setBuffer(mLastTrack->meta,
2268                     AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2269 
2270             break;
2271         }
2272         case FOURCC("hvcC"):
2273         {
2274             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2275 
2276             if (buffer.get() == NULL) {
2277                 ALOGE("b/28471206");
2278                 return NO_MEMORY;
2279             }
2280 
2281             if (mDataSource->readAt(
2282                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2283                 return ERROR_IO;
2284             }
2285 
2286             if (mLastTrack == NULL)
2287                 return ERROR_MALFORMED;
2288 
2289             AMediaFormat_setBuffer(mLastTrack->meta,
2290                     AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2291 
2292             *offset += chunk_size;
2293             break;
2294         }
2295         case FOURCC("av1C"):
2296         {
2297             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2298 
2299             if (buffer.get() == NULL) {
2300                 ALOGE("b/28471206");
2301                 return NO_MEMORY;
2302             }
2303 
2304             if (mDataSource->readAt(
2305                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2306                 return ERROR_IO;
2307             }
2308 
2309             if (mLastTrack == NULL)
2310                 return ERROR_MALFORMED;
2311 
2312             AMediaFormat_setBuffer(mLastTrack->meta,
2313                    AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2314 
2315             *offset += chunk_size;
2316             break;
2317         }
2318         case FOURCC("d263"):
2319         {
2320             *offset += chunk_size;
2321             /*
2322              * d263 contains a fixed 7 bytes part:
2323              *   vendor - 4 bytes
2324              *   version - 1 byte
2325              *   level - 1 byte
2326              *   profile - 1 byte
2327              * optionally, "d263" box itself may contain a 16-byte
2328              * bit rate box (bitr)
2329              *   average bit rate - 4 bytes
2330              *   max bit rate - 4 bytes
2331              */
2332             char buffer[23];
2333             if (chunk_data_size != 7 &&
2334                 chunk_data_size != 23) {
2335                 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2336                 return ERROR_MALFORMED;
2337             }
2338 
2339             if (mDataSource->readAt(
2340                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2341                 return ERROR_IO;
2342             }
2343 
2344             if (mLastTrack == NULL)
2345                 return ERROR_MALFORMED;
2346 
2347             AMediaFormat_setBuffer(mLastTrack->meta,
2348                     AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2349 
2350             break;
2351         }
2352 
2353         case FOURCC("meta"):
2354         {
2355             off64_t stop_offset = *offset + chunk_size;
2356             *offset = data_offset;
2357             bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2358             if (!isParsingMetaKeys) {
2359                 uint8_t buffer[4];
2360                 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2361                     *offset = stop_offset;
2362                     return ERROR_MALFORMED;
2363                 }
2364 
2365                 if (mDataSource->readAt(
2366                             data_offset, buffer, 4) < 4) {
2367                     *offset = stop_offset;
2368                     return ERROR_IO;
2369                 }
2370 
2371                 if (U32_AT(buffer) != 0) {
2372                     // Should be version 0, flags 0.
2373 
2374                     // If it's not, let's assume this is one of those
2375                     // apparently malformed chunks that don't have flags
2376                     // and completely different semantics than what's
2377                     // in the MPEG4 specs and skip it.
2378                     *offset = stop_offset;
2379                     return OK;
2380                 }
2381                 *offset +=  sizeof(buffer);
2382             }
2383 
2384             while (*offset < stop_offset) {
2385                 status_t err = parseChunk(offset, depth + 1);
2386                 if (err != OK) {
2387                     return err;
2388                 }
2389             }
2390 
2391             if (*offset != stop_offset) {
2392                 return ERROR_MALFORMED;
2393             }
2394             break;
2395         }
2396 
2397         case FOURCC("iloc"):
2398         case FOURCC("iinf"):
2399         case FOURCC("iprp"):
2400         case FOURCC("pitm"):
2401         case FOURCC("idat"):
2402         case FOURCC("iref"):
2403         case FOURCC("ipro"):
2404         {
2405             if (mIsHeif) {
2406                 if (mItemTable == NULL) {
2407                     mItemTable = new ItemTable(mDataSource);
2408                 }
2409                 status_t err = mItemTable->parse(
2410                         chunk_type, data_offset, chunk_data_size);
2411                 if (err != OK) {
2412                     return err;
2413                 }
2414             }
2415             *offset += chunk_size;
2416             break;
2417         }
2418 
2419         case FOURCC("mean"):
2420         case FOURCC("name"):
2421         case FOURCC("data"):
2422         {
2423             *offset += chunk_size;
2424 
2425             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2426                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2427 
2428                 if (err != OK) {
2429                     return err;
2430                 }
2431             }
2432 
2433             break;
2434         }
2435 
2436         case FOURCC("mvhd"):
2437         {
2438             *offset += chunk_size;
2439 
2440             if (depth != 1) {
2441                 ALOGE("mvhd: depth %d", depth);
2442                 return ERROR_MALFORMED;
2443             }
2444             if (chunk_data_size < 32) {
2445                 return ERROR_MALFORMED;
2446             }
2447 
2448             uint8_t header[32];
2449             if (mDataSource->readAt(
2450                         data_offset, header, sizeof(header))
2451                     < (ssize_t)sizeof(header)) {
2452                 return ERROR_IO;
2453             }
2454 
2455             uint64_t creationTime;
2456             uint64_t duration = 0;
2457             if (header[0] == 1) {
2458                 creationTime = U64_AT(&header[4]);
2459                 mHeaderTimescale = U32_AT(&header[20]);
2460                 duration = U64_AT(&header[24]);
2461                 if (duration == 0xffffffffffffffff) {
2462                     duration = 0;
2463                 }
2464             } else if (header[0] != 0) {
2465                 return ERROR_MALFORMED;
2466             } else {
2467                 creationTime = U32_AT(&header[4]);
2468                 mHeaderTimescale = U32_AT(&header[12]);
2469                 uint32_t d32 = U32_AT(&header[16]);
2470                 if (d32 == 0xffffffff) {
2471                     d32 = 0;
2472                 }
2473                 duration = d32;
2474             }
2475             if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2476                 AMediaFormat_setInt64(mFileMetaData,
2477                         AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2478             }
2479 
2480             String8 s;
2481             if (convertTimeToDate(creationTime, &s)) {
2482                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2483             }
2484 
2485             break;
2486         }
2487 
2488         case FOURCC("mehd"):
2489         {
2490             *offset += chunk_size;
2491 
2492             if (chunk_data_size < 8) {
2493                 return ERROR_MALFORMED;
2494             }
2495 
2496             uint8_t flags[4];
2497             if (mDataSource->readAt(
2498                         data_offset, flags, sizeof(flags))
2499                     < (ssize_t)sizeof(flags)) {
2500                 return ERROR_IO;
2501             }
2502 
2503             uint64_t duration = 0;
2504             if (flags[0] == 1) {
2505                 // 64 bit
2506                 if (chunk_data_size < 12) {
2507                     return ERROR_MALFORMED;
2508                 }
2509                 mDataSource->getUInt64(data_offset + 4, &duration);
2510                 if (duration == 0xffffffffffffffff) {
2511                     duration = 0;
2512                 }
2513             } else if (flags[0] == 0) {
2514                 // 32 bit
2515                 uint32_t d32;
2516                 mDataSource->getUInt32(data_offset + 4, &d32);
2517                 if (d32 == 0xffffffff) {
2518                     d32 = 0;
2519                 }
2520                 duration = d32;
2521             } else {
2522                 return ERROR_MALFORMED;
2523             }
2524 
2525             if (duration != 0 && mHeaderTimescale != 0) {
2526                 AMediaFormat_setInt64(mFileMetaData,
2527                         AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2528             }
2529 
2530             break;
2531         }
2532 
2533         case FOURCC("mdat"):
2534         {
2535             mMdatFound = true;
2536 
2537             *offset += chunk_size;
2538             break;
2539         }
2540 
2541         case FOURCC("hdlr"):
2542         {
2543             *offset += chunk_size;
2544 
2545             if (underQTMetaPath(mPath, 3)) {
2546                 break;
2547             }
2548 
2549             uint32_t buffer;
2550             if (mDataSource->readAt(
2551                         data_offset + 8, &buffer, 4) < 4) {
2552                 return ERROR_IO;
2553             }
2554 
2555             uint32_t type = ntohl(buffer);
2556             // For the 3GPP file format, the handler-type within the 'hdlr' box
2557             // shall be 'text'. We also want to support 'sbtl' handler type
2558             // for a practical reason as various MPEG4 containers use it.
2559             if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2560                 if (mLastTrack != NULL) {
2561                     AMediaFormat_setString(mLastTrack->meta,
2562                             AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2563                 }
2564             }
2565 
2566             break;
2567         }
2568 
2569         case FOURCC("keys"):
2570         {
2571             *offset += chunk_size;
2572 
2573             if (underQTMetaPath(mPath, 3)) {
2574                 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2575                 if (err != OK) {
2576                     return err;
2577                 }
2578             }
2579             break;
2580         }
2581 
2582         case FOURCC("trex"):
2583         {
2584             *offset += chunk_size;
2585 
2586             if (chunk_data_size < 24) {
2587                 return ERROR_IO;
2588             }
2589             Trex trex;
2590             if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2591                 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2592                 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2593                 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2594                 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2595                 return ERROR_IO;
2596             }
2597             mTrex.add(trex);
2598             break;
2599         }
2600 
2601         case FOURCC("tx3g"):
2602         {
2603             if (mLastTrack == NULL)
2604                 return ERROR_MALFORMED;
2605 
2606             // complain about ridiculous chunks
2607             if (chunk_size > kMaxAtomSize) {
2608                 return ERROR_MALFORMED;
2609             }
2610 
2611             // complain about empty atoms
2612             if (chunk_data_size <= 0) {
2613                 ALOGE("b/124330204");
2614                 android_errorWriteLog(0x534e4554, "124330204");
2615                 return ERROR_MALFORMED;
2616             }
2617 
2618             // should fill buffer based on "data_offset" and "chunk_data_size"
2619             // instead of *offset and chunk_size;
2620             // but we've been feeding the extra data to consumers for multiple releases and
2621             // if those apps are compensating for it, we'd break them with such a change
2622             //
2623 
2624             if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2625                 size_t growth = kTx3gGrowth;
2626                 if (growth < chunk_size) {
2627                     growth = chunk_size;
2628                 }
2629                 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2630                 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2631                     ALOGE("b/124330204 - too much space");
2632                     android_errorWriteLog(0x534e4554, "124330204");
2633                     return ERROR_MALFORMED;
2634                 }
2635                 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2636                                                 mLastTrack->mTx3gSize + growth);
2637                 if (updated == NULL) {
2638                     return ERROR_MALFORMED;
2639                 }
2640                 mLastTrack->mTx3gBuffer = updated;
2641                 mLastTrack->mTx3gSize += growth;
2642             }
2643 
2644             if ((size_t)(mDataSource->readAt(*offset,
2645                                              mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2646                                              chunk_size))
2647                     < chunk_size) {
2648 
2649                 // advance read pointer so we don't end up reading this again
2650                 *offset += chunk_size;
2651                 return ERROR_IO;
2652             }
2653 
2654             mLastTrack->mTx3gFilled += chunk_size;
2655             *offset += chunk_size;
2656             break;
2657         }
2658 
2659         case FOURCC("covr"):
2660         {
2661             *offset += chunk_size;
2662 
2663             ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2664                   chunk_data_size, data_offset);
2665 
2666             if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2667                 return ERROR_MALFORMED;
2668             }
2669             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2670             if (buffer.get() == NULL) {
2671                 ALOGE("b/28471206");
2672                 return NO_MEMORY;
2673             }
2674             if (mDataSource->readAt(
2675                 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2676                 return ERROR_IO;
2677             }
2678             const int kSkipBytesOfDataBox = 16;
2679             if (chunk_data_size <= kSkipBytesOfDataBox) {
2680                 return ERROR_MALFORMED;
2681             }
2682 
2683             AMediaFormat_setBuffer(mFileMetaData,
2684                 AMEDIAFORMAT_KEY_ALBUMART,
2685                 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2686 
2687             break;
2688         }
2689 
2690         case FOURCC("colr"):
2691         {
2692             *offset += chunk_size;
2693             // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2694             // ignore otherwise
2695             if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
2696                 status_t err = parseColorInfo(data_offset, chunk_data_size);
2697                 if (err != OK) {
2698                     return err;
2699                 }
2700             }
2701 
2702             break;
2703         }
2704 
2705         case FOURCC("titl"):
2706         case FOURCC("perf"):
2707         case FOURCC("auth"):
2708         case FOURCC("gnre"):
2709         case FOURCC("albm"):
2710         case FOURCC("yrrc"):
2711         {
2712             *offset += chunk_size;
2713 
2714             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2715 
2716             if (err != OK) {
2717                 return err;
2718             }
2719 
2720             break;
2721         }
2722 
2723         case FOURCC("ID32"):
2724         {
2725             *offset += chunk_size;
2726 
2727             if (chunk_data_size < 6) {
2728                 return ERROR_MALFORMED;
2729             }
2730 
2731             parseID3v2MetaData(data_offset + 6);
2732 
2733             break;
2734         }
2735 
2736         case FOURCC("----"):
2737         {
2738             mLastCommentMean.clear();
2739             mLastCommentName.clear();
2740             mLastCommentData.clear();
2741             *offset += chunk_size;
2742             break;
2743         }
2744 
2745         case FOURCC("sidx"):
2746         {
2747             status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2748             if (err != OK) {
2749                 return err;
2750             }
2751             *offset += chunk_size;
2752             return UNKNOWN_ERROR; // stop parsing after sidx
2753         }
2754 
2755         case FOURCC("ac-3"):
2756         {
2757             *offset += chunk_size;
2758             // bypass ac-3 if parse fail
2759             if (parseAC3SpecificBox(data_offset) != OK) {
2760                 if (mLastTrack != NULL) {
2761                     ALOGW("Fail to parse ac-3");
2762                     mLastTrack->skipTrack = true;
2763                 }
2764             }
2765             return OK;
2766         }
2767 
2768         case FOURCC("ec-3"):
2769         {
2770             *offset += chunk_size;
2771             // bypass ec-3 if parse fail
2772             if (parseEAC3SpecificBox(data_offset) != OK) {
2773                 if (mLastTrack != NULL) {
2774                     ALOGW("Fail to parse ec-3");
2775                     mLastTrack->skipTrack = true;
2776                 }
2777             }
2778             return OK;
2779         }
2780 
2781         case FOURCC("ac-4"):
2782         {
2783             *offset += chunk_size;
2784             // bypass ac-4 if parse fail
2785             if (parseAC4SpecificBox(data_offset) != OK) {
2786                 if (mLastTrack != NULL) {
2787                     ALOGW("Fail to parse ac-4");
2788                     mLastTrack->skipTrack = true;
2789                 }
2790             }
2791             return OK;
2792         }
2793 
2794         case FOURCC("ftyp"):
2795         {
2796             if (chunk_data_size < 8 || depth != 0) {
2797                 return ERROR_MALFORMED;
2798             }
2799 
2800             off64_t stop_offset = *offset + chunk_size;
2801             uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2802             std::set<uint32_t> brandSet;
2803             for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2804                 if (i == 1) {
2805                     // Skip this index, it refers to the minorVersion,
2806                     // not a brand.
2807                     continue;
2808                 }
2809 
2810                 uint32_t brand;
2811                 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2812                     return ERROR_MALFORMED;
2813                 }
2814 
2815                 brand = ntohl(brand);
2816                 brandSet.insert(brand);
2817             }
2818 
2819             if (brandSet.count(FOURCC("qt  ")) > 0) {
2820                 mIsQT = true;
2821             } else {
2822                 if (brandSet.count(FOURCC("mif1")) > 0
2823                  && brandSet.count(FOURCC("heic")) > 0) {
2824                     ALOGV("identified HEIF image");
2825 
2826                     mIsHeif = true;
2827                     brandSet.erase(FOURCC("mif1"));
2828                     brandSet.erase(FOURCC("heic"));
2829                 }
2830 
2831                 if (!brandSet.empty()) {
2832                     // This means that the file should have moov box.
2833                     // It could be any iso files (mp4, heifs, etc.)
2834                     mHasMoovBox = true;
2835                     if (mIsHeif) {
2836                         ALOGV("identified HEIF image with other tracks");
2837                     }
2838                 }
2839             }
2840 
2841             *offset = stop_offset;
2842 
2843             break;
2844         }
2845 
2846         default:
2847         {
2848             // check if we're parsing 'ilst' for meta keys
2849             // if so, treat type as a number (key-id).
2850             if (underQTMetaPath(mPath, 3)) {
2851                 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2852                 if (err != OK) {
2853                     return err;
2854                 }
2855             }
2856 
2857             *offset += chunk_size;
2858             break;
2859         }
2860     }
2861 
2862     return OK;
2863 }
2864 
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)2865 status_t MPEG4Extractor::parseChannelCountSampleRate(
2866         off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
2867     // skip 16 bytes:
2868     //  + 6-byte reserved,
2869     //  + 2-byte data reference index,
2870     //  + 8-byte reserved
2871     *offset += 16;
2872     if (!mDataSource->getUInt16(*offset, channelCount)) {
2873         ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
2874         return ERROR_MALFORMED;
2875     }
2876     // skip 8 bytes:
2877     //  + 2-byte channelCount,
2878     //  + 2-byte sample size,
2879     //  + 4-byte reserved
2880     *offset += 8;
2881     if (!mDataSource->getUInt16(*offset, sampleRate)) {
2882         ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
2883         return ERROR_MALFORMED;
2884     }
2885     // skip 4 bytes:
2886     //  + 2-byte sampleRate,
2887     //  + 2-byte reserved
2888     *offset += 4;
2889     return OK;
2890 }
2891 
parseAC4SpecificBox(off64_t offset)2892 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
2893     if (mLastTrack == NULL) {
2894         return ERROR_MALFORMED;
2895     }
2896 
2897     uint16_t sampleRate, channelCount;
2898     status_t status;
2899     if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
2900         return status;
2901     }
2902     uint32_t size;
2903     // + 4-byte size
2904     // + 4-byte type
2905     // + 3-byte payload
2906     const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
2907     if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
2908         ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
2909         return ERROR_MALFORMED;
2910     }
2911 
2912     // + 4-byte size
2913     offset += 4;
2914     uint32_t type;
2915     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
2916         ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
2917         return ERROR_MALFORMED;
2918     }
2919 
2920     // + 4-byte type
2921     offset += 4;
2922     const uint32_t kAC4SpecificBoxPayloadSize = 1176;
2923     uint8_t chunk[kAC4SpecificBoxPayloadSize];
2924     ssize_t dsiSize = size - 8; // size of box - size and type fields
2925     if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
2926         mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
2927         ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
2928         return ERROR_MALFORMED;
2929     }
2930     // + size-byte payload
2931     offset += dsiSize;
2932     ABitReader br(chunk, dsiSize);
2933     AC4DSIParser parser(br);
2934     if (!parser.parse()){
2935         ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
2936         return ERROR_MALFORMED;
2937     }
2938 
2939     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
2940     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
2941     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
2942 
2943     AudioPresentationCollection presentations;
2944     // translate the AC4 presentation information to audio presentations for this track
2945     AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
2946     if (!ac4Presentations.empty()) {
2947         for (const auto& ac4Presentation : ac4Presentations) {
2948             auto& presentation = ac4Presentation.second;
2949             if (!presentation.mEnabled) {
2950                 continue;
2951             }
2952             AudioPresentationV1 ap;
2953             ap.mPresentationId = presentation.mGroupIndex;
2954             ap.mProgramId = presentation.mProgramID;
2955             ap.mLanguage = presentation.mLanguage;
2956             if (presentation.mPreVirtualized) {
2957                 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
2958             } else {
2959                 switch (presentation.mChannelMode) {
2960                     case AC4Parser::AC4Presentation::kChannelMode_Mono:
2961                     case AC4Parser::AC4Presentation::kChannelMode_Stereo:
2962                         ap.mMasteringIndication = MASTERED_FOR_STEREO;
2963                         break;
2964                     case AC4Parser::AC4Presentation::kChannelMode_3_0:
2965                     case AC4Parser::AC4Presentation::kChannelMode_5_0:
2966                     case AC4Parser::AC4Presentation::kChannelMode_5_1:
2967                     case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
2968                     case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
2969                     case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
2970                     case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
2971                         ap.mMasteringIndication = MASTERED_FOR_SURROUND;
2972                         break;
2973                     case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
2974                     case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
2975                     case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
2976                     case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
2977                     case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
2978                     case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
2979                     case AC4Parser::AC4Presentation::kChannelMode_22_2:
2980                         ap.mMasteringIndication = MASTERED_FOR_3D;
2981                         break;
2982                     default:
2983                         ALOGE("Invalid channel mode in AC4 presentation");
2984                         return ERROR_MALFORMED;
2985                 }
2986             }
2987 
2988             ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
2989                     AC4Parser::AC4Presentation::kVisuallyImpaired);
2990             ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
2991                     AC4Parser::AC4Presentation::kVoiceOver);
2992             ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
2993             if (!ap.mLanguage.empty()) {
2994                 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
2995             }
2996             presentations.push_back(std::move(ap));
2997         }
2998     }
2999 
3000     if (presentations.empty()) {
3001         // Clear audio presentation info in metadata.
3002         AMediaFormat_setBuffer(
3003                 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3004     } else {
3005         std::ostringstream outStream(std::ios::out);
3006         serializeAudioPresentations(presentations, &outStream);
3007         AMediaFormat_setBuffer(
3008                 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3009                 outStream.str().data(), outStream.str().size());
3010     }
3011     return OK;
3012 }
3013 
parseEAC3SpecificBox(off64_t offset)3014 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3015     if (mLastTrack == NULL) {
3016         return ERROR_MALFORMED;
3017     }
3018 
3019     uint16_t sampleRate, channels;
3020     status_t status;
3021     if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3022         return status;
3023     }
3024     uint32_t size;
3025     // + 4-byte size
3026     // + 4-byte type
3027     // + 3-byte payload
3028     const uint32_t kEAC3SpecificBoxMinSize = 11;
3029     // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3030     // calculated from the required bits read below as well as the maximum number of independent
3031     // and dependant sub streams you can have
3032     const uint32_t kEAC3SpecificBoxMaxSize = 152;
3033     if (!mDataSource->getUInt32(offset, &size) ||
3034         size < kEAC3SpecificBoxMinSize ||
3035         size > kEAC3SpecificBoxMaxSize) {
3036         ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3037         return ERROR_MALFORMED;
3038     }
3039 
3040     offset += 4;
3041     uint32_t type;
3042     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3043         ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3044         return ERROR_MALFORMED;
3045     }
3046 
3047     offset += 4;
3048     uint8_t* chunk = new (std::nothrow) uint8_t[size];
3049     if (chunk == NULL) {
3050         return ERROR_MALFORMED;
3051     }
3052 
3053     if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3054         ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3055         delete[] chunk;
3056         return ERROR_MALFORMED;
3057     }
3058 
3059     ABitReader br(chunk, size);
3060     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3061     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3062 
3063     if (br.numBitsLeft() < 16) {
3064         delete[] chunk;
3065         return ERROR_MALFORMED;
3066     }
3067     unsigned data_rate = br.getBits(13);
3068     ALOGV("EAC3 data rate = %d", data_rate);
3069 
3070     unsigned num_ind_sub = br.getBits(3) + 1;
3071     ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3072     if (br.numBitsLeft() < (num_ind_sub * 23)) {
3073         delete[] chunk;
3074         return ERROR_MALFORMED;
3075     }
3076 
3077     unsigned channelCount = 0;
3078     for (unsigned i = 0; i < num_ind_sub; i++) {
3079         unsigned fscod = br.getBits(2);
3080         if (fscod == 3) {
3081             ALOGE("Incorrect fscod (3) in EAC3 header");
3082             delete[] chunk;
3083             return ERROR_MALFORMED;
3084         }
3085         unsigned boxSampleRate = sampleRateTable[fscod];
3086         if (boxSampleRate != sampleRate) {
3087             ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3088                 boxSampleRate, sampleRate);
3089             delete[] chunk;
3090             return ERROR_MALFORMED;
3091         }
3092 
3093         unsigned bsid = br.getBits(5);
3094         if (bsid == 9 || bsid == 10) {
3095             ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3096         } else if (bsid > 16) {
3097             ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3098             delete[] chunk;
3099             return ERROR_MALFORMED;
3100         }
3101 
3102         // skip
3103         br.skipBits(2);
3104         unsigned bsmod = br.getBits(3);
3105         unsigned acmod = br.getBits(3);
3106         unsigned lfeon = br.getBits(1);
3107         // we currently only support the first stream
3108         if (i == 0)
3109             channelCount = channelCountTable[acmod] + lfeon;
3110         ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3111 
3112         br.skipBits(3);
3113         unsigned num_dep_sub = br.getBits(4);
3114         ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3115         if (num_dep_sub != 0) {
3116             if (br.numBitsLeft() < 9) {
3117                 delete[] chunk;
3118                 return ERROR_MALFORMED;
3119             }
3120             static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3121                 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3122             unsigned chan_loc = br.getBits(9);
3123             unsigned mask = 1;
3124             for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3125                 if ((chan_loc & mask) != 0) {
3126                     // we currently only support the first stream
3127                     if (i == 0) {
3128                         channelCount++;
3129                         // these are 2 channels in the mask
3130                         if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3131                             channelCount++;
3132                         }
3133                     }
3134                     ALOGV(" %s", chan_loc_tbl[j]);
3135                 }
3136             }
3137         } else {
3138             if (br.numBitsLeft() == 0) {
3139                 delete[] chunk;
3140                 return ERROR_MALFORMED;
3141             }
3142             br.skipBits(1);
3143         }
3144     }
3145 
3146     if (br.numBitsLeft() != 0) {
3147         if (br.numBitsLeft() < 8) {
3148             delete[] chunk;
3149             return ERROR_MALFORMED;
3150         }
3151         unsigned mask = br.getBits(8);
3152         for (unsigned i = 0; i < 8; i++) {
3153             if (((0x1 << i) && mask) == 0)
3154                 continue;
3155 
3156             if (br.numBitsLeft() < 8) {
3157                 delete[] chunk;
3158                 return ERROR_MALFORMED;
3159             }
3160             switch (i) {
3161                 case 0: {
3162                     unsigned complexity = br.getBits(8);
3163                     ALOGV("Found a JOC stream with complexity = %d", complexity);
3164                 }break;
3165                 default: {
3166                     br.skipBits(8);
3167                 }break;
3168             }
3169         }
3170     }
3171     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3172     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3173     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3174 
3175     delete[] chunk;
3176     return OK;
3177 }
3178 
parseAC3SpecificBox(off64_t offset)3179 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3180     if (mLastTrack == NULL) {
3181         return ERROR_MALFORMED;
3182     }
3183 
3184     uint16_t sampleRate, channels;
3185     status_t status;
3186     if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3187         return status;
3188     }
3189     uint32_t size;
3190     // + 4-byte size
3191     // + 4-byte type
3192     // + 3-byte payload
3193     const uint32_t kAC3SpecificBoxSize = 11;
3194     if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3195         ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3196         return ERROR_MALFORMED;
3197     }
3198 
3199     offset += 4;
3200     uint32_t type;
3201     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3202         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3203         return ERROR_MALFORMED;
3204     }
3205 
3206     offset += 4;
3207     const uint32_t kAC3SpecificBoxPayloadSize = 3;
3208     uint8_t chunk[kAC3SpecificBoxPayloadSize];
3209     if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3210         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3211         return ERROR_MALFORMED;
3212     }
3213 
3214     ABitReader br(chunk, sizeof(chunk));
3215     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3216     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3217 
3218     unsigned fscod = br.getBits(2);
3219     if (fscod == 3) {
3220         ALOGE("Incorrect fscod (3) in AC3 header");
3221         return ERROR_MALFORMED;
3222     }
3223     unsigned boxSampleRate = sampleRateTable[fscod];
3224     if (boxSampleRate != sampleRate) {
3225         ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3226             boxSampleRate, sampleRate);
3227         return ERROR_MALFORMED;
3228     }
3229 
3230     unsigned bsid = br.getBits(5);
3231     if (bsid > 8) {
3232         ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3233         return ERROR_MALFORMED;
3234     }
3235 
3236     // skip
3237     unsigned bsmod __unused = br.getBits(3);
3238 
3239     unsigned acmod = br.getBits(3);
3240     unsigned lfeon = br.getBits(1);
3241     unsigned channelCount = channelCountTable[acmod] + lfeon;
3242 
3243     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3244     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3245     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3246     return OK;
3247 }
3248 
parseALACSampleEntry(off64_t * offset)3249 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3250     // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3251     // Store ALAC magic cookie (decoder needs it).
3252     uint8_t alacInfo[12];
3253     off64_t data_offset = *offset;
3254 
3255     if (mDataSource->readAt(
3256             data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3257         return ERROR_IO;
3258     }
3259     uint32_t size = U32_AT(&alacInfo[0]);
3260     if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3261             (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3262             (U32_AT(&alacInfo[8]) != 0)) {
3263         ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3264             size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3265         return ERROR_MALFORMED;
3266     }
3267     data_offset += sizeof(alacInfo);
3268     uint8_t cookie[size - sizeof(alacInfo)];
3269     if (mDataSource->readAt(
3270             data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3271         return ERROR_IO;
3272     }
3273 
3274     uint8_t bitsPerSample = cookie[5];
3275     AMediaFormat_setInt32(mLastTrack->meta,
3276             AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3277     AMediaFormat_setInt32(mLastTrack->meta,
3278             AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3279     AMediaFormat_setInt32(mLastTrack->meta,
3280             AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3281     AMediaFormat_setBuffer(mLastTrack->meta,
3282             AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3283     data_offset += sizeof(cookie);
3284     *offset = data_offset;
3285     return OK;
3286 }
3287 
parseSegmentIndex(off64_t offset,size_t size)3288 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3289   ALOGV("MPEG4Extractor::parseSegmentIndex");
3290 
3291     if (size < 12) {
3292       return -EINVAL;
3293     }
3294 
3295     uint32_t flags;
3296     if (!mDataSource->getUInt32(offset, &flags)) {
3297         return ERROR_MALFORMED;
3298     }
3299 
3300     uint32_t version = flags >> 24;
3301     flags &= 0xffffff;
3302 
3303     ALOGV("sidx version %d", version);
3304 
3305     uint32_t referenceId;
3306     if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3307         return ERROR_MALFORMED;
3308     }
3309 
3310     uint32_t timeScale;
3311     if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3312         return ERROR_MALFORMED;
3313     }
3314     ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3315     if (timeScale == 0)
3316         return ERROR_MALFORMED;
3317 
3318     uint64_t earliestPresentationTime;
3319     uint64_t firstOffset;
3320 
3321     offset += 12;
3322     size -= 12;
3323 
3324     if (version == 0) {
3325         if (size < 8) {
3326             return -EINVAL;
3327         }
3328         uint32_t tmp;
3329         if (!mDataSource->getUInt32(offset, &tmp)) {
3330             return ERROR_MALFORMED;
3331         }
3332         earliestPresentationTime = tmp;
3333         if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3334             return ERROR_MALFORMED;
3335         }
3336         firstOffset = tmp;
3337         offset += 8;
3338         size -= 8;
3339     } else {
3340         if (size < 16) {
3341             return -EINVAL;
3342         }
3343         if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3344             return ERROR_MALFORMED;
3345         }
3346         if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3347             return ERROR_MALFORMED;
3348         }
3349         offset += 16;
3350         size -= 16;
3351     }
3352     ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3353 
3354     if (size < 4) {
3355         return -EINVAL;
3356     }
3357 
3358     uint16_t referenceCount;
3359     if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3360         return ERROR_MALFORMED;
3361     }
3362     offset += 4;
3363     size -= 4;
3364     ALOGV("refcount: %d", referenceCount);
3365 
3366     if (size < referenceCount * 12) {
3367         return -EINVAL;
3368     }
3369 
3370     uint64_t total_duration = 0;
3371     for (unsigned int i = 0; i < referenceCount; i++) {
3372         uint32_t d1, d2, d3;
3373 
3374         if (!mDataSource->getUInt32(offset, &d1) ||     // size
3375             !mDataSource->getUInt32(offset + 4, &d2) || // duration
3376             !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3377             return ERROR_MALFORMED;
3378         }
3379 
3380         if (d1 & 0x80000000) {
3381             ALOGW("sub-sidx boxes not supported yet");
3382         }
3383         bool sap = d3 & 0x80000000;
3384         uint32_t saptype = (d3 >> 28) & 7;
3385         if (!sap || (saptype != 1 && saptype != 2)) {
3386             // type 1 and 2 are sync samples
3387             ALOGW("not a stream access point, or unsupported type: %08x", d3);
3388         }
3389         total_duration += d2;
3390         offset += 12;
3391         ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3392         SidxEntry se;
3393         se.mSize = d1 & 0x7fffffff;
3394         se.mDurationUs = 1000000LL * d2 / timeScale;
3395         mSidxEntries.add(se);
3396     }
3397 
3398     uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3399 
3400     if (mLastTrack == NULL)
3401         return ERROR_MALFORMED;
3402 
3403     int64_t metaDuration;
3404     if (!AMediaFormat_getInt64(mLastTrack->meta,
3405                 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3406         AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3407     }
3408     return OK;
3409 }
3410 
parseQTMetaKey(off64_t offset,size_t size)3411 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3412     if (size < 8) {
3413         return ERROR_MALFORMED;
3414     }
3415 
3416     uint32_t count;
3417     if (!mDataSource->getUInt32(offset + 4, &count)) {
3418         return ERROR_MALFORMED;
3419     }
3420 
3421     if (mMetaKeyMap.size() > 0) {
3422         ALOGW("'keys' atom seen again, discarding existing entries");
3423         mMetaKeyMap.clear();
3424     }
3425 
3426     off64_t keyOffset = offset + 8;
3427     off64_t stopOffset = offset + size;
3428     for (size_t i = 1; i <= count; i++) {
3429         if (keyOffset + 8 > stopOffset) {
3430             return ERROR_MALFORMED;
3431         }
3432 
3433         uint32_t keySize;
3434         if (!mDataSource->getUInt32(keyOffset, &keySize)
3435                 || keySize < 8
3436                 || keyOffset + keySize > stopOffset) {
3437             return ERROR_MALFORMED;
3438         }
3439 
3440         uint32_t type;
3441         if (!mDataSource->getUInt32(keyOffset + 4, &type)
3442                 || type != FOURCC("mdta")) {
3443             return ERROR_MALFORMED;
3444         }
3445 
3446         keySize -= 8;
3447         keyOffset += 8;
3448 
3449         auto keyData = heapbuffer<uint8_t>(keySize);
3450         if (keyData.get() == NULL) {
3451             return ERROR_MALFORMED;
3452         }
3453         if (mDataSource->readAt(
3454                 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3455             return ERROR_MALFORMED;
3456         }
3457 
3458         AString key((const char *)keyData.get(), keySize);
3459         mMetaKeyMap.add(i, key);
3460 
3461         keyOffset += keySize;
3462     }
3463     return OK;
3464 }
3465 
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3466 status_t MPEG4Extractor::parseQTMetaVal(
3467         int32_t keyId, off64_t offset, size_t size) {
3468     ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3469     if (index < 0) {
3470         // corresponding key is not present, ignore
3471         return ERROR_MALFORMED;
3472     }
3473 
3474     if (size <= 16) {
3475         return ERROR_MALFORMED;
3476     }
3477     uint32_t dataSize;
3478     if (!mDataSource->getUInt32(offset, &dataSize)
3479             || dataSize > size || dataSize <= 16) {
3480         return ERROR_MALFORMED;
3481     }
3482     uint32_t atomFourCC;
3483     if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3484             || atomFourCC != FOURCC("data")) {
3485         return ERROR_MALFORMED;
3486     }
3487     uint32_t dataType;
3488     if (!mDataSource->getUInt32(offset + 8, &dataType)
3489             || ((dataType & 0xff000000) != 0)) {
3490         // not well-known type
3491         return ERROR_MALFORMED;
3492     }
3493 
3494     dataSize -= 16;
3495     offset += 16;
3496 
3497     if (dataType == 23 && dataSize >= 4) {
3498         // BE Float32
3499         uint32_t val;
3500         if (!mDataSource->getUInt32(offset, &val)) {
3501             return ERROR_MALFORMED;
3502         }
3503         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3504             AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3505         }
3506     } else if (dataType == 67 && dataSize >= 4) {
3507         // BE signed int32
3508         uint32_t val;
3509         if (!mDataSource->getUInt32(offset, &val)) {
3510             return ERROR_MALFORMED;
3511         }
3512         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3513             AMediaFormat_setInt32(mFileMetaData,
3514                     AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3515         }
3516     } else {
3517         // add more keys if needed
3518         ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3519     }
3520 
3521     return OK;
3522 }
3523 
parseTrackHeader(off64_t data_offset,off64_t data_size)3524 status_t MPEG4Extractor::parseTrackHeader(
3525         off64_t data_offset, off64_t data_size) {
3526     if (data_size < 4) {
3527         return ERROR_MALFORMED;
3528     }
3529 
3530     uint8_t version;
3531     if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3532         return ERROR_IO;
3533     }
3534 
3535     size_t dynSize = (version == 1) ? 36 : 24;
3536 
3537     uint8_t buffer[36 + 60];
3538 
3539     if (data_size != (off64_t)dynSize + 60) {
3540         return ERROR_MALFORMED;
3541     }
3542 
3543     if (mDataSource->readAt(
3544                 data_offset, buffer, data_size) < (ssize_t)data_size) {
3545         return ERROR_IO;
3546     }
3547 
3548     uint64_t ctime __unused, mtime __unused, duration __unused;
3549     int32_t id;
3550 
3551     if (version == 1) {
3552         ctime = U64_AT(&buffer[4]);
3553         mtime = U64_AT(&buffer[12]);
3554         id = U32_AT(&buffer[20]);
3555         duration = U64_AT(&buffer[28]);
3556     } else if (version == 0) {
3557         ctime = U32_AT(&buffer[4]);
3558         mtime = U32_AT(&buffer[8]);
3559         id = U32_AT(&buffer[12]);
3560         duration = U32_AT(&buffer[20]);
3561     } else {
3562         return ERROR_UNSUPPORTED;
3563     }
3564 
3565     if (mLastTrack == NULL)
3566         return ERROR_MALFORMED;
3567 
3568     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3569 
3570     size_t matrixOffset = dynSize + 16;
3571     int32_t a00 = U32_AT(&buffer[matrixOffset]);
3572     int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3573     int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3574     int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3575 
3576 #if 0
3577     int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3578     int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3579 
3580     ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3581          a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3582     ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3583          a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3584 #endif
3585 
3586     uint32_t rotationDegrees;
3587 
3588     static const int32_t kFixedOne = 0x10000;
3589     if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3590         // Identity, no rotation
3591         rotationDegrees = 0;
3592     } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3593         rotationDegrees = 90;
3594     } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3595         rotationDegrees = 270;
3596     } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3597         rotationDegrees = 180;
3598     } else {
3599         ALOGW("We only support 0,90,180,270 degree rotation matrices");
3600         rotationDegrees = 0;
3601     }
3602 
3603     if (rotationDegrees != 0) {
3604         AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3605     }
3606 
3607     // Handle presentation display size, which could be different
3608     // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3609     uint32_t width = U32_AT(&buffer[dynSize + 52]);
3610     uint32_t height = U32_AT(&buffer[dynSize + 56]);
3611     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3612     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3613 
3614     return OK;
3615 }
3616 
parseITunesMetaData(off64_t offset,size_t size)3617 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3618     if (size == 0) {
3619         return OK;
3620     }
3621 
3622     if (size < 4 || size == SIZE_MAX) {
3623         return ERROR_MALFORMED;
3624     }
3625 
3626     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3627     if (buffer == NULL) {
3628         return ERROR_MALFORMED;
3629     }
3630     if (mDataSource->readAt(
3631                 offset, buffer, size) != (ssize_t)size) {
3632         delete[] buffer;
3633         buffer = NULL;
3634 
3635         return ERROR_IO;
3636     }
3637 
3638     uint32_t flags = U32_AT(buffer);
3639 
3640     const char *metadataKey = nullptr;
3641     char chunk[5];
3642     MakeFourCCString(mPath[4], chunk);
3643     ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3644     switch ((int32_t)mPath[4]) {
3645         case FOURCC("\251alb"):
3646         {
3647             metadataKey = "album";
3648             break;
3649         }
3650         case FOURCC("\251ART"):
3651         {
3652             metadataKey = "artist";
3653             break;
3654         }
3655         case FOURCC("aART"):
3656         {
3657             metadataKey = "albumartist";
3658             break;
3659         }
3660         case FOURCC("\251day"):
3661         {
3662             metadataKey = "year";
3663             break;
3664         }
3665         case FOURCC("\251nam"):
3666         {
3667             metadataKey = "title";
3668             break;
3669         }
3670         case FOURCC("\251wrt"):
3671         {
3672             metadataKey = "writer";
3673             break;
3674         }
3675         case FOURCC("covr"):
3676         {
3677             metadataKey = "albumart";
3678             break;
3679         }
3680         case FOURCC("gnre"):
3681         case FOURCC("\251gen"):
3682         {
3683             metadataKey = "genre";
3684             break;
3685         }
3686         case FOURCC("cpil"):
3687         {
3688             if (size == 9 && flags == 21) {
3689                 char tmp[16];
3690                 sprintf(tmp, "%d",
3691                         (int)buffer[size - 1]);
3692 
3693                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
3694             }
3695             break;
3696         }
3697         case FOURCC("trkn"):
3698         {
3699             if (size == 16 && flags == 0) {
3700                 char tmp[16];
3701                 uint16_t* pTrack = (uint16_t*)&buffer[10];
3702                 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3703                 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3704 
3705                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3706             }
3707             break;
3708         }
3709         case FOURCC("disk"):
3710         {
3711             if ((size == 14 || size == 16) && flags == 0) {
3712                 char tmp[16];
3713                 uint16_t* pDisc = (uint16_t*)&buffer[10];
3714                 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3715                 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3716 
3717                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
3718             }
3719             break;
3720         }
3721         case FOURCC("----"):
3722         {
3723             buffer[size] = '\0';
3724             switch (mPath[5]) {
3725                 case FOURCC("mean"):
3726                     mLastCommentMean.setTo((const char *)buffer + 4);
3727                     break;
3728                 case FOURCC("name"):
3729                     mLastCommentName.setTo((const char *)buffer + 4);
3730                     break;
3731                 case FOURCC("data"):
3732                     if (size < 8) {
3733                         delete[] buffer;
3734                         buffer = NULL;
3735                         ALOGE("b/24346430");
3736                         return ERROR_MALFORMED;
3737                     }
3738                     mLastCommentData.setTo((const char *)buffer + 8);
3739                     break;
3740             }
3741 
3742             // Once we have a set of mean/name/data info, go ahead and process
3743             // it to see if its something we are interested in.  Whether or not
3744             // were are interested in the specific tag, make sure to clear out
3745             // the set so we can be ready to process another tuple should one
3746             // show up later in the file.
3747             if ((mLastCommentMean.length() != 0) &&
3748                 (mLastCommentName.length() != 0) &&
3749                 (mLastCommentData.length() != 0)) {
3750 
3751                 if (mLastCommentMean == "com.apple.iTunes"
3752                         && mLastCommentName == "iTunSMPB") {
3753                     int32_t delay, padding;
3754                     if (sscanf(mLastCommentData,
3755                                " %*x %x %x %*x", &delay, &padding) == 2) {
3756                         if (mLastTrack == NULL) {
3757                             delete[] buffer;
3758                             return ERROR_MALFORMED;
3759                         }
3760 
3761                         AMediaFormat_setInt32(mLastTrack->meta,
3762                                 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
3763                         AMediaFormat_setInt32(mLastTrack->meta,
3764                                 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
3765                     }
3766                 }
3767 
3768                 mLastCommentMean.clear();
3769                 mLastCommentName.clear();
3770                 mLastCommentData.clear();
3771             }
3772             break;
3773         }
3774 
3775         default:
3776             break;
3777     }
3778 
3779     void *tmpData;
3780     size_t tmpDataSize;
3781     const char *s;
3782     if (size >= 8 && metadataKey &&
3783             !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
3784             !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
3785         if (!strcmp(metadataKey, "albumart")) {
3786             AMediaFormat_setBuffer(mFileMetaData, metadataKey,
3787                     buffer + 8, size - 8);
3788         } else if (!strcmp(metadataKey, "genre")) {
3789             if (flags == 0) {
3790                 // uint8_t genre code, iTunes genre codes are
3791                 // the standard id3 codes, except they start
3792                 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3793                 // We use standard id3 numbering, so subtract 1.
3794                 int genrecode = (int)buffer[size - 1];
3795                 genrecode--;
3796                 if (genrecode < 0) {
3797                     genrecode = 255; // reserved for 'unknown genre'
3798                 }
3799                 char genre[10];
3800                 sprintf(genre, "%d", genrecode);
3801 
3802                 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
3803             } else if (flags == 1) {
3804                 // custom genre string
3805                 buffer[size] = '\0';
3806 
3807                 AMediaFormat_setString(mFileMetaData,
3808                         metadataKey, (const char *)buffer + 8);
3809             }
3810         } else {
3811             buffer[size] = '\0';
3812 
3813             AMediaFormat_setString(mFileMetaData,
3814                     metadataKey, (const char *)buffer + 8);
3815         }
3816     }
3817 
3818     delete[] buffer;
3819     buffer = NULL;
3820 
3821     return OK;
3822 }
3823 
parseColorInfo(off64_t offset,size_t size)3824 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3825     if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3826         return ERROR_MALFORMED;
3827     }
3828 
3829     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3830     if (buffer == NULL) {
3831         return ERROR_MALFORMED;
3832     }
3833     if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3834         delete[] buffer;
3835         buffer = NULL;
3836 
3837         return ERROR_IO;
3838     }
3839 
3840     int32_t type = U32_AT(&buffer[0]);
3841     if ((type == FOURCC("nclx") && size >= 11)
3842             || (type == FOURCC("nclc") && size >= 10)) {
3843         // only store the first color specification
3844         int32_t existingColor;
3845         if (!AMediaFormat_getInt32(mLastTrack->meta,
3846                 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
3847             int32_t primaries = U16_AT(&buffer[4]);
3848             int32_t isotransfer = U16_AT(&buffer[6]);
3849             int32_t coeffs = U16_AT(&buffer[8]);
3850             bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
3851 
3852             int32_t range = 0;
3853             int32_t standard = 0;
3854             int32_t transfer = 0;
3855             ColorUtils::convertIsoColorAspectsToPlatformAspects(
3856                     primaries, isotransfer, coeffs, fullRange,
3857                     &range, &standard, &transfer);
3858 
3859             if (range != 0) {
3860                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
3861             }
3862             if (standard != 0) {
3863                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
3864             }
3865             if (transfer != 0) {
3866                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
3867             }
3868         }
3869     }
3870 
3871     delete[] buffer;
3872     buffer = NULL;
3873 
3874     return OK;
3875 }
3876 
parse3GPPMetaData(off64_t offset,size_t size,int depth)3877 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3878     if (size < 4 || size == SIZE_MAX) {
3879         return ERROR_MALFORMED;
3880     }
3881 
3882     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3883     if (buffer == NULL) {
3884         return ERROR_MALFORMED;
3885     }
3886     if (mDataSource->readAt(
3887                 offset, buffer, size) != (ssize_t)size) {
3888         delete[] buffer;
3889         buffer = NULL;
3890 
3891         return ERROR_IO;
3892     }
3893 
3894     const char *metadataKey = nullptr;
3895     switch (mPath[depth]) {
3896         case FOURCC("titl"):
3897         {
3898             metadataKey = "title";
3899             break;
3900         }
3901         case FOURCC("perf"):
3902         {
3903             metadataKey = "artist";
3904             break;
3905         }
3906         case FOURCC("auth"):
3907         {
3908             metadataKey = "writer";
3909             break;
3910         }
3911         case FOURCC("gnre"):
3912         {
3913             metadataKey = "genre";
3914             break;
3915         }
3916         case FOURCC("albm"):
3917         {
3918             if (buffer[size - 1] != '\0') {
3919               char tmp[4];
3920               sprintf(tmp, "%u", buffer[size - 1]);
3921 
3922               AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3923             }
3924 
3925             metadataKey = "album";
3926             break;
3927         }
3928         case FOURCC("yrrc"):
3929         {
3930             if (size < 6) {
3931                 delete[] buffer;
3932                 buffer = NULL;
3933                 ALOGE("b/62133227");
3934                 android_errorWriteLog(0x534e4554, "62133227");
3935                 return ERROR_MALFORMED;
3936             }
3937             char tmp[5];
3938             uint16_t year = U16_AT(&buffer[4]);
3939 
3940             if (year < 10000) {
3941                 sprintf(tmp, "%u", year);
3942 
3943                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
3944             }
3945             break;
3946         }
3947 
3948         default:
3949             break;
3950     }
3951 
3952     if (metadataKey) {
3953         bool isUTF8 = true; // Common case
3954         char16_t *framedata = NULL;
3955         int len16 = 0; // Number of UTF-16 characters
3956 
3957         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3958         if (size < 6) {
3959             delete[] buffer;
3960             buffer = NULL;
3961             return ERROR_MALFORMED;
3962         }
3963 
3964         if (size - 6 >= 4) {
3965             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3966             framedata = (char16_t *)(buffer + 6);
3967             if (0xfffe == *framedata) {
3968                 // endianness marker (BOM) doesn't match host endianness
3969                 for (int i = 0; i < len16; i++) {
3970                     framedata[i] = bswap_16(framedata[i]);
3971                 }
3972                 // BOM is now swapped to 0xfeff, we will execute next block too
3973             }
3974 
3975             if (0xfeff == *framedata) {
3976                 // Remove the BOM
3977                 framedata++;
3978                 len16--;
3979                 isUTF8 = false;
3980             }
3981             // else normal non-zero-length UTF-8 string
3982             // we can't handle UTF-16 without BOM as there is no other
3983             // indication of encoding.
3984         }
3985 
3986         if (isUTF8) {
3987             buffer[size] = 0;
3988             AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
3989         } else {
3990             // Convert from UTF-16 string to UTF-8 string.
3991             String8 tmpUTF8str(framedata, len16);
3992             AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
3993         }
3994     }
3995 
3996     delete[] buffer;
3997     buffer = NULL;
3998 
3999     return OK;
4000 }
4001 
parseID3v2MetaData(off64_t offset)4002 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
4003     ID3 id3(mDataSource, true /* ignorev1 */, offset);
4004 
4005     if (id3.isValid()) {
4006         struct Map {
4007             const char *key;
4008             const char *tag1;
4009             const char *tag2;
4010         };
4011         static const Map kMap[] = {
4012             { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4013             { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4014             { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4015             { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4016             { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4017             { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4018             { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4019             { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4020             { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4021             { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4022             { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4023         };
4024         static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4025 
4026         for (size_t i = 0; i < kNumMapEntries; ++i) {
4027             const char *ss;
4028             if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4029                 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4030                 if (it->done()) {
4031                     delete it;
4032                     it = new ID3::Iterator(id3, kMap[i].tag2);
4033                 }
4034 
4035                 if (it->done()) {
4036                     delete it;
4037                     continue;
4038                 }
4039 
4040                 String8 s;
4041                 it->getString(&s);
4042                 delete it;
4043 
4044                 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4045             }
4046         }
4047 
4048         size_t dataSize;
4049         String8 mime;
4050         const void *data = id3.getAlbumArt(&dataSize, &mime);
4051 
4052         if (data) {
4053             AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4054         }
4055     }
4056 }
4057 
getTrack(size_t index)4058 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4059     status_t err;
4060     if ((err = readMetaData()) != OK) {
4061         return NULL;
4062     }
4063 
4064     Track *track = mFirstTrack;
4065     while (index > 0) {
4066         if (track == NULL) {
4067             return NULL;
4068         }
4069 
4070         track = track->next;
4071         --index;
4072     }
4073 
4074     if (track == NULL) {
4075         return NULL;
4076     }
4077 
4078 
4079     Trex *trex = NULL;
4080     int32_t trackId;
4081     if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4082         for (size_t i = 0; i < mTrex.size(); i++) {
4083             Trex *t = &mTrex.editItemAt(i);
4084             if (t->track_ID == (uint32_t) trackId) {
4085                 trex = t;
4086                 break;
4087             }
4088         }
4089     } else {
4090         ALOGE("b/21657957");
4091         return NULL;
4092     }
4093 
4094     ALOGV("getTrack called, pssh: %zu", mPssh.size());
4095 
4096     const char *mime;
4097     if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4098         return NULL;
4099     }
4100 
4101     sp<ItemTable> itemTable;
4102     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4103         void *data;
4104         size_t size;
4105         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4106             return NULL;
4107         }
4108 
4109         const uint8_t *ptr = (const uint8_t *)data;
4110 
4111         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
4112             return NULL;
4113         }
4114     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4115             || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4116         void *data;
4117         size_t size;
4118         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4119             return NULL;
4120         }
4121 
4122         const uint8_t *ptr = (const uint8_t *)data;
4123 
4124         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
4125             return NULL;
4126         }
4127         if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4128             itemTable = mItemTable;
4129         }
4130     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4131         void *data;
4132         size_t size;
4133         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4134             return NULL;
4135         }
4136 
4137         const uint8_t *ptr = (const uint8_t *)data;
4138 
4139         if (size < 5 || ptr[0] != 0x81) {  // configurationVersion == 1
4140             return NULL;
4141         }
4142     }
4143 
4144     if (track->has_elst and !strncasecmp("video/", mime, 6) and track->elst_media_time > 0) {
4145         track->elstShiftStartTicks = track->elst_media_time;
4146         ALOGV("video track->elstShiftStartTicks :%" PRIu64, track->elstShiftStartTicks);
4147     }
4148 
4149     MPEG4Source *source =  new MPEG4Source(
4150             track->meta, mDataSource, track->timescale, track->sampleTable,
4151             mSidxEntries, trex, mMoofOffset, itemTable,
4152             track->elstShiftStartTicks);
4153     if (source->init() != OK) {
4154         delete source;
4155         return NULL;
4156     }
4157     return source;
4158 }
4159 
4160 // static
verifyTrack(Track * track)4161 status_t MPEG4Extractor::verifyTrack(Track *track) {
4162     const char *mime;
4163     CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4164 
4165     void *data;
4166     size_t size;
4167     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4168         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4169             return ERROR_MALFORMED;
4170         }
4171     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4172         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4173             return ERROR_MALFORMED;
4174         }
4175     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4176         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4177             return ERROR_MALFORMED;
4178         }
4179     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4180             || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4181             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4182         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4183             return ERROR_MALFORMED;
4184         }
4185     }
4186 
4187     if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4188         // Make sure we have all the metadata we need.
4189         ALOGE("stbl atom missing/invalid.");
4190         return ERROR_MALFORMED;
4191     }
4192 
4193     if (track->timescale == 0) {
4194         ALOGE("timescale invalid.");
4195         return ERROR_MALFORMED;
4196     }
4197 
4198     return OK;
4199 }
4200 
// MPEG-4 audio object type (AOT) identifiers. Entries that are commented out
// are retained for reference only and cannot be referred to from code.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1
    // AOT_NULL_OBJECT      = 0
    // AOT_AAC_MAIN         = 1    Main profile
    AOT_AAC_LC      = 2,        // Low Complexity object
    // AOT_AAC_SSR          = 3
    // AOT_AAC_LTP          = 4
    AOT_SBR         = 5,
    // AOT_AAC_SCAL         = 6
    // AOT_TWIN_VQ          = 7
    // AOT_CELP             = 8
    // AOT_HVXC             = 9
    // AOT_RSVD_10          = 10   (reserved)
    // AOT_RSVD_11          = 11   (reserved)
    // AOT_TTSI             = 12   TTSI Object
    // AOT_MAIN_SYNTH       = 13   Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14   Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15   General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16   Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,       // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18   (reserved)
    // AOT_ER_AAC_LTP       = 19   Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21   Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,       // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,       // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24   Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25   Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26   Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27   Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28   might become SSC
    AOT_PS          = 29,       // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30   MPEG Surround

    AOT_ESCAPE      = 31,       // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32   MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33   MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34   MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35   might become DST
    // AOT_RSVD_36          = 36   might become ALS
    // AOT_AAC_SLS          = 37   AAC + SLS
    // AOT_SLS              = 38   SLS
    // AOT_ER_AAC_ELD       = 39   AAC Enhanced Low Delay

    // AOT_USAC             = 42   USAC
    // AOT_SAOC             = 43   SAOC
    // AOT_LD_MPEGS         = 44   Low Delay MPEG Surround

    // AOT_RSVD50           = 50   Interim AOT for Rsvd50
};
4252 
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4253 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4254         const void *esds_data, size_t esds_size) {
4255     ESDS esds(esds_data, esds_size);
4256 
4257     uint8_t objectTypeIndication;
4258     if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4259         return ERROR_MALFORMED;
4260     }
4261 
4262     if (objectTypeIndication == 0xe1) {
4263         // This isn't MPEG4 audio at all, it's QCELP 14k...
4264         if (mLastTrack == NULL)
4265             return ERROR_MALFORMED;
4266 
4267         AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4268         return OK;
4269     }
4270 
4271     if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4272         // mp3 audio
4273         AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4274         return OK;
4275     }
4276 
4277     if (mLastTrack != NULL) {
4278         uint32_t maxBitrate = 0;
4279         uint32_t avgBitrate = 0;
4280         esds.getBitRate(&maxBitrate, &avgBitrate);
4281         if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4282             AMediaFormat_setInt32(mLastTrack->meta,
4283                     AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4284         }
4285         if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4286             AMediaFormat_setInt32(mLastTrack->meta,
4287                     AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4288         }
4289     }
4290 
4291     const uint8_t *csd;
4292     size_t csd_size;
4293     if (esds.getCodecSpecificInfo(
4294                 (const void **)&csd, &csd_size) != OK) {
4295         return ERROR_MALFORMED;
4296     }
4297 
4298     if (kUseHexDump) {
4299         printf("ESD of size %zu\n", csd_size);
4300         hexdump(csd, csd_size);
4301     }
4302 
4303     if (csd_size == 0) {
4304         // There's no further information, i.e. no codec specific data
4305         // Let's assume that the information provided in the mpeg4 headers
4306         // is accurate and hope for the best.
4307 
4308         return OK;
4309     }
4310 
4311     if (csd_size < 2) {
4312         return ERROR_MALFORMED;
4313     }
4314 
4315     if (objectTypeIndication == 0xdd) {
4316         // vorbis audio
4317         if (csd[0] != 0x02) {
4318             return ERROR_MALFORMED;
4319         }
4320 
4321         // codecInfo starts with two lengths, len1 and len2, that are
4322         // "Xiph-style-lacing encoded"..
4323 
4324         size_t offset = 1;
4325         size_t len1 = 0;
4326         while (offset < csd_size && csd[offset] == 0xff) {
4327             if (__builtin_add_overflow(len1, 0xff, &len1)) {
4328                 return ERROR_MALFORMED;
4329             }
4330             ++offset;
4331         }
4332         if (offset >= csd_size) {
4333             return ERROR_MALFORMED;
4334         }
4335         if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4336             return ERROR_MALFORMED;
4337         }
4338         ++offset;
4339         if (len1 == 0) {
4340             return ERROR_MALFORMED;
4341         }
4342 
4343         size_t len2 = 0;
4344         while (offset < csd_size && csd[offset] == 0xff) {
4345             if (__builtin_add_overflow(len2, 0xff, &len2)) {
4346                 return ERROR_MALFORMED;
4347             }
4348             ++offset;
4349         }
4350         if (offset >= csd_size) {
4351             return ERROR_MALFORMED;
4352         }
4353         if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4354             return ERROR_MALFORMED;
4355         }
4356         ++offset;
4357         if (len2 == 0) {
4358             return ERROR_MALFORMED;
4359         }
4360         if (offset >= csd_size || csd[offset] != 0x01) {
4361             return ERROR_MALFORMED;
4362         }
4363         // formerly kKeyVorbisInfo
4364         AMediaFormat_setBuffer(mLastTrack->meta,
4365                 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4366 
4367         if (__builtin_add_overflow(offset, len1, &offset) ||
4368                 offset >= csd_size || csd[offset] != 0x03) {
4369             return ERROR_MALFORMED;
4370         }
4371 
4372         if (__builtin_add_overflow(offset, len2, &offset) ||
4373                 offset >= csd_size || csd[offset] != 0x05) {
4374             return ERROR_MALFORMED;
4375         }
4376 
4377         // formerly kKeyVorbisBooks
4378         AMediaFormat_setBuffer(mLastTrack->meta,
4379                 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4380         AMediaFormat_setString(mLastTrack->meta,
4381                 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4382 
4383         return OK;
4384     }
4385 
4386     static uint32_t kSamplingRate[] = {
4387         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4388         16000, 12000, 11025, 8000, 7350
4389     };
4390 
4391     ABitReader br(csd, csd_size);
4392     uint32_t objectType = br.getBits(5);
4393 
4394     if (objectType == 31) {  // AAC-ELD => additional 6 bits
4395         objectType = 32 + br.getBits(6);
4396     }
4397 
4398     if (mLastTrack == NULL)
4399         return ERROR_MALFORMED;
4400 
4401     //keep AOT type
4402     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4403 
4404     uint32_t freqIndex = br.getBits(4);
4405 
4406     int32_t sampleRate = 0;
4407     int32_t numChannels = 0;
4408     if (freqIndex == 15) {
4409         if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4410         sampleRate = br.getBits(24);
4411         numChannels = br.getBits(4);
4412     } else {
4413         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4414         numChannels = br.getBits(4);
4415 
4416         if (freqIndex == 13 || freqIndex == 14) {
4417             return ERROR_MALFORMED;
4418         }
4419 
4420         sampleRate = kSamplingRate[freqIndex];
4421     }
4422 
4423     if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4424         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4425         uint32_t extFreqIndex = br.getBits(4);
4426         int32_t extSampleRate __unused;
4427         if (extFreqIndex == 15) {
4428             if (csd_size < 8) {
4429                 return ERROR_MALFORMED;
4430             }
4431             if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4432             extSampleRate = br.getBits(24);
4433         } else {
4434             if (extFreqIndex == 13 || extFreqIndex == 14) {
4435                 return ERROR_MALFORMED;
4436             }
4437             extSampleRate = kSamplingRate[extFreqIndex];
4438         }
4439         //TODO: save the extension sampling rate value in meta data =>
4440         //      AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4441     }
4442 
4443     switch (numChannels) {
4444         // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4445         case 0:
4446         case 1:// FC
4447         case 2:// FL FR
4448         case 3:// FC, FL FR
4449         case 4:// FC, FL FR, RC
4450         case 5:// FC, FL FR, SL SR
4451         case 6:// FC, FL FR, SL SR, LFE
4452             //numChannels already contains the right value
4453             break;
4454         case 11:// FC, FL FR, SL SR, RC, LFE
4455             numChannels = 7;
4456             break;
4457         case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4458         case 12:// FC, FL  FR,  SL SR, RL RR, LFE
4459         case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
4460             numChannels = 8;
4461             break;
4462         default:
4463             return ERROR_UNSUPPORTED;
4464     }
4465 
4466     {
4467         if (objectType == AOT_SBR || objectType == AOT_PS) {
4468             if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4469             objectType = br.getBits(5);
4470 
4471             if (objectType == AOT_ESCAPE) {
4472                 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4473                 objectType = 32 + br.getBits(6);
4474             }
4475         }
4476         if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4477                 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4478                 objectType == AOT_ER_BSAC) {
4479             if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4480             const int32_t frameLengthFlag __unused = br.getBits(1);
4481 
4482             const int32_t dependsOnCoreCoder = br.getBits(1);
4483 
4484             if (dependsOnCoreCoder ) {
4485                 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4486                 const int32_t coreCoderDelay __unused = br.getBits(14);
4487             }
4488 
4489             int32_t extensionFlag = -1;
4490             if (br.numBitsLeft() > 0) {
4491                 extensionFlag = br.getBits(1);
4492             } else {
4493                 switch (objectType) {
4494                 // 14496-3 4.5.1.1 extensionFlag
4495                 case AOT_AAC_LC:
4496                     extensionFlag = 0;
4497                     break;
4498                 case AOT_ER_AAC_LC:
4499                 case AOT_ER_AAC_SCAL:
4500                 case AOT_ER_BSAC:
4501                 case AOT_ER_AAC_LD:
4502                     extensionFlag = 1;
4503                     break;
4504                 default:
4505                     return ERROR_MALFORMED;
4506                     break;
4507                 }
4508                 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4509                         extensionFlag, objectType);
4510             }
4511 
4512             if (numChannels == 0) {
4513                 int32_t channelsEffectiveNum = 0;
4514                 int32_t channelsNum = 0;
4515                 if (br.numBitsLeft() < 32) {
4516                     return ERROR_MALFORMED;
4517                 }
4518                 const int32_t ElementInstanceTag __unused = br.getBits(4);
4519                 const int32_t Profile __unused = br.getBits(2);
4520                 const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
4521                 const int32_t NumFrontChannelElements = br.getBits(4);
4522                 const int32_t NumSideChannelElements = br.getBits(4);
4523                 const int32_t NumBackChannelElements = br.getBits(4);
4524                 const int32_t NumLfeChannelElements = br.getBits(2);
4525                 const int32_t NumAssocDataElements __unused = br.getBits(3);
4526                 const int32_t NumValidCcElements __unused = br.getBits(4);
4527 
4528                 const int32_t MonoMixdownPresent = br.getBits(1);
4529 
4530                 if (MonoMixdownPresent != 0) {
4531                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4532                     const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
4533                 }
4534 
4535                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4536                 const int32_t StereoMixdownPresent = br.getBits(1);
4537                 if (StereoMixdownPresent != 0) {
4538                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4539                     const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
4540                 }
4541 
4542                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4543                 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4544                 if (MatrixMixdownIndexPresent != 0) {
4545                     if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4546                     const int32_t MatrixMixdownIndex __unused = br.getBits(2);
4547                     const int32_t PseudoSurroundEnable __unused = br.getBits(1);
4548                 }
4549 
4550                 int i;
4551                 for (i=0; i < NumFrontChannelElements; i++) {
4552                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4553                     const int32_t FrontElementIsCpe = br.getBits(1);
4554                     const int32_t FrontElementTagSelect __unused = br.getBits(4);
4555                     channelsNum += FrontElementIsCpe ? 2 : 1;
4556                 }
4557 
4558                 for (i=0; i < NumSideChannelElements; i++) {
4559                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4560                     const int32_t SideElementIsCpe = br.getBits(1);
4561                     const int32_t SideElementTagSelect __unused = br.getBits(4);
4562                     channelsNum += SideElementIsCpe ? 2 : 1;
4563                 }
4564 
4565                 for (i=0; i < NumBackChannelElements; i++) {
4566                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4567                     const int32_t BackElementIsCpe = br.getBits(1);
4568                     const int32_t BackElementTagSelect __unused = br.getBits(4);
4569                     channelsNum += BackElementIsCpe ? 2 : 1;
4570                 }
4571                 channelsEffectiveNum = channelsNum;
4572 
4573                 for (i=0; i < NumLfeChannelElements; i++) {
4574                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4575                     const int32_t LfeElementTagSelect __unused = br.getBits(4);
4576                     channelsNum += 1;
4577                 }
4578                 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4579                 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4580                 numChannels = channelsNum;
4581             }
4582         }
4583     }
4584 
4585     if (numChannels == 0) {
4586         return ERROR_UNSUPPORTED;
4587     }
4588 
4589     if (mLastTrack == NULL)
4590         return ERROR_MALFORMED;
4591 
4592     int32_t prevSampleRate;
4593     CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
4594 
4595     if (prevSampleRate != sampleRate) {
4596         ALOGV("mpeg4 audio sample rate different from previous setting. "
4597              "was: %d, now: %d", prevSampleRate, sampleRate);
4598     }
4599 
4600     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
4601 
4602     int32_t prevChannelCount;
4603     CHECK(AMediaFormat_getInt32(mLastTrack->meta,
4604             AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
4605 
4606     if (prevChannelCount != numChannels) {
4607         ALOGV("mpeg4 audio channel count different from previous setting. "
4608              "was: %d, now: %d", prevChannelCount, numChannels);
4609     }
4610 
4611     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
4612 
4613     return OK;
4614 }
4615 
adjustRawDefaultFrameSize()4616 void MPEG4Extractor::adjustRawDefaultFrameSize() {
4617     int32_t chanCount = 0;
4618     int32_t bitWidth = 0;
4619     const char *mimeStr = NULL;
4620 
4621     if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
4622         !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
4623         AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
4624         AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
4625         // samplesize in stsz may not right , so updade default samplesize
4626         mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
4627     }
4628 }
4629 
4630 ////////////////////////////////////////////////////////////////////////////////
4631 
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks)4632 MPEG4Source::MPEG4Source(
4633         AMediaFormat *format,
4634         DataSourceHelper *dataSource,
4635         int32_t timeScale,
4636         const sp<SampleTable> &sampleTable,
4637         Vector<SidxEntry> &sidx,
4638         const Trex *trex,
4639         off64_t firstMoofOffset,
4640         const sp<ItemTable> &itemTable,
4641         uint64_t elstShiftStartTicks)
4642     : mFormat(format),
4643       mDataSource(dataSource),
4644       mTimescale(timeScale),
4645       mSampleTable(sampleTable),
4646       mCurrentSampleIndex(0),
4647       mCurrentFragmentIndex(0),
4648       mSegments(sidx),
4649       mTrex(trex),
4650       mFirstMoofOffset(firstMoofOffset),
4651       mCurrentMoofOffset(firstMoofOffset),
4652       mNextMoofOffset(-1),
4653       mCurrentTime(0),
4654       mDefaultEncryptedByteBlock(0),
4655       mDefaultSkipByteBlock(0),
4656       mCurrentSampleInfoAllocSize(0),
4657       mCurrentSampleInfoSizes(NULL),
4658       mCurrentSampleInfoOffsetsAllocSize(0),
4659       mCurrentSampleInfoOffsets(NULL),
4660       mIsAVC(false),
4661       mIsHEVC(false),
4662       mIsAC4(false),
4663       mIsPcm(false),
4664       mNALLengthSize(0),
4665       mStarted(false),
4666       mBuffer(NULL),
4667       mSrcBuffer(NULL),
4668       mIsHeif(itemTable != NULL),
4669       mItemTable(itemTable),
4670       mElstShiftStartTicks(elstShiftStartTicks) {
4671 
4672     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
4673 
4674     AMediaFormat_getInt32(mFormat,
4675             AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
4676     mDefaultIVSize = 0;
4677     AMediaFormat_getInt32(mFormat,
4678             AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
4679     void *key;
4680     size_t keysize;
4681     if (AMediaFormat_getBuffer(mFormat,
4682             AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
4683         CHECK(keysize <= 16);
4684         memset(mCryptoKey, 0, 16);
4685         memcpy(mCryptoKey, key, keysize);
4686     }
4687 
4688     AMediaFormat_getInt32(mFormat,
4689             AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
4690     AMediaFormat_getInt32(mFormat,
4691             AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
4692 
4693     const char *mime;
4694     bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
4695     CHECK(success);
4696 
4697     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
4698     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
4699               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
4700     mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
4701 
4702     if (mIsAVC) {
4703         void *data;
4704         size_t size;
4705         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
4706 
4707         const uint8_t *ptr = (const uint8_t *)data;
4708 
4709         CHECK(size >= 7);
4710         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
4711 
4712         // The number of bytes used to encode the length of a NAL unit.
4713         mNALLengthSize = 1 + (ptr[4] & 3);
4714     } else if (mIsHEVC) {
4715         void *data;
4716         size_t size;
4717         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
4718 
4719         const uint8_t *ptr = (const uint8_t *)data;
4720 
4721         CHECK(size >= 22);
4722         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
4723 
4724         mNALLengthSize = 1 + (ptr[14 + 7] & 3);
4725     }
4726 
4727     mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
4728     mIsAudio = !strncasecmp(mime, "audio/", 6);
4729 
4730     if (mIsPcm) {
4731         int32_t numChannels = 0;
4732         int32_t bitsPerSample = 0;
4733         CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
4734         CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
4735 
4736         int32_t bytesPerSample = bitsPerSample >> 3;
4737         int32_t pcmSampleSize = bytesPerSample * numChannels;
4738 
4739         size_t maxSampleSize;
4740         status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
4741         if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
4742                || bitsPerSample != 16) {
4743             // Not supported
4744             mIsPcm = false;
4745         } else {
4746             AMediaFormat_setInt32(mFormat,
4747                     AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
4748         }
4749     }
4750 
4751     CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
4752 
4753 }
4754 
init()4755 status_t MPEG4Source::init() {
4756     status_t err = OK;
4757     const char *mime;
4758     CHECK(AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime));
4759     if (mFirstMoofOffset != 0) {
4760         off64_t offset = mFirstMoofOffset;
4761         err = parseChunk(&offset);
4762         if(err == OK && !strncasecmp("video/", mime, 6)
4763             && !mCurrentSamples.isEmpty()) {
4764             // Start offset should be less or equal to composition time of first sample.
4765             // ISO : sample_composition_time_offset, version 0 (unsigned) for major brands.
4766             mElstShiftStartTicks = std::min(mElstShiftStartTicks,
4767                                             (uint64_t)(*mCurrentSamples.begin()).compositionOffset);
4768         }
4769         return err;
4770     }
4771 
4772     if (!strncasecmp("video/", mime, 6)) {
4773         uint64_t firstSampleCTS = 0;
4774         err = mSampleTable->getMetaDataForSample(0, NULL, NULL, &firstSampleCTS);
4775         // Start offset should be less or equal to composition time of first sample.
4776         // Composition time stamp of first sample cannot be negative.
4777         mElstShiftStartTicks = std::min(mElstShiftStartTicks, firstSampleCTS);
4778     }
4779 
4780     return err;
4781 }
4782 
~MPEG4Source()4783 MPEG4Source::~MPEG4Source() {
4784     if (mStarted) {
4785         stop();
4786     }
4787     free(mCurrentSampleInfoSizes);
4788     free(mCurrentSampleInfoOffsets);
4789 }
4790 
start()4791 media_status_t MPEG4Source::start() {
4792     Mutex::Autolock autoLock(mLock);
4793 
4794     CHECK(!mStarted);
4795 
4796     int32_t tmp;
4797     CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
4798     size_t max_size = tmp;
4799 
4800     // A somewhat arbitrary limit that should be sufficient for 8k video frames
4801     // If you see the message below for a valid input stream: increase the limit
4802     const size_t kMaxBufferSize = 64 * 1024 * 1024;
4803     if (max_size > kMaxBufferSize) {
4804         ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
4805         return AMEDIA_ERROR_MALFORMED;
4806     }
4807     if (max_size == 0) {
4808         ALOGE("zero max input size");
4809         return AMEDIA_ERROR_MALFORMED;
4810     }
4811 
4812     // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
4813     const size_t kInitialBuffers = 2;
4814     const size_t kMaxBuffers = 8;
4815     const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
4816     mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
4817     mSrcBuffer = new (std::nothrow) uint8_t[max_size];
4818     if (mSrcBuffer == NULL) {
4819         // file probably specified a bad max size
4820         return AMEDIA_ERROR_MALFORMED;
4821     }
4822 
4823     mStarted = true;
4824 
4825     return AMEDIA_OK;
4826 }
4827 
stop()4828 media_status_t MPEG4Source::stop() {
4829     Mutex::Autolock autoLock(mLock);
4830 
4831     CHECK(mStarted);
4832 
4833     if (mBuffer != NULL) {
4834         mBuffer->release();
4835         mBuffer = NULL;
4836     }
4837 
4838     delete[] mSrcBuffer;
4839     mSrcBuffer = NULL;
4840 
4841     mStarted = false;
4842     mCurrentSampleIndex = 0;
4843 
4844     return AMEDIA_OK;
4845 }
4846 
parseChunk(off64_t * offset)4847 status_t MPEG4Source::parseChunk(off64_t *offset) {
4848     uint32_t hdr[2];
4849     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4850         return ERROR_IO;
4851     }
4852     uint64_t chunk_size = ntohl(hdr[0]);
4853     uint32_t chunk_type = ntohl(hdr[1]);
4854     off64_t data_offset = *offset + 8;
4855 
4856     if (chunk_size == 1) {
4857         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4858             return ERROR_IO;
4859         }
4860         chunk_size = ntoh64(chunk_size);
4861         data_offset += 8;
4862 
4863         if (chunk_size < 16) {
4864             // The smallest valid chunk is 16 bytes long in this case.
4865             return ERROR_MALFORMED;
4866         }
4867     } else if (chunk_size < 8) {
4868         // The smallest valid chunk is 8 bytes long.
4869         return ERROR_MALFORMED;
4870     }
4871 
4872     char chunk[5];
4873     MakeFourCCString(chunk_type, chunk);
4874     ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4875 
4876     off64_t chunk_data_size = *offset + chunk_size - data_offset;
4877 
4878     switch(chunk_type) {
4879 
4880         case FOURCC("traf"):
4881         case FOURCC("moof"): {
4882             off64_t stop_offset = *offset + chunk_size;
4883             *offset = data_offset;
4884             while (*offset < stop_offset) {
4885                 status_t err = parseChunk(offset);
4886                 if (err != OK) {
4887                     return err;
4888                 }
4889             }
4890             if (chunk_type == FOURCC("moof")) {
4891                 // *offset points to the box following this moof. Find the next moof from there.
4892 
4893                 while (true) {
4894                     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4895                         // no more box to the end of file.
4896                         break;
4897                     }
4898                     chunk_size = ntohl(hdr[0]);
4899                     chunk_type = ntohl(hdr[1]);
4900                     if (chunk_size == 1) {
4901                         // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4902                         // which is defined in 4.2 Object Structure.
4903                         // When chunk_size==1, 8 bytes follows as "largesize".
4904                         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4905                             return ERROR_IO;
4906                         }
4907                         chunk_size = ntoh64(chunk_size);
4908                         if (chunk_size < 16) {
4909                             // The smallest valid chunk is 16 bytes long in this case.
4910                             return ERROR_MALFORMED;
4911                         }
4912                     } else if (chunk_size == 0) {
4913                         // next box extends to end of file.
4914                     } else if (chunk_size < 8) {
4915                         // The smallest valid chunk is 8 bytes long in this case.
4916                         return ERROR_MALFORMED;
4917                     }
4918 
4919                     if (chunk_type == FOURCC("moof")) {
4920                         mNextMoofOffset = *offset;
4921                         break;
4922                     } else if (chunk_size == 0) {
4923                         break;
4924                     }
4925                     *offset += chunk_size;
4926                 }
4927             }
4928             break;
4929         }
4930 
4931         case FOURCC("tfhd"): {
4932                 status_t err;
4933                 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4934                     return err;
4935                 }
4936                 *offset += chunk_size;
4937                 break;
4938         }
4939 
4940         case FOURCC("trun"): {
4941                 status_t err;
4942                 if (mLastParsedTrackId == mTrackId) {
4943                     if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4944                         return err;
4945                     }
4946                 }
4947 
4948                 *offset += chunk_size;
4949                 break;
4950         }
4951 
4952         case FOURCC("saiz"): {
4953             status_t err;
4954             if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4955                 return err;
4956             }
4957             *offset += chunk_size;
4958             break;
4959         }
4960         case FOURCC("saio"): {
4961             status_t err;
4962             if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
4963                     != OK) {
4964                 return err;
4965             }
4966             *offset += chunk_size;
4967             break;
4968         }
4969 
4970         case FOURCC("senc"): {
4971             status_t err;
4972             if ((err = parseSampleEncryption(data_offset)) != OK) {
4973                 return err;
4974             }
4975             *offset += chunk_size;
4976             break;
4977         }
4978 
4979         case FOURCC("mdat"): {
4980             // parse DRM info if present
4981             ALOGV("MPEG4Source::parseChunk mdat");
4982             // if saiz/saoi was previously observed, do something with the sampleinfos
4983             *offset += chunk_size;
4984             break;
4985         }
4986 
4987         default: {
4988             *offset += chunk_size;
4989             break;
4990         }
4991     }
4992     return OK;
4993 }
4994 
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)4995 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4996         off64_t offset, off64_t /* size */) {
4997     ALOGV("parseSampleAuxiliaryInformationSizes");
4998     // 14496-12 8.7.12
4999     uint8_t version;
5000     if (mDataSource->readAt(
5001             offset, &version, sizeof(version))
5002             < (ssize_t)sizeof(version)) {
5003         return ERROR_IO;
5004     }
5005 
5006     if (version != 0) {
5007         return ERROR_UNSUPPORTED;
5008     }
5009     offset++;
5010 
5011     uint32_t flags;
5012     if (!mDataSource->getUInt24(offset, &flags)) {
5013         return ERROR_IO;
5014     }
5015     offset += 3;
5016 
5017     if (flags & 1) {
5018         uint32_t tmp;
5019         if (!mDataSource->getUInt32(offset, &tmp)) {
5020             return ERROR_MALFORMED;
5021         }
5022         mCurrentAuxInfoType = tmp;
5023         offset += 4;
5024         if (!mDataSource->getUInt32(offset, &tmp)) {
5025             return ERROR_MALFORMED;
5026         }
5027         mCurrentAuxInfoTypeParameter = tmp;
5028         offset += 4;
5029     }
5030 
5031     uint8_t defsize;
5032     if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5033         return ERROR_MALFORMED;
5034     }
5035     mCurrentDefaultSampleInfoSize = defsize;
5036     offset++;
5037 
5038     uint32_t smplcnt;
5039     if (!mDataSource->getUInt32(offset, &smplcnt)) {
5040         return ERROR_MALFORMED;
5041     }
5042     mCurrentSampleInfoCount = smplcnt;
5043     offset += 4;
5044 
5045     if (mCurrentDefaultSampleInfoSize != 0) {
5046         ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5047         return OK;
5048     }
5049     if (smplcnt > mCurrentSampleInfoAllocSize) {
5050         uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5051         if (newPtr == NULL) {
5052             ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5053             return NO_MEMORY;
5054         }
5055         mCurrentSampleInfoSizes = newPtr;
5056         mCurrentSampleInfoAllocSize = smplcnt;
5057     }
5058 
5059     mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5060     return OK;
5061 }
5062 
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)5063 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5064         off64_t offset, off64_t /* size */) {
5065     ALOGV("parseSampleAuxiliaryInformationOffsets");
5066     // 14496-12 8.7.13
5067     uint8_t version;
5068     if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5069         return ERROR_IO;
5070     }
5071     offset++;
5072 
5073     uint32_t flags;
5074     if (!mDataSource->getUInt24(offset, &flags)) {
5075         return ERROR_IO;
5076     }
5077     offset += 3;
5078 
5079     uint32_t entrycount;
5080     if (!mDataSource->getUInt32(offset, &entrycount)) {
5081         return ERROR_IO;
5082     }
5083     offset += 4;
5084     if (entrycount == 0) {
5085         return OK;
5086     }
5087     if (entrycount > UINT32_MAX / 8) {
5088         return ERROR_MALFORMED;
5089     }
5090 
5091     if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5092         uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5093         if (newPtr == NULL) {
5094             ALOGE("failed to realloc %u -> %u",
5095                     mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5096             return NO_MEMORY;
5097         }
5098         mCurrentSampleInfoOffsets = newPtr;
5099         mCurrentSampleInfoOffsetsAllocSize = entrycount;
5100     }
5101     mCurrentSampleInfoOffsetCount = entrycount;
5102 
5103     if (mCurrentSampleInfoOffsets == NULL) {
5104         return OK;
5105     }
5106 
5107     for (size_t i = 0; i < entrycount; i++) {
5108         if (version == 0) {
5109             uint32_t tmp;
5110             if (!mDataSource->getUInt32(offset, &tmp)) {
5111                 return ERROR_IO;
5112             }
5113             mCurrentSampleInfoOffsets[i] = tmp;
5114             offset += 4;
5115         } else {
5116             uint64_t tmp;
5117             if (!mDataSource->getUInt64(offset, &tmp)) {
5118                 return ERROR_IO;
5119             }
5120             mCurrentSampleInfoOffsets[i] = tmp;
5121             offset += 8;
5122         }
5123     }
5124 
5125     // parse clear/encrypted data
5126 
5127     off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5128 
5129     drmoffset += mCurrentMoofOffset;
5130 
5131     return parseClearEncryptedSizes(drmoffset, false, 0);
5132 }
5133 
parseClearEncryptedSizes(off64_t offset,bool isSubsampleEncryption,uint32_t flags)5134 status_t MPEG4Source::parseClearEncryptedSizes(
5135         off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
5136 
5137     int32_t ivlength;
5138     if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5139         return ERROR_MALFORMED;
5140     }
5141 
5142     // only 0, 8 and 16 byte initialization vectors are supported
5143     if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5144         ALOGW("unsupported IV length: %d", ivlength);
5145         return ERROR_MALFORMED;
5146     }
5147 
5148     uint32_t sampleCount = mCurrentSampleInfoCount;
5149     if (isSubsampleEncryption) {
5150         if (!mDataSource->getUInt32(offset, &sampleCount)) {
5151             return ERROR_IO;
5152         }
5153         offset += 4;
5154     }
5155 
5156     // read CencSampleAuxiliaryDataFormats
5157     for (size_t i = 0; i < sampleCount; i++) {
5158         if (i >= mCurrentSamples.size()) {
5159             ALOGW("too few samples");
5160             break;
5161         }
5162         Sample *smpl = &mCurrentSamples.editItemAt(i);
5163         if (!smpl->clearsizes.isEmpty()) {
5164             continue;
5165         }
5166 
5167         memset(smpl->iv, 0, 16);
5168         if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5169             return ERROR_IO;
5170         }
5171 
5172         offset += ivlength;
5173 
5174         bool readSubsamples;
5175         if (isSubsampleEncryption) {
5176             readSubsamples = flags & 2;
5177         } else {
5178             int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5179             if (smplinfosize == 0) {
5180                 smplinfosize = mCurrentSampleInfoSizes[i];
5181             }
5182             readSubsamples = smplinfosize > ivlength;
5183         }
5184 
5185         if (readSubsamples) {
5186             uint16_t numsubsamples;
5187             if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5188                 return ERROR_IO;
5189             }
5190             offset += 2;
5191             for (size_t j = 0; j < numsubsamples; j++) {
5192                 uint16_t numclear;
5193                 uint32_t numencrypted;
5194                 if (!mDataSource->getUInt16(offset, &numclear)) {
5195                     return ERROR_IO;
5196                 }
5197                 offset += 2;
5198                 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5199                     return ERROR_IO;
5200                 }
5201                 offset += 4;
5202                 smpl->clearsizes.add(numclear);
5203                 smpl->encryptedsizes.add(numencrypted);
5204             }
5205         } else {
5206             smpl->clearsizes.add(0);
5207             smpl->encryptedsizes.add(smpl->size);
5208         }
5209     }
5210 
5211     return OK;
5212 }
5213 
parseSampleEncryption(off64_t offset)5214 status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
5215     uint32_t flags;
5216     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5217         return ERROR_MALFORMED;
5218     }
5219     return parseClearEncryptedSizes(offset + 4, true, flags);
5220 }
5221 
parseTrackFragmentHeader(off64_t offset,off64_t size)5222 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5223 
5224     if (size < 8) {
5225         return -EINVAL;
5226     }
5227 
5228     uint32_t flags;
5229     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5230         return ERROR_MALFORMED;
5231     }
5232 
5233     if (flags & 0xff000000) {
5234         return -EINVAL;
5235     }
5236 
5237     if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5238         return ERROR_MALFORMED;
5239     }
5240 
5241     if (mLastParsedTrackId != mTrackId) {
5242         // this is not the right track, skip it
5243         return OK;
5244     }
5245 
5246     mTrackFragmentHeaderInfo.mFlags = flags;
5247     mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5248     offset += 8;
5249     size -= 8;
5250 
5251     ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5252 
5253     if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5254         if (size < 8) {
5255             return -EINVAL;
5256         }
5257 
5258         if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5259             return ERROR_MALFORMED;
5260         }
5261         offset += 8;
5262         size -= 8;
5263     }
5264 
5265     if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5266         if (size < 4) {
5267             return -EINVAL;
5268         }
5269 
5270         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5271             return ERROR_MALFORMED;
5272         }
5273         offset += 4;
5274         size -= 4;
5275     }
5276 
5277     if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5278         if (size < 4) {
5279             return -EINVAL;
5280         }
5281 
5282         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5283             return ERROR_MALFORMED;
5284         }
5285         offset += 4;
5286         size -= 4;
5287     }
5288 
5289     if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5290         if (size < 4) {
5291             return -EINVAL;
5292         }
5293 
5294         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5295             return ERROR_MALFORMED;
5296         }
5297         offset += 4;
5298         size -= 4;
5299     }
5300 
5301     if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5302         if (size < 4) {
5303             return -EINVAL;
5304         }
5305 
5306         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5307             return ERROR_MALFORMED;
5308         }
5309         offset += 4;
5310         size -= 4;
5311     }
5312 
5313     if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5314         mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5315     }
5316 
5317     mTrackFragmentHeaderInfo.mDataOffset = 0;
5318     return OK;
5319 }
5320 
parseTrackFragmentRun(off64_t offset,off64_t size)5321 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5322 
5323     ALOGV("MPEG4Source::parseTrackFragmentRun");
5324     if (size < 8) {
5325         return -EINVAL;
5326     }
5327 
5328     enum {
5329         kDataOffsetPresent                  = 0x01,
5330         kFirstSampleFlagsPresent            = 0x04,
5331         kSampleDurationPresent              = 0x100,
5332         kSampleSizePresent                  = 0x200,
5333         kSampleFlagsPresent                 = 0x400,
5334         kSampleCompositionTimeOffsetPresent = 0x800,
5335     };
5336 
5337     uint32_t flags;
5338     if (!mDataSource->getUInt32(offset, &flags)) {
5339         return ERROR_MALFORMED;
5340     }
5341     // |version| only affects SampleCompositionTimeOffset field.
5342     // If version == 0, SampleCompositionTimeOffset is uint32_t;
5343     // Otherwise, SampleCompositionTimeOffset is int32_t.
5344     // Sample.compositionOffset is defined as int32_t.
5345     uint8_t version = flags >> 24;
5346     flags &= 0xffffff;
5347     ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5348 
5349     if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5350         // These two shall not be used together.
5351         return -EINVAL;
5352     }
5353 
5354     uint32_t sampleCount;
5355     if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5356         return ERROR_MALFORMED;
5357     }
5358     offset += 8;
5359     size -= 8;
5360 
5361     uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5362 
5363     uint32_t firstSampleFlags = 0;
5364 
5365     if (flags & kDataOffsetPresent) {
5366         if (size < 4) {
5367             return -EINVAL;
5368         }
5369 
5370         int32_t dataOffsetDelta;
5371         if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5372             return ERROR_MALFORMED;
5373         }
5374 
5375         dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5376 
5377         offset += 4;
5378         size -= 4;
5379     }
5380 
5381     if (flags & kFirstSampleFlagsPresent) {
5382         if (size < 4) {
5383             return -EINVAL;
5384         }
5385 
5386         if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5387             return ERROR_MALFORMED;
5388         }
5389         offset += 4;
5390         size -= 4;
5391     }
5392 
5393     uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5394              sampleCtsOffset = 0;
5395 
5396     size_t bytesPerSample = 0;
5397     if (flags & kSampleDurationPresent) {
5398         bytesPerSample += 4;
5399     } else if (mTrackFragmentHeaderInfo.mFlags
5400             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5401         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5402     } else if (mTrex) {
5403         sampleDuration = mTrex->default_sample_duration;
5404     }
5405 
5406     if (flags & kSampleSizePresent) {
5407         bytesPerSample += 4;
5408     } else if (mTrackFragmentHeaderInfo.mFlags
5409             & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5410         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5411     } else {
5412         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5413     }
5414 
5415     if (flags & kSampleFlagsPresent) {
5416         bytesPerSample += 4;
5417     } else if (mTrackFragmentHeaderInfo.mFlags
5418             & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5419         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5420     } else {
5421         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5422     }
5423 
5424     if (flags & kSampleCompositionTimeOffsetPresent) {
5425         bytesPerSample += 4;
5426     } else {
5427         sampleCtsOffset = 0;
5428     }
5429 
5430     if (bytesPerSample != 0) {
5431         if (size < (off64_t)sampleCount * bytesPerSample) {
5432             return -EINVAL;
5433         }
5434     } else {
5435         if (sampleDuration == 0) {
5436             ALOGW("b/123389881 sampleDuration == 0");
5437             android_errorWriteLog(0x534e4554, "124389881 zero");
5438             return -EINVAL;
5439         }
5440 
5441         // apply some sanity (vs strict legality) checks
5442         //
5443         // clamp the count of entries in the trun box, to avoid spending forever parsing
5444         // this box. Clamping (vs error) lets us play *something*.
5445         // 1 million is about 400 msecs on a Pixel3, should be no more than a couple seconds
5446         // on the slowest devices.
5447         static constexpr uint32_t kMaxTrunSampleCount = 1000000;
5448         if (sampleCount > kMaxTrunSampleCount) {
5449             ALOGW("b/123389881 clamp sampleCount(%u) @ kMaxTrunSampleCount(%u)",
5450                   sampleCount, kMaxTrunSampleCount);
5451             android_errorWriteLog(0x534e4554, "124389881 count");
5452 
5453         }
5454     }
5455 
5456     Sample tmp;
5457     for (uint32_t i = 0; i < sampleCount; ++i) {
5458         if (flags & kSampleDurationPresent) {
5459             if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5460                 return ERROR_MALFORMED;
5461             }
5462             offset += 4;
5463         }
5464 
5465         if (flags & kSampleSizePresent) {
5466             if (!mDataSource->getUInt32(offset, &sampleSize)) {
5467                 return ERROR_MALFORMED;
5468             }
5469             offset += 4;
5470         }
5471 
5472         if (flags & kSampleFlagsPresent) {
5473             if (!mDataSource->getUInt32(offset, &sampleFlags)) {
5474                 return ERROR_MALFORMED;
5475             }
5476             offset += 4;
5477         }
5478 
5479         if (flags & kSampleCompositionTimeOffsetPresent) {
5480             if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
5481                 return ERROR_MALFORMED;
5482             }
5483             offset += 4;
5484         }
5485 
5486         ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
5487               " flags 0x%08x ctsOffset %" PRIu32, i + 1,
5488                 dataOffset, sampleSize, sampleDuration,
5489                 (flags & kFirstSampleFlagsPresent) && i == 0
5490                     ? firstSampleFlags : sampleFlags, sampleCtsOffset);
5491         tmp.offset = dataOffset;
5492         tmp.size = sampleSize;
5493         tmp.duration = sampleDuration;
5494         tmp.compositionOffset = sampleCtsOffset;
5495         memset(tmp.iv, 0, sizeof(tmp.iv));
5496         mCurrentSamples.add(tmp);
5497 
5498         dataOffset += sampleSize;
5499     }
5500 
5501     mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
5502 
5503     return OK;
5504 }
5505 
getFormat(AMediaFormat * meta)5506 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
5507     Mutex::Autolock autoLock(mLock);
5508     AMediaFormat_copy(meta, mFormat);
5509     return AMEDIA_OK;
5510 }
5511 
parseNALSize(const uint8_t * data) const5512 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
5513     switch (mNALLengthSize) {
5514         case 1:
5515             return *data;
5516         case 2:
5517             return U16_AT(data);
5518         case 3:
5519             return ((size_t)data[0] << 16) | U16_AT(&data[1]);
5520         case 4:
5521             return U32_AT(data);
5522     }
5523 
5524     // This cannot happen, mNALLengthSize springs to life by adding 1 to
5525     // a 2-bit integer.
5526     CHECK(!"Should not be here.");
5527 
5528     return 0;
5529 }
5530 
parseHEVCLayerId(const uint8_t * data,size_t size)5531 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
5532     if (data == nullptr || size < mNALLengthSize + 2) {
5533         return -1;
5534     }
5535 
5536     // HEVC NAL-header (16-bit)
5537     //  1   6      6     3
5538     // |-|uuuuuu|------|iii|
5539     //      ^            ^
5540     //  NAL_type        layer_id + 1
5541     //
5542     // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
5543     enum {
5544         TSA_N = 2,
5545         TSA_R = 3,
5546         STSA_N = 4,
5547         STSA_R = 5,
5548     };
5549 
5550     data += mNALLengthSize;
5551     uint16_t nalHeader = data[0] << 8 | data[1];
5552 
5553     uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
5554     if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
5555         int32_t layerIdPlusOne = nalHeader & 0x7u;
5556         ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
5557         return layerIdPlusOne - 1;
5558     }
5559     return 0;
5560 }
5561 
read(MediaBufferHelper ** out,const ReadOptions * options)5562 media_status_t MPEG4Source::read(
5563         MediaBufferHelper **out, const ReadOptions *options) {
5564     Mutex::Autolock autoLock(mLock);
5565 
5566     CHECK(mStarted);
5567 
5568     if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
5569         *out = nullptr;
5570         return AMEDIA_ERROR_WOULD_BLOCK;
5571     }
5572 
5573     if (mFirstMoofOffset > 0) {
5574         return fragmentedRead(out, options);
5575     }
5576 
5577     *out = NULL;
5578 
5579     int64_t targetSampleTimeUs = -1;
5580 
5581     int64_t seekTimeUs;
5582     ReadOptions::SeekMode mode;
5583     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5584 
5585         if (mIsHeif) {
5586             CHECK(mSampleTable == NULL);
5587             CHECK(mItemTable != NULL);
5588             int32_t imageIndex;
5589             if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
5590                 return AMEDIA_ERROR_MALFORMED;
5591             }
5592 
5593             status_t err;
5594             if (seekTimeUs >= 0) {
5595                 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
5596             } else {
5597                 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
5598             }
5599             if (err != OK) {
5600                 return AMEDIA_ERROR_UNKNOWN;
5601             }
5602         } else {
5603             uint32_t findFlags = 0;
5604             switch (mode) {
5605                 case ReadOptions::SEEK_PREVIOUS_SYNC:
5606                     findFlags = SampleTable::kFlagBefore;
5607                     break;
5608                 case ReadOptions::SEEK_NEXT_SYNC:
5609                     findFlags = SampleTable::kFlagAfter;
5610                     break;
5611                 case ReadOptions::SEEK_CLOSEST_SYNC:
5612                 case ReadOptions::SEEK_CLOSEST:
5613                     findFlags = SampleTable::kFlagClosest;
5614                     break;
5615                 case ReadOptions::SEEK_FRAME_INDEX:
5616                     findFlags = SampleTable::kFlagFrameIndex;
5617                     break;
5618                 default:
5619                     CHECK(!"Should not be here.");
5620                     break;
5621             }
5622             if( mode != ReadOptions::SEEK_FRAME_INDEX) {
5623                 seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
5624             }
5625 
5626             uint32_t sampleIndex;
5627             status_t err = mSampleTable->findSampleAtTime(
5628                     seekTimeUs, 1000000, mTimescale,
5629                     &sampleIndex, findFlags);
5630 
5631             if (mode == ReadOptions::SEEK_CLOSEST
5632                     || mode == ReadOptions::SEEK_FRAME_INDEX) {
5633                 // We found the closest sample already, now we want the sync
5634                 // sample preceding it (or the sample itself of course), even
5635                 // if the subsequent sync sample is closer.
5636                 findFlags = SampleTable::kFlagBefore;
5637             }
5638 
5639             uint32_t syncSampleIndex = sampleIndex;
5640             // assume every audio sample is a sync sample. This works around
5641             // seek issues with files that were incorrectly written with an
5642             // empty or single-sample stss block for the audio track
5643             if (err == OK && !mIsAudio) {
5644                 err = mSampleTable->findSyncSampleNear(
5645                         sampleIndex, &syncSampleIndex, findFlags);
5646             }
5647 
5648             uint64_t sampleTime;
5649             if (err == OK) {
5650                 err = mSampleTable->getMetaDataForSample(
5651                         sampleIndex, NULL, NULL, &sampleTime);
5652             }
5653 
5654             if (err != OK) {
5655                 if (err == ERROR_OUT_OF_RANGE) {
5656                     // An attempt to seek past the end of the stream would
5657                     // normally cause this ERROR_OUT_OF_RANGE error. Propagating
5658                     // this all the way to the MediaPlayer would cause abnormal
5659                     // termination. Legacy behaviour appears to be to behave as if
5660                     // we had seeked to the end of stream, ending normally.
5661                     return AMEDIA_ERROR_END_OF_STREAM;
5662                 }
5663                 ALOGV("end of stream");
5664                 return AMEDIA_ERROR_UNKNOWN;
5665             }
5666 
5667             if (mode == ReadOptions::SEEK_CLOSEST
5668                 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5669                 sampleTime -= mElstShiftStartTicks;
5670                 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
5671             }
5672 
5673 #if 0
5674             uint32_t syncSampleTime;
5675             CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
5676                         syncSampleIndex, NULL, NULL, &syncSampleTime));
5677 
5678             ALOGI("seek to time %lld us => sample at time %lld us, "
5679                  "sync sample at time %lld us",
5680                  seekTimeUs,
5681                  sampleTime * 1000000ll / mTimescale,
5682                  syncSampleTime * 1000000ll / mTimescale);
5683 #endif
5684 
5685             mCurrentSampleIndex = syncSampleIndex;
5686         }
5687 
5688         if (mBuffer != NULL) {
5689             mBuffer->release();
5690             mBuffer = NULL;
5691         }
5692 
5693         // fall through
5694     }
5695 
5696     off64_t offset = 0;
5697     size_t size = 0;
5698     uint64_t cts, stts;
5699     bool isSyncSample;
5700     bool newBuffer = false;
5701     if (mBuffer == NULL) {
5702         newBuffer = true;
5703 
5704         status_t err;
5705         if (!mIsHeif) {
5706             err = mSampleTable->getMetaDataForSample(
5707                     mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
5708             if(err == OK) {
5709                 /* Composition Time Stamp cannot be negative. Some files have video Sample
5710                 * Time(STTS)delta with zero value(b/117402420).  Hence subtract only
5711                 * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
5712                 */
5713                 cts -= std::min(cts, mElstShiftStartTicks);
5714             }
5715 
5716         } else {
5717             err = mItemTable->getImageOffsetAndSize(
5718                     options && options->getSeekTo(&seekTimeUs, &mode) ?
5719                             &mCurrentSampleIndex : NULL, &offset, &size);
5720 
5721             cts = stts = 0;
5722             isSyncSample = 0;
5723             ALOGV("image offset %lld, size %zu", (long long)offset, size);
5724         }
5725 
5726         if (err != OK) {
5727             if (err == ERROR_END_OF_STREAM) {
5728                 return AMEDIA_ERROR_END_OF_STREAM;
5729             }
5730             return AMEDIA_ERROR_UNKNOWN;
5731         }
5732 
5733         err = mBufferGroup->acquire_buffer(&mBuffer);
5734 
5735         if (err != OK) {
5736             CHECK(mBuffer == NULL);
5737             return AMEDIA_ERROR_UNKNOWN;
5738         }
5739         if (size > mBuffer->size()) {
5740             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5741             mBuffer->release();
5742             mBuffer = NULL;
5743             return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
5744         }
5745     }
5746 
5747     if (!mIsAVC && !mIsHEVC && !mIsAC4) {
5748         if (newBuffer) {
5749             if (mIsPcm) {
5750                 // The twos' PCM block reader assumes that all samples has the same size.
5751 
5752                 uint32_t samplesToRead = mSampleTable->getLastSampleIndexInChunk()
5753                                                       - mCurrentSampleIndex + 1;
5754                 if (samplesToRead > kMaxPcmFrameSize) {
5755                     samplesToRead = kMaxPcmFrameSize;
5756                 }
5757 
5758                 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
5759                       samplesToRead, size, mCurrentSampleIndex,
5760                       mSampleTable->getLastSampleIndexInChunk());
5761 
5762                size_t totalSize = samplesToRead * size;
5763                 uint8_t* buf = (uint8_t *)mBuffer->data();
5764                 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
5765                 if (bytesRead < (ssize_t)totalSize) {
5766                     mBuffer->release();
5767                     mBuffer = NULL;
5768 
5769                     return AMEDIA_ERROR_IO;
5770                 }
5771 
5772                 AMediaFormat *meta = mBuffer->meta_data();
5773                 AMediaFormat_clear(meta);
5774                 AMediaFormat_setInt64(
5775                       meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5776                 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5777 
5778                 int32_t byteOrder;
5779                 AMediaFormat_getInt32(mFormat,
5780                         AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
5781 
5782                 if (byteOrder == 1) {
5783                     // Big-endian -> little-endian
5784                     uint16_t *dstData = (uint16_t *)buf;
5785                     uint16_t *srcData = (uint16_t *)buf;
5786 
5787                     for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
5788                          dstData[j] = ntohs(srcData[j]);
5789                     }
5790                 }
5791 
5792                 mCurrentSampleIndex += samplesToRead;
5793                 mBuffer->set_range(0, totalSize);
5794             } else {
5795                 ssize_t num_bytes_read =
5796                     mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5797 
5798                 if (num_bytes_read < (ssize_t)size) {
5799                     mBuffer->release();
5800                     mBuffer = NULL;
5801 
5802                     return AMEDIA_ERROR_IO;
5803                 }
5804 
5805                 CHECK(mBuffer != NULL);
5806                 mBuffer->set_range(0, size);
5807                 AMediaFormat *meta = mBuffer->meta_data();
5808                 AMediaFormat_clear(meta);
5809                 AMediaFormat_setInt64(
5810                         meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5811                 AMediaFormat_setInt64(
5812                         meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5813 
5814                 if (targetSampleTimeUs >= 0) {
5815                     AMediaFormat_setInt64(
5816                             meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5817                 }
5818 
5819                 if (isSyncSample) {
5820                     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5821                 }
5822 
5823                 ++mCurrentSampleIndex;
5824             }
5825         }
5826 
5827         *out = mBuffer;
5828         mBuffer = NULL;
5829 
5830         return AMEDIA_OK;
5831 
5832     } else if (mIsAC4) {
5833         CHECK(mBuffer != NULL);
5834         // Make sure there is enough space to write the sync header and the raw frame
5835         if (mBuffer->range_length() < (7 + size)) {
5836             mBuffer->release();
5837             mBuffer = NULL;
5838 
5839             return AMEDIA_ERROR_IO;
5840         }
5841 
5842         uint8_t *dstData = (uint8_t *)mBuffer->data();
5843         size_t dstOffset = 0;
5844         // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
5845         // AC40 sync word, meaning no CRC at the end of the frame
5846         dstData[dstOffset++] = 0xAC;
5847         dstData[dstOffset++] = 0x40;
5848         dstData[dstOffset++] = 0xFF;
5849         dstData[dstOffset++] = 0xFF;
5850         dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
5851         dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
5852         dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
5853 
5854         ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
5855         if (numBytesRead != (ssize_t)size) {
5856             mBuffer->release();
5857             mBuffer = NULL;
5858 
5859             return AMEDIA_ERROR_IO;
5860         }
5861 
5862         mBuffer->set_range(0, dstOffset + size);
5863         AMediaFormat *meta = mBuffer->meta_data();
5864         AMediaFormat_clear(meta);
5865         AMediaFormat_setInt64(
5866                 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5867         AMediaFormat_setInt64(
5868                 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5869 
5870         if (targetSampleTimeUs >= 0) {
5871             AMediaFormat_setInt64(
5872                     meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5873         }
5874 
5875         if (isSyncSample) {
5876             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5877         }
5878 
5879         ++mCurrentSampleIndex;
5880 
5881         *out = mBuffer;
5882         mBuffer = NULL;
5883 
5884         return AMEDIA_OK;
5885     } else {
5886         // Whole NAL units are returned but each fragment is prefixed by
5887         // the start code (0x00 00 00 01).
5888         ssize_t num_bytes_read = 0;
5889         num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
5890 
5891         if (num_bytes_read < (ssize_t)size) {
5892             mBuffer->release();
5893             mBuffer = NULL;
5894 
5895             return AMEDIA_ERROR_IO;
5896         }
5897 
5898         uint8_t *dstData = (uint8_t *)mBuffer->data();
5899         size_t srcOffset = 0;
5900         size_t dstOffset = 0;
5901 
5902         while (srcOffset < size) {
5903             bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5904             size_t nalLength = 0;
5905             if (!isMalFormed) {
5906                 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5907                 srcOffset += mNALLengthSize;
5908                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
5909             }
5910 
5911             if (isMalFormed) {
5912                 //if nallength abnormal,ignore it.
5913                 ALOGW("abnormal nallength, ignore this NAL");
5914                 srcOffset = size;
5915                 break;
5916             }
5917 
5918             if (nalLength == 0) {
5919                 continue;
5920             }
5921 
5922             if (dstOffset > SIZE_MAX - 4 ||
5923                     dstOffset + 4 > SIZE_MAX - nalLength ||
5924                     dstOffset + 4 + nalLength > mBuffer->size()) {
5925                 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
5926                 android_errorWriteLog(0x534e4554, "27208621");
5927                 mBuffer->release();
5928                 mBuffer = NULL;
5929                 return AMEDIA_ERROR_MALFORMED;
5930             }
5931 
5932             dstData[dstOffset++] = 0;
5933             dstData[dstOffset++] = 0;
5934             dstData[dstOffset++] = 0;
5935             dstData[dstOffset++] = 1;
5936             memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5937             srcOffset += nalLength;
5938             dstOffset += nalLength;
5939         }
5940         CHECK_EQ(srcOffset, size);
5941         CHECK(mBuffer != NULL);
5942         mBuffer->set_range(0, dstOffset);
5943 
5944         AMediaFormat *meta = mBuffer->meta_data();
5945         AMediaFormat_clear(meta);
5946         AMediaFormat_setInt64(
5947                 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5948         AMediaFormat_setInt64(
5949                 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5950 
5951         if (targetSampleTimeUs >= 0) {
5952             AMediaFormat_setInt64(
5953                     meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5954         }
5955 
5956         if (mIsAVC) {
5957             uint32_t layerId = FindAVCLayerId(
5958                     (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5959             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
5960         } else if (mIsHEVC) {
5961             int32_t layerId = parseHEVCLayerId(
5962                     (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5963             if (layerId >= 0) {
5964                 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
5965             }
5966         }
5967 
5968         if (isSyncSample) {
5969             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5970         }
5971 
5972         ++mCurrentSampleIndex;
5973 
5974         *out = mBuffer;
5975         mBuffer = NULL;
5976 
5977         return AMEDIA_OK;
5978     }
5979 }
5980 
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)5981 media_status_t MPEG4Source::fragmentedRead(
5982         MediaBufferHelper **out, const ReadOptions *options) {
5983 
5984     ALOGV("MPEG4Source::fragmentedRead");
5985 
5986     CHECK(mStarted);
5987 
5988     *out = NULL;
5989 
5990     int64_t targetSampleTimeUs = -1;
5991 
5992     int64_t seekTimeUs;
5993     ReadOptions::SeekMode mode;
5994     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5995 
5996         seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
5997         ALOGV("shifted seekTimeUs :%" PRId64 ", mElstShiftStartTicks:%" PRIu64, seekTimeUs,
5998               mElstShiftStartTicks);
5999 
6000         int numSidxEntries = mSegments.size();
6001         if (numSidxEntries != 0) {
6002             int64_t totalTime = 0;
6003             off64_t totalOffset = mFirstMoofOffset;
6004             for (int i = 0; i < numSidxEntries; i++) {
6005                 const SidxEntry *se = &mSegments[i];
6006                 if (totalTime + se->mDurationUs > seekTimeUs) {
6007                     // The requested time is somewhere in this segment
6008                     if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6009                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6010                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6011                         // requested next sync, or closest sync and it was closer to the end of
6012                         // this segment
6013                         totalTime += se->mDurationUs;
6014                         totalOffset += se->mSize;
6015                     }
6016                     break;
6017                 }
6018                 totalTime += se->mDurationUs;
6019                 totalOffset += se->mSize;
6020             }
6021             mCurrentMoofOffset = totalOffset;
6022             mNextMoofOffset = -1;
6023             mCurrentSamples.clear();
6024             mCurrentSampleIndex = 0;
6025             status_t err = parseChunk(&totalOffset);
6026             if (err != OK) {
6027                 return AMEDIA_ERROR_UNKNOWN;
6028             }
6029             mCurrentTime = totalTime * mTimescale / 1000000ll;
6030         } else {
6031             // without sidx boxes, we can only seek to 0
6032             mCurrentMoofOffset = mFirstMoofOffset;
6033             mNextMoofOffset = -1;
6034             mCurrentSamples.clear();
6035             mCurrentSampleIndex = 0;
6036             off64_t tmp = mCurrentMoofOffset;
6037             status_t err = parseChunk(&tmp);
6038             if (err != OK) {
6039                 return AMEDIA_ERROR_UNKNOWN;
6040             }
6041             mCurrentTime = 0;
6042         }
6043 
6044         if (mBuffer != NULL) {
6045             mBuffer->release();
6046             mBuffer = NULL;
6047         }
6048 
6049         // fall through
6050     }
6051 
6052     off64_t offset = 0;
6053     size_t size = 0;
6054     uint64_t cts = 0;
6055     bool isSyncSample = false;
6056     bool newBuffer = false;
6057     if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6058         newBuffer = true;
6059 
6060         if (mBuffer != NULL) {
6061             mBuffer->release();
6062             mBuffer = NULL;
6063         }
6064         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6065             // move to next fragment if there is one
6066             if (mNextMoofOffset <= mCurrentMoofOffset) {
6067                 return AMEDIA_ERROR_END_OF_STREAM;
6068             }
6069             off64_t nextMoof = mNextMoofOffset;
6070             mCurrentMoofOffset = nextMoof;
6071             mCurrentSamples.clear();
6072             mCurrentSampleIndex = 0;
6073             status_t err = parseChunk(&nextMoof);
6074             if (err != OK) {
6075                 return AMEDIA_ERROR_UNKNOWN;
6076             }
6077             if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6078                 return AMEDIA_ERROR_END_OF_STREAM;
6079             }
6080         }
6081 
6082         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6083         offset = smpl->offset;
6084         size = smpl->size;
6085         cts = mCurrentTime + smpl->compositionOffset;
6086         /* Composition Time Stamp cannot be negative. Some files have video Sample
6087         * Time(STTS)delta with zero value(b/117402420).  Hence subtract only
6088         * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
6089         */
6090         cts -= std::min(cts, mElstShiftStartTicks);
6091 
6092         mCurrentTime += smpl->duration;
6093         isSyncSample = (mCurrentSampleIndex == 0);
6094 
6095         status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6096 
6097         if (err != OK) {
6098             CHECK(mBuffer == NULL);
6099             ALOGV("acquire_buffer returned %d", err);
6100             return AMEDIA_ERROR_UNKNOWN;
6101         }
6102         if (size > mBuffer->size()) {
6103             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6104             mBuffer->release();
6105             mBuffer = NULL;
6106             return AMEDIA_ERROR_UNKNOWN;
6107         }
6108     }
6109 
6110     const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6111     AMediaFormat *bufmeta = mBuffer->meta_data();
6112     AMediaFormat_clear(bufmeta);
6113     if (smpl->encryptedsizes.size()) {
6114         // store clear/encrypted lengths in metadata
6115         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6116                 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
6117         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6118                 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
6119         AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6120         AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6121         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6122         AMediaFormat_setInt32(bufmeta,
6123                 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6124         AMediaFormat_setInt32(bufmeta,
6125                 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6126 
6127         void *iv = NULL;
6128         size_t ivlength = 0;
6129         if (!AMediaFormat_getBuffer(mFormat,
6130                 "crypto-iv", &iv, &ivlength)) {
6131             iv = (void *) smpl->iv;
6132             ivlength = 16; // use 16 or the actual size?
6133         }
6134         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6135     }
6136 
6137     if (!mIsAVC && !mIsHEVC) {
6138         if (newBuffer) {
6139             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6140                 mBuffer->release();
6141                 mBuffer = NULL;
6142 
6143                 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6144                 return AMEDIA_ERROR_MALFORMED;
6145             }
6146 
6147             ssize_t num_bytes_read =
6148                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6149 
6150             if (num_bytes_read < (ssize_t)size) {
6151                 mBuffer->release();
6152                 mBuffer = NULL;
6153 
6154                 ALOGE("i/o error");
6155                 return AMEDIA_ERROR_IO;
6156             }
6157 
6158             CHECK(mBuffer != NULL);
6159             mBuffer->set_range(0, size);
6160             AMediaFormat_setInt64(bufmeta,
6161                     AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6162             AMediaFormat_setInt64(bufmeta,
6163                     AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6164 
6165             if (targetSampleTimeUs >= 0) {
6166                 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6167             }
6168 
6169             if (mIsAVC) {
6170                 uint32_t layerId = FindAVCLayerId(
6171                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6172                 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6173             } else if (mIsHEVC) {
6174                 int32_t layerId = parseHEVCLayerId(
6175                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6176                 if (layerId >= 0) {
6177                     AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6178                 }
6179             }
6180 
6181             if (isSyncSample) {
6182                 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6183             }
6184 
6185             ++mCurrentSampleIndex;
6186         }
6187 
6188         *out = mBuffer;
6189         mBuffer = NULL;
6190 
6191         return AMEDIA_OK;
6192 
6193     } else {
6194         ALOGV("whole NAL");
6195         // Whole NAL units are returned but each fragment is prefixed by
6196         // the start code (0x00 00 00 01).
6197         ssize_t num_bytes_read = 0;
6198         void *data = NULL;
6199         bool isMalFormed = false;
6200         int32_t max_size;
6201         if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6202                 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6203             isMalFormed = true;
6204         } else {
6205             data = mSrcBuffer;
6206         }
6207 
6208         if (isMalFormed || data == NULL) {
6209             ALOGE("isMalFormed size %zu", size);
6210             if (mBuffer != NULL) {
6211                 mBuffer->release();
6212                 mBuffer = NULL;
6213             }
6214             return AMEDIA_ERROR_MALFORMED;
6215         }
6216         num_bytes_read = mDataSource->readAt(offset, data, size);
6217 
6218         if (num_bytes_read < (ssize_t)size) {
6219             mBuffer->release();
6220             mBuffer = NULL;
6221 
6222             ALOGE("i/o error");
6223             return AMEDIA_ERROR_IO;
6224         }
6225 
6226         uint8_t *dstData = (uint8_t *)mBuffer->data();
6227         size_t srcOffset = 0;
6228         size_t dstOffset = 0;
6229 
6230         while (srcOffset < size) {
6231             isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6232             size_t nalLength = 0;
6233             if (!isMalFormed) {
6234                 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6235                 srcOffset += mNALLengthSize;
6236                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6237                         || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6238                         || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6239             }
6240 
6241             if (isMalFormed) {
6242                 ALOGE("Video is malformed; nalLength %zu", nalLength);
6243                 mBuffer->release();
6244                 mBuffer = NULL;
6245                 return AMEDIA_ERROR_MALFORMED;
6246             }
6247 
6248             if (nalLength == 0) {
6249                 continue;
6250             }
6251 
6252             if (dstOffset > SIZE_MAX - 4 ||
6253                     dstOffset + 4 > SIZE_MAX - nalLength ||
6254                     dstOffset + 4 + nalLength > mBuffer->size()) {
6255                 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6256                 android_errorWriteLog(0x534e4554, "26365349");
6257                 mBuffer->release();
6258                 mBuffer = NULL;
6259                 return AMEDIA_ERROR_MALFORMED;
6260             }
6261 
6262             dstData[dstOffset++] = 0;
6263             dstData[dstOffset++] = 0;
6264             dstData[dstOffset++] = 0;
6265             dstData[dstOffset++] = 1;
6266             memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6267             srcOffset += nalLength;
6268             dstOffset += nalLength;
6269         }
6270         CHECK_EQ(srcOffset, size);
6271         CHECK(mBuffer != NULL);
6272         mBuffer->set_range(0, dstOffset);
6273 
6274         AMediaFormat *bufmeta = mBuffer->meta_data();
6275         AMediaFormat_setInt64(bufmeta,
6276                 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6277         AMediaFormat_setInt64(bufmeta,
6278                 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6279 
6280         if (targetSampleTimeUs >= 0) {
6281             AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6282         }
6283 
6284         if (isSyncSample) {
6285             AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6286         }
6287 
6288         ++mCurrentSampleIndex;
6289 
6290         *out = mBuffer;
6291         mBuffer = NULL;
6292 
6293         return AMEDIA_OK;
6294     }
6295 
6296     return AMEDIA_OK;
6297 }
6298 
findTrackByMimePrefix(const char * mimePrefix)6299 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6300         const char *mimePrefix) {
6301     for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6302         const char *mime;
6303         if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6304                 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6305             return track;
6306         }
6307     }
6308 
6309     return NULL;
6310 }
6311 
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6312 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6313     uint8_t header[8];
6314 
6315     ssize_t n = source->readAt(4, header, sizeof(header));
6316     if (n < (ssize_t)sizeof(header)) {
6317         return false;
6318     }
6319 
6320     if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6321         || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6322         || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6323         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6324         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6325         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6326         || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6327         || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
6328         *confidence = 0.4;
6329 
6330         return true;
6331     }
6332 
6333     return false;
6334 }
6335 
isCompatibleBrand(uint32_t fourcc)6336 static bool isCompatibleBrand(uint32_t fourcc) {
6337     static const uint32_t kCompatibleBrands[] = {
6338         FOURCC("isom"),
6339         FOURCC("iso2"),
6340         FOURCC("avc1"),
6341         FOURCC("hvc1"),
6342         FOURCC("hev1"),
6343         FOURCC("av01"),
6344         FOURCC("3gp4"),
6345         FOURCC("mp41"),
6346         FOURCC("mp42"),
6347         FOURCC("dash"),
6348         FOURCC("nvr1"),
6349 
6350         // Won't promise that the following file types can be played.
6351         // Just give these file types a chance.
6352         FOURCC("qt  "),  // Apple's QuickTime
6353         FOURCC("MSNV"),  // Sony's PSP
6354         FOURCC("wmf "),
6355 
6356         FOURCC("3g2a"),  // 3GPP2
6357         FOURCC("3g2b"),
6358         FOURCC("mif1"),  // HEIF image
6359         FOURCC("heic"),  // HEIF image
6360         FOURCC("msf1"),  // HEIF image sequence
6361         FOURCC("hevc"),  // HEIF image sequence
6362     };
6363 
6364     for (size_t i = 0;
6365          i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6366          ++i) {
6367         if (kCompatibleBrands[i] == fourcc) {
6368             return true;
6369         }
6370     }
6371 
6372     return false;
6373 }
6374 
6375 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6376 // compatible brand is present.
6377 // Also try to identify where this file's metadata ends
6378 // (end of the 'moov' atom) and report it to the caller as part of
6379 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)6380 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
6381     // We scan up to 128 bytes to identify this file as an MP4.
6382     static const off64_t kMaxScanOffset = 128ll;
6383 
6384     off64_t offset = 0ll;
6385     bool foundGoodFileType = false;
6386     off64_t moovAtomEndOffset = -1ll;
6387     bool done = false;
6388 
6389     while (!done && offset < kMaxScanOffset) {
6390         uint32_t hdr[2];
6391         if (source->readAt(offset, hdr, 8) < 8) {
6392             return false;
6393         }
6394 
6395         uint64_t chunkSize = ntohl(hdr[0]);
6396         uint32_t chunkType = ntohl(hdr[1]);
6397         off64_t chunkDataOffset = offset + 8;
6398 
6399         if (chunkSize == 1) {
6400             if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
6401                 return false;
6402             }
6403 
6404             chunkSize = ntoh64(chunkSize);
6405             chunkDataOffset += 8;
6406 
6407             if (chunkSize < 16) {
6408                 // The smallest valid chunk is 16 bytes long in this case.
6409                 return false;
6410             }
6411 
6412         } else if (chunkSize < 8) {
6413             // The smallest valid chunk is 8 bytes long.
6414             return false;
6415         }
6416 
6417         // (data_offset - offset) is either 8 or 16
6418         off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
6419         if (chunkDataSize < 0) {
6420             ALOGE("b/23540914");
6421             return false;
6422         }
6423 
6424         char chunkstring[5];
6425         MakeFourCCString(chunkType, chunkstring);
6426         ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
6427                 chunkstring, chunkSize, (long long)offset);
6428         switch (chunkType) {
6429             case FOURCC("ftyp"):
6430             {
6431                 if (chunkDataSize < 8) {
6432                     return false;
6433                 }
6434 
6435                 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
6436                 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
6437                     if (i == 1) {
6438                         // Skip this index, it refers to the minorVersion,
6439                         // not a brand.
6440                         continue;
6441                     }
6442 
6443                     uint32_t brand;
6444                     if (source->readAt(
6445                                 chunkDataOffset + 4 * i, &brand, 4) < 4) {
6446                         return false;
6447                     }
6448 
6449                     brand = ntohl(brand);
6450 
6451                     if (isCompatibleBrand(brand)) {
6452                         foundGoodFileType = true;
6453                         break;
6454                     }
6455                 }
6456 
6457                 if (!foundGoodFileType) {
6458                     return false;
6459                 }
6460 
6461                 break;
6462             }
6463 
6464             case FOURCC("moov"):
6465             {
6466                 moovAtomEndOffset = offset + chunkSize;
6467 
6468                 done = true;
6469                 break;
6470             }
6471 
6472             default:
6473                 break;
6474         }
6475 
6476         offset += chunkSize;
6477     }
6478 
6479     if (!foundGoodFileType) {
6480         return false;
6481     }
6482 
6483     *confidence = 0.4f;
6484 
6485     return true;
6486 }
6487 
CreateExtractor(CDataSource * source,void *)6488 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
6489     return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
6490 }
6491 
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)6492 static CreatorFunc Sniff(
6493         CDataSource *source, float *confidence, void **,
6494         FreeMetaFunc *) {
6495     DataSourceHelper helper(source);
6496     if (BetterSniffMPEG4(&helper, confidence)) {
6497         return CreateExtractor;
6498     }
6499 
6500     if (LegacySniffMPEG4(&helper, confidence)) {
6501         ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
6502         return CreateExtractor;
6503     }
6504 
6505     return NULL;
6506 }
6507 
// File-name extensions published together with Sniff in the extractor
// definition below. The list is terminated by a null pointer.
static const char *extensions[] = {
    // 3GPP family
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    // MPEG-4 / QuickTime family
    "m4a", "m4r", "m4v", "mov", "mp4", "qt",
    // sentinel
    nullptr
};
6522 
6523 extern "C" {
6524 // This is the only symbol that needs to be exported
6525 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()6526 ExtractorDef GETEXTRACTORDEF() {
6527     return {
6528         EXTRACTORDEF_VERSION,
6529         UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
6530         2, // version
6531         "MP4 Extractor",
6532         { .v3 = {Sniff, extensions} },
6533     };
6534 }
6535 
6536 } // extern "C"
6537 
6538 }  // namespace android
6539