1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <algorithm>
23 #include <map>
24 #include <memory>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include <log/log.h>
30 #include <utils/Log.h>
31
32 #include "AC4Parser.h"
33 #include "MPEG4Extractor.h"
34 #include "SampleTable.h"
35 #include "ItemTable.h"
36
37 #include <ESDS.h>
38 #include <ID3.h>
39 #include <media/stagefright/DataSourceBase.h>
40 #include <media/ExtractorUtils.h>
41 #include <media/stagefright/foundation/ABitReader.h>
42 #include <media/stagefright/foundation/ABuffer.h>
43 #include <media/stagefright/foundation/ADebug.h>
44 #include <media/stagefright/foundation/AMessage.h>
45 #include <media/stagefright/foundation/AudioPresentationInfo.h>
46 #include <media/stagefright/foundation/AUtils.h>
47 #include <media/stagefright/foundation/ByteUtils.h>
48 #include <media/stagefright/foundation/ColorUtils.h>
49 #include <media/stagefright/foundation/avc_utils.h>
50 #include <media/stagefright/foundation/hexdump.h>
51 #include <media/stagefright/foundation/OpusHeader.h>
52 #include <media/stagefright/MediaBufferGroup.h>
53 #include <media/stagefright/MediaDefs.h>
54 #include <media/stagefright/MetaDataBase.h>
55 #include <utils/String8.h>
56
57 #include <byteswap.h>
58
59 #ifndef UINT32_MAX
60 #define UINT32_MAX (4294967295U)
61 #endif
62
63 #define ALAC_SPECIFIC_INFO_SIZE (36)
64
65 namespace android {
66
enum {
    // Upper bound on the number of leading sample bytes handed back as a
    // track's stream header.
    kMaxTrackHeaderSize = 32,

    // Largest atom payload (64 MiB) this parser is willing to accept. The
    // spec permits bigger atoms, but they are rejected here.
    kMaxAtomSize = 64 << 20,
};
75
76 class MPEG4Source : public MediaTrackHelper {
77 static const size_t kMaxPcmFrameSize = 8192;
78 public:
79 // Caller retains ownership of both "dataSource" and "sampleTable".
80 MPEG4Source(AMediaFormat *format,
81 DataSourceHelper *dataSource,
82 int32_t timeScale,
83 const sp<SampleTable> &sampleTable,
84 Vector<SidxEntry> &sidx,
85 const Trex *trex,
86 off64_t firstMoofOffset,
87 const sp<ItemTable> &itemTable,
88 uint64_t elstShiftStartTicks,
89 uint64_t elstInitialEmptyEditTicks);
90 virtual status_t init();
91
92 virtual media_status_t start();
93 virtual media_status_t stop();
94
95 virtual media_status_t getFormat(AMediaFormat *);
96
97 virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
supportsNonBlockingRead()98 bool supportsNonBlockingRead() override { return true; }
99 virtual media_status_t fragmentedRead(
100 MediaBufferHelper **buffer, const ReadOptions *options = NULL);
101
102 virtual ~MPEG4Source();
103
104 private:
105 Mutex mLock;
106
107 AMediaFormat *mFormat;
108 DataSourceHelper *mDataSource;
109 int32_t mTimescale;
110 sp<SampleTable> mSampleTable;
111 uint32_t mCurrentSampleIndex;
112 uint32_t mCurrentFragmentIndex;
113 Vector<SidxEntry> &mSegments;
114 const Trex *mTrex;
115 off64_t mFirstMoofOffset;
116 off64_t mCurrentMoofOffset;
117 off64_t mCurrentMoofSize;
118 off64_t mNextMoofOffset;
119 uint32_t mCurrentTime; // in media timescale ticks
120 int32_t mLastParsedTrackId;
121 int32_t mTrackId;
122
123 int32_t mCryptoMode; // passed in from extractor
124 int32_t mDefaultIVSize; // passed in from extractor
125 uint8_t mCryptoKey[16]; // passed in from extractor
126 int32_t mDefaultEncryptedByteBlock;
127 int32_t mDefaultSkipByteBlock;
128 uint32_t mCurrentAuxInfoType;
129 uint32_t mCurrentAuxInfoTypeParameter;
130 int32_t mCurrentDefaultSampleInfoSize;
131 uint32_t mCurrentSampleInfoCount;
132 uint32_t mCurrentSampleInfoAllocSize;
133 uint8_t* mCurrentSampleInfoSizes;
134 uint32_t mCurrentSampleInfoOffsetCount;
135 uint32_t mCurrentSampleInfoOffsetsAllocSize;
136 uint64_t* mCurrentSampleInfoOffsets;
137
138 bool mIsAVC;
139 bool mIsHEVC;
140 bool mIsDolbyVision;
141 bool mIsAC4;
142 bool mIsPcm;
143 size_t mNALLengthSize;
144
145 bool mStarted;
146
147 MediaBufferHelper *mBuffer;
148
149 uint8_t *mSrcBuffer;
150
151 bool mIsHeif;
152 bool mIsAudio;
153 bool mIsUsac = false;
154 sp<ItemTable> mItemTable;
155
156 /* Shift start offset (move to earlier time) when media_time > 0,
157 * in media time scale.
158 */
159 uint64_t mElstShiftStartTicks;
160 /* Initial start offset (move to later time), empty edit list entry
161 * in media time scale.
162 */
163 uint64_t mElstInitialEmptyEditTicks;
164
165 size_t parseNALSize(const uint8_t *data) const;
166 status_t parseChunk(off64_t *offset);
167 status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
168 status_t parseTrackFragmentRun(off64_t offset, off64_t size);
169 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
170 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
171 status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
172 uint32_t flags, off64_t size);
173 status_t parseSampleEncryption(off64_t offset, off64_t size);
174 // returns -1 for invalid layer ID
175 int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
176
177 struct TrackFragmentHeaderInfo {
178 enum Flags {
179 kBaseDataOffsetPresent = 0x01,
180 kSampleDescriptionIndexPresent = 0x02,
181 kDefaultSampleDurationPresent = 0x08,
182 kDefaultSampleSizePresent = 0x10,
183 kDefaultSampleFlagsPresent = 0x20,
184 kDurationIsEmpty = 0x10000,
185 };
186
187 uint32_t mTrackID;
188 uint32_t mFlags;
189 uint64_t mBaseDataOffset;
190 uint32_t mSampleDescriptionIndex;
191 uint32_t mDefaultSampleDuration;
192 uint32_t mDefaultSampleSize;
193 uint32_t mDefaultSampleFlags;
194
195 uint64_t mDataOffset;
196 };
197 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
198
199 struct Sample {
200 off64_t offset;
201 size_t size;
202 uint32_t duration;
203 int32_t compositionOffset;
204 uint8_t iv[16];
205 Vector<size_t> clearsizes;
206 Vector<size_t> encryptedsizes;
207 };
208 Vector<Sample> mCurrentSamples;
209 std::map<off64_t, uint32_t> mDrmOffsets;
210
211 MPEG4Source(const MPEG4Source &);
212 MPEG4Source &operator=(const MPEG4Source &);
213 };
214
215 // This custom data source wraps an existing one and satisfies requests
216 // falling entirely within a cached range from the cache while forwarding
217 // all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track.
// When several tracks need caching, the sources are wrapped one around
// another, i.e. each CachedRangedDataSource caches the sampletable metadata
// for exactly one track.
221
222 class CachedRangedDataSource : public DataSourceHelper {
223 public:
224 explicit CachedRangedDataSource(DataSourceHelper *source);
225 virtual ~CachedRangedDataSource();
226
227 ssize_t readAt(off64_t offset, void *data, size_t size) override;
228 status_t getSize(off64_t *size) override;
229 uint32_t flags() override;
230
231 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
232
233
234 private:
235 Mutex mLock;
236
237 DataSourceHelper *mSource;
238 bool mOwnsDataSource;
239 off64_t mCachedOffset;
240 size_t mCachedSize;
241 uint8_t *mCache;
242
243 void clearCache();
244
245 CachedRangedDataSource(const CachedRangedDataSource &);
246 CachedRangedDataSource &operator=(const CachedRangedDataSource &);
247 };
248
CachedRangedDataSource(DataSourceHelper * source)249 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
250 : DataSourceHelper(source),
251 mSource(source),
252 mOwnsDataSource(false),
253 mCachedOffset(0),
254 mCachedSize(0),
255 mCache(NULL) {
256 }
257
~CachedRangedDataSource()258 CachedRangedDataSource::~CachedRangedDataSource() {
259 clearCache();
260 if (mOwnsDataSource) {
261 delete mSource;
262 }
263 }
264
clearCache()265 void CachedRangedDataSource::clearCache() {
266 if (mCache) {
267 free(mCache);
268 mCache = NULL;
269 }
270
271 mCachedOffset = 0;
272 mCachedSize = 0;
273 }
274
readAt(off64_t offset,void * data,size_t size)275 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
276 Mutex::Autolock autoLock(mLock);
277
278 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
279 memcpy(data, &mCache[offset - mCachedOffset], size);
280 return size;
281 }
282
283 return mSource->readAt(offset, data, size);
284 }
285
getSize(off64_t * size)286 status_t CachedRangedDataSource::getSize(off64_t *size) {
287 return mSource->getSize(size);
288 }
289
flags()290 uint32_t CachedRangedDataSource::flags() {
291 return mSource->flags();
292 }
293
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)294 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
295 size_t size,
296 bool assumeSourceOwnershipOnSuccess) {
297 Mutex::Autolock autoLock(mLock);
298
299 clearCache();
300
301 mCache = (uint8_t *)malloc(size);
302
303 if (mCache == NULL) {
304 return -ENOMEM;
305 }
306
307 mCachedOffset = offset;
308 mCachedSize = size;
309
310 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
311
312 if (err < (ssize_t)size) {
313 clearCache();
314
315 return ERROR_IO;
316 }
317 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
318 return OK;
319 }
320
321 ////////////////////////////////////////////////////////////////////////////////
322
// Compile-time switch: when true, parseChunk() prints a hex dump of the start
// of every chunk it visits.
static constexpr bool kUseHexDump = false;
324
FourCC2MIME(uint32_t fourcc)325 static const char *FourCC2MIME(uint32_t fourcc) {
326 switch (fourcc) {
327 case FOURCC("mp4a"):
328 return MEDIA_MIMETYPE_AUDIO_AAC;
329
330 case FOURCC("samr"):
331 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
332
333 case FOURCC("sawb"):
334 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
335
336 case FOURCC("ec-3"):
337 return MEDIA_MIMETYPE_AUDIO_EAC3;
338
339 case FOURCC("mp4v"):
340 return MEDIA_MIMETYPE_VIDEO_MPEG4;
341
342 case FOURCC("s263"):
343 case FOURCC("h263"):
344 case FOURCC("H263"):
345 return MEDIA_MIMETYPE_VIDEO_H263;
346
347 case FOURCC("avc1"):
348 return MEDIA_MIMETYPE_VIDEO_AVC;
349
350 case FOURCC("hvc1"):
351 case FOURCC("hev1"):
352 return MEDIA_MIMETYPE_VIDEO_HEVC;
353
354 case FOURCC("dvav"):
355 case FOURCC("dva1"):
356 case FOURCC("dvhe"):
357 case FOURCC("dvh1"):
358 case FOURCC("dav1"):
359 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
360
361 case FOURCC("ac-4"):
362 return MEDIA_MIMETYPE_AUDIO_AC4;
363 case FOURCC("Opus"):
364 return MEDIA_MIMETYPE_AUDIO_OPUS;
365
366 case FOURCC("twos"):
367 case FOURCC("sowt"):
368 return MEDIA_MIMETYPE_AUDIO_RAW;
369 case FOURCC("alac"):
370 return MEDIA_MIMETYPE_AUDIO_ALAC;
371 case FOURCC("fLaC"):
372 return MEDIA_MIMETYPE_AUDIO_FLAC;
373 case FOURCC("av01"):
374 return MEDIA_MIMETYPE_VIDEO_AV1;
375 case FOURCC(".mp3"):
376 case 0x6D730055: // "ms U" mp3 audio
377 return MEDIA_MIMETYPE_AUDIO_MPEG;
378 default:
379 ALOGW("Unknown fourcc: %c%c%c%c",
380 (fourcc >> 24) & 0xff,
381 (fourcc >> 16) & 0xff,
382 (fourcc >> 8) & 0xff,
383 fourcc & 0xff
384 );
385 return "application/octet-stream";
386 }
387 }
388
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)389 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
390 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
391 // AMR NB audio is always mono, 8kHz
392 *channels = 1;
393 *rate = 8000;
394 return true;
395 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
396 // AMR WB audio is always mono, 16kHz
397 *channels = 1;
398 *rate = 16000;
399 return true;
400 }
401 return false;
402 }
403
MPEG4Extractor(DataSourceHelper * source,const char * mime)404 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
405 : mMoofOffset(0),
406 mMoofFound(false),
407 mMdatFound(false),
408 mDataSource(source),
409 mInitCheck(NO_INIT),
410 mHeaderTimescale(0),
411 mIsQT(false),
412 mIsHeif(false),
413 mHasMoovBox(false),
414 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
415 mFirstTrack(NULL),
416 mLastTrack(NULL) {
417 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
418 mFileMetaData = AMediaFormat_new();
419 }
420
~MPEG4Extractor()421 MPEG4Extractor::~MPEG4Extractor() {
422 Track *track = mFirstTrack;
423 while (track) {
424 Track *next = track->next;
425
426 delete track;
427 track = next;
428 }
429 mFirstTrack = mLastTrack = NULL;
430
431 for (size_t i = 0; i < mPssh.size(); i++) {
432 delete [] mPssh[i].data;
433 }
434 mPssh.clear();
435
436 delete mDataSource;
437 AMediaFormat_delete(mFileMetaData);
438 }
439
flags() const440 uint32_t MPEG4Extractor::flags() const {
441 return CAN_PAUSE |
442 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
443 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
444 }
445
getMetaData(AMediaFormat * meta)446 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
447 status_t err;
448 if ((err = readMetaData()) != OK) {
449 return AMEDIA_ERROR_UNKNOWN;
450 }
451 AMediaFormat_copy(meta, mFileMetaData);
452 return AMEDIA_OK;
453 }
454
countTracks()455 size_t MPEG4Extractor::countTracks() {
456 status_t err;
457 if ((err = readMetaData()) != OK) {
458 ALOGV("MPEG4Extractor::countTracks: no tracks");
459 return 0;
460 }
461
462 size_t n = 0;
463 Track *track = mFirstTrack;
464 while (track) {
465 ++n;
466 track = track->next;
467 }
468
469 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
470 return n;
471 }
472
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)473 media_status_t MPEG4Extractor::getTrackMetaData(
474 AMediaFormat *meta,
475 size_t index, uint32_t flags) {
476 status_t err;
477 if ((err = readMetaData()) != OK) {
478 return AMEDIA_ERROR_UNKNOWN;
479 }
480
481 Track *track = mFirstTrack;
482 while (index > 0) {
483 if (track == NULL) {
484 return AMEDIA_ERROR_UNKNOWN;
485 }
486
487 track = track->next;
488 --index;
489 }
490
491 if (track == NULL) {
492 return AMEDIA_ERROR_UNKNOWN;
493 }
494
495 [=] {
496 int64_t duration;
497 int32_t samplerate;
498 // Only for audio track.
499 if (track->elst_needs_processing && mHeaderTimescale != 0 &&
500 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
501 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
502 // Elst has to be processed only the first time this function is called.
503 track->elst_needs_processing = false;
504
505 if (track->elst_segment_duration > INT64_MAX) {
506 return;
507 }
508 int64_t segment_duration = track->elst_segment_duration;
509 int64_t media_time = track->elst_media_time;
510 int64_t halfscale = track->timescale / 2;
511
512 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
513 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
514 segment_duration, media_time,
515 halfscale, mHeaderTimescale, track->timescale);
516
517 if ((uint32_t)samplerate != track->timescale){
518 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
519 samplerate);
520 }
521 // Both delay and paddingsamples have to be set inorder for either to be
522 // effective in the lower layers.
523 int64_t delay = 0;
524 if (media_time > 0) { // Gapless playback
525 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
526 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
527 __builtin_add_overflow(delay, halfscale, &delay) ||
528 (delay /= track->timescale, false) ||
529 delay > INT32_MAX ||
530 delay < INT32_MIN) {
531 ALOGW("ignoring edit list with bogus values");
532 return;
533 }
534 }
535 ALOGV("delay = %" PRId64, delay);
536 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
537
538 int64_t paddingsamples = 0;
539 if (segment_duration > 0) {
540 int64_t scaled_duration;
541 // scaled_duration = duration * mHeaderTimescale;
542 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
543 return;
544 }
545 ALOGV("scaled_duration = %" PRId64, scaled_duration);
546
547 int64_t segment_end;
548 int64_t padding;
549 int64_t segment_duration_e6;
550 int64_t media_time_scaled_e6;
551 int64_t media_time_scaled;
552 // padding = scaled_duration - ((segment_duration * 1000000) +
553 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
554 // segment_duration is based on timescale in movie header box(mdhd)
555 // media_time is based on timescale track header/media timescale
556 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
557 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
558 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
559 return;
560 }
561 media_time_scaled_e6 /= track->timescale;
562 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
563 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
564 return;
565 }
566 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
567 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
568 // might be slightly shorter than the segment duration, which would make the
569 // padding negative. Clamp to zero.
570 if (padding > 0) {
571 int64_t halfscale_mht = mHeaderTimescale / 2;
572 int64_t halfscale_e6;
573 int64_t timescale_e6;
574 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
575 // / (mHeaderTimescale * 1000000);
576 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
577 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
578 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
579 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
580 (paddingsamples /= timescale_e6, false) ||
581 paddingsamples > INT32_MAX) {
582 return;
583 }
584 }
585 }
586 ALOGV("paddingsamples = %" PRId64, paddingsamples);
587 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
588 }
589 }();
590
591 if ((flags & kIncludeExtensiveMetaData)
592 && !track->includes_expensive_metadata) {
593 track->includes_expensive_metadata = true;
594
595 const char *mime;
596 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
597 if (!strncasecmp("video/", mime, 6)) {
598 // MPEG2 tracks do not provide CSD, so read the stream header
599 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
600 off64_t offset;
601 size_t size;
602 if (track->sampleTable->getMetaDataForSample(
603 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
604 if (size > kMaxTrackHeaderSize) {
605 size = kMaxTrackHeaderSize;
606 }
607 uint8_t header[kMaxTrackHeaderSize];
608 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
609 AMediaFormat_setBuffer(track->meta,
610 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
611 }
612 }
613 }
614
615 if (mMoofOffset > 0) {
616 int64_t duration;
617 if (AMediaFormat_getInt64(track->meta,
618 AMEDIAFORMAT_KEY_DURATION, &duration)) {
619 // nothing fancy, just pick a frame near 1/4th of the duration
620 AMediaFormat_setInt64(track->meta,
621 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
622 }
623 } else {
624 uint32_t sampleIndex;
625 uint64_t sampleTime;
626 if (track->timescale != 0 &&
627 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
628 && track->sampleTable->getMetaDataForSample(
629 sampleIndex, NULL /* offset */, NULL /* size */,
630 &sampleTime) == OK) {
631 AMediaFormat_setInt64(track->meta,
632 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
633 ((int64_t)sampleTime * 1000000) / track->timescale);
634 }
635 }
636 }
637 }
638
639 AMediaFormat_copy(meta, track->meta);
640 return AMEDIA_OK;
641 }
642
readMetaData()643 status_t MPEG4Extractor::readMetaData() {
644 if (mInitCheck != NO_INIT) {
645 return mInitCheck;
646 }
647
648 off64_t offset = 0;
649 status_t err;
650 bool sawMoovOrSidx = false;
651
652 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
653 (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
654 (mItemTable != NULL) && mItemTable->isValid()))) {
655 off64_t orig_offset = offset;
656 err = parseChunk(&offset, 0);
657
658 if (err != OK && err != UNKNOWN_ERROR) {
659 break;
660 } else if (offset <= orig_offset) {
661 // only continue parsing if the offset was advanced,
662 // otherwise we might end up in an infinite loop
663 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
664 err = ERROR_MALFORMED;
665 break;
666 } else if (err == UNKNOWN_ERROR) {
667 sawMoovOrSidx = true;
668 }
669 }
670
671 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
672 off64_t exifOffset;
673 size_t exifSize;
674 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
675 AMediaFormat_setInt64(mFileMetaData,
676 AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
677 AMediaFormat_setInt64(mFileMetaData,
678 AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
679 }
680 for (uint32_t imageIndex = 0;
681 imageIndex < mItemTable->countImages(); imageIndex++) {
682 AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
683 if (meta == NULL) {
684 ALOGE("heif image %u has no meta!", imageIndex);
685 continue;
686 }
687 // Some heif files advertise image sequence brands (eg. 'hevc') in
688 // ftyp box, but don't have any valid tracks in them. Instead of
689 // reporting the entire file as malformed, we override the error
690 // to allow still images to be extracted.
691 if (err != OK) {
692 ALOGW("Extracting still images only");
693 err = OK;
694 }
695 mInitCheck = OK;
696
697 ALOGV("adding HEIF image track %u", imageIndex);
698 Track *track = new Track;
699 if (mLastTrack != NULL) {
700 mLastTrack->next = track;
701 } else {
702 mFirstTrack = track;
703 }
704 mLastTrack = track;
705
706 track->meta = meta;
707 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
708 track->timescale = 1000000;
709 }
710 }
711
712 if (mInitCheck == OK) {
713 if (findTrackByMimePrefix("video/") != NULL) {
714 AMediaFormat_setString(mFileMetaData,
715 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
716 } else if (findTrackByMimePrefix("audio/") != NULL) {
717 AMediaFormat_setString(mFileMetaData,
718 AMEDIAFORMAT_KEY_MIME, "audio/mp4");
719 } else if (findTrackByMimePrefix(
720 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
721 AMediaFormat_setString(mFileMetaData,
722 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
723 } else {
724 AMediaFormat_setString(mFileMetaData,
725 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
726 }
727 } else {
728 mInitCheck = err;
729 }
730
731 CHECK_NE(err, (status_t)NO_INIT);
732
733 // copy pssh data into file metadata
734 uint64_t psshsize = 0;
735 for (size_t i = 0; i < mPssh.size(); i++) {
736 psshsize += 20 + mPssh[i].datalen;
737 }
738 if (psshsize > 0 && psshsize <= UINT32_MAX) {
739 char *buf = (char*)malloc(psshsize);
740 if (!buf) {
741 ALOGE("b/28471206");
742 return NO_MEMORY;
743 }
744 char *ptr = buf;
745 for (size_t i = 0; i < mPssh.size(); i++) {
746 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
747 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
748 ptr += (20 + mPssh[i].datalen);
749 }
750 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
751 free(buf);
752 }
753
754 return mInitCheck;
755 }
756
757 struct PathAdder {
PathAdderandroid::PathAdder758 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
759 : mPath(path) {
760 mPath->push(chunkType);
761 }
762
~PathAdderandroid::PathAdder763 ~PathAdder() {
764 mPath->pop();
765 }
766
767 private:
768 Vector<uint32_t> *mPath;
769
770 PathAdder(const PathAdder &);
771 PathAdder &operator=(const PathAdder &);
772 };
773
underMetaDataPath(const Vector<uint32_t> & path)774 static bool underMetaDataPath(const Vector<uint32_t> &path) {
775 return path.size() >= 5
776 && path[0] == FOURCC("moov")
777 && path[1] == FOURCC("udta")
778 && path[2] == FOURCC("meta")
779 && path[3] == FOURCC("ilst");
780 }
781
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)782 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
783 return path.size() >= 2
784 && path[0] == FOURCC("moov")
785 && path[1] == FOURCC("meta")
786 && (depth == 2
787 || (depth == 3
788 && (path[2] == FOURCC("hdlr")
789 || path[2] == FOURCC("ilst")
790 || path[2] == FOURCC("keys"))));
791 }
792
793 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)794 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
795 // delta between mpeg4 time and unix epoch time
796 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
797 if (time_1904 < INT64_MIN + delta) {
798 return false;
799 }
800 time_t time_1970 = time_1904 - delta;
801
802 char tmp[32];
803 struct tm* tm = gmtime(&time_1970);
804 if (tm != NULL &&
805 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
806 s->setTo(tmp);
807 return true;
808 }
809 return false;
810 }
811
parseChunk(off64_t * offset,int depth)812 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
813 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
814
815 if (*offset < 0) {
816 ALOGE("b/23540914");
817 return ERROR_MALFORMED;
818 }
819 if (depth > 100) {
820 ALOGE("b/27456299");
821 return ERROR_MALFORMED;
822 }
823 uint32_t hdr[2];
824 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
825 return ERROR_IO;
826 }
827 uint64_t chunk_size = ntohl(hdr[0]);
828 int32_t chunk_type = ntohl(hdr[1]);
829 off64_t data_offset = *offset + 8;
830
831 if (chunk_size == 1) {
832 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
833 return ERROR_IO;
834 }
835 chunk_size = ntoh64(chunk_size);
836 data_offset += 8;
837
838 if (chunk_size < 16) {
839 // The smallest valid chunk is 16 bytes long in this case.
840 return ERROR_MALFORMED;
841 }
842 } else if (chunk_size == 0) {
843 if (depth == 0) {
844 // atom extends to end of file
845 off64_t sourceSize;
846 if (mDataSource->getSize(&sourceSize) == OK) {
847 chunk_size = (sourceSize - *offset);
848 } else {
849 // XXX could we just pick a "sufficiently large" value here?
850 ALOGE("atom size is 0, and data source has no size");
851 return ERROR_MALFORMED;
852 }
853 } else {
854 // not allowed for non-toplevel atoms, skip it
855 *offset += 4;
856 return OK;
857 }
858 } else if (chunk_size < 8) {
859 // The smallest valid chunk is 8 bytes long.
860 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
861 return ERROR_MALFORMED;
862 }
863
864 char chunk[5];
865 MakeFourCCString(chunk_type, chunk);
866 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
867
868 if (kUseHexDump) {
869 static const char kWhitespace[] = " ";
870 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
871 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
872
873 char buffer[256];
874 size_t n = chunk_size;
875 if (n > sizeof(buffer)) {
876 n = sizeof(buffer);
877 }
878 if (mDataSource->readAt(*offset, buffer, n)
879 < (ssize_t)n) {
880 return ERROR_IO;
881 }
882
883 hexdump(buffer, n);
884 }
885
886 PathAdder autoAdder(&mPath, chunk_type);
887
888 // (data_offset - *offset) is either 8 or 16
889 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
890 if (chunk_data_size < 0) {
891 ALOGE("b/23540914");
892 return ERROR_MALFORMED;
893 }
894 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
895 char errMsg[100];
896 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
897 ALOGE("%s (b/28615448)", errMsg);
898 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
899 return ERROR_MALFORMED;
900 }
901
902 if (chunk_type != FOURCC("cprt")
903 && chunk_type != FOURCC("covr")
904 && mPath.size() == 5 && underMetaDataPath(mPath)) {
905 off64_t stop_offset = *offset + chunk_size;
906 *offset = data_offset;
907 while (*offset < stop_offset) {
908 status_t err = parseChunk(offset, depth + 1);
909 if (err != OK) {
910 return err;
911 }
912 }
913
914 if (*offset != stop_offset) {
915 return ERROR_MALFORMED;
916 }
917
918 return OK;
919 }
920
921 switch(chunk_type) {
922 case FOURCC("moov"):
923 case FOURCC("trak"):
924 case FOURCC("mdia"):
925 case FOURCC("minf"):
926 case FOURCC("dinf"):
927 case FOURCC("stbl"):
928 case FOURCC("mvex"):
929 case FOURCC("moof"):
930 case FOURCC("traf"):
931 case FOURCC("mfra"):
932 case FOURCC("udta"):
933 case FOURCC("ilst"):
934 case FOURCC("sinf"):
935 case FOURCC("schi"):
936 case FOURCC("edts"):
937 case FOURCC("wave"):
938 {
939 if (chunk_type == FOURCC("moov") && depth != 0) {
940 ALOGE("moov: depth %d", depth);
941 return ERROR_MALFORMED;
942 }
943
944 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
945 ALOGE("duplicate moov");
946 return ERROR_MALFORMED;
947 }
948
949 if (chunk_type == FOURCC("moof") && !mMoofFound) {
950 // store the offset of the first segment
951 mMoofFound = true;
952 mMoofOffset = *offset;
953 }
954
955 if (chunk_type == FOURCC("stbl")) {
956 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
957
958 if (mDataSource->flags()
959 & (DataSourceBase::kWantsPrefetching
960 | DataSourceBase::kIsCachingDataSource)) {
961 CachedRangedDataSource *cachedSource =
962 new CachedRangedDataSource(mDataSource);
963
964 if (cachedSource->setCachedRange(
965 *offset, chunk_size,
966 true /* assume ownership on success */) == OK) {
967 mDataSource = cachedSource;
968 } else {
969 delete cachedSource;
970 }
971 }
972
973 if (mLastTrack == NULL) {
974 return ERROR_MALFORMED;
975 }
976
977 mLastTrack->sampleTable = new SampleTable(mDataSource);
978 }
979
980 bool isTrack = false;
981 if (chunk_type == FOURCC("trak")) {
982 if (depth != 1) {
983 ALOGE("trak: depth %d", depth);
984 return ERROR_MALFORMED;
985 }
986 isTrack = true;
987
988 ALOGV("adding new track");
989 Track *track = new Track;
990 if (mLastTrack) {
991 mLastTrack->next = track;
992 } else {
993 mFirstTrack = track;
994 }
995 mLastTrack = track;
996
997 track->meta = AMediaFormat_new();
998 AMediaFormat_setString(track->meta,
999 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1000 }
1001
1002 off64_t stop_offset = *offset + chunk_size;
1003 *offset = data_offset;
1004 while (*offset < stop_offset) {
1005
1006 // pass udata terminate
1007 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1008 // handle the case that udta terminates with terminate code x00000000
1009 // note that 0 terminator is optional and we just handle this case.
1010 uint32_t terminate_code = 1;
1011 mDataSource->readAt(*offset, &terminate_code, 4);
1012 if (0 == terminate_code) {
1013 *offset += 4;
1014 ALOGD("Terminal code for udta");
1015 continue;
1016 } else {
1017 ALOGW("invalid udta Terminal code");
1018 }
1019 }
1020
1021 status_t err = parseChunk(offset, depth + 1);
1022 if (err != OK) {
1023 if (isTrack) {
1024 mLastTrack->skipTrack = true;
1025 break;
1026 }
1027 return err;
1028 }
1029 }
1030
1031 if (*offset != stop_offset) {
1032 return ERROR_MALFORMED;
1033 }
1034
1035 if (isTrack) {
1036 int32_t trackId;
1037 // There must be exactly one track header per track.
1038
1039 if (!AMediaFormat_getInt32(mLastTrack->meta,
1040 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1041 mLastTrack->skipTrack = true;
1042 }
1043
1044 status_t err = verifyTrack(mLastTrack);
1045 if (err != OK) {
1046 mLastTrack->skipTrack = true;
1047 }
1048
1049
1050 if (mLastTrack->skipTrack) {
1051 ALOGV("skipping this track...");
1052 Track *cur = mFirstTrack;
1053
1054 if (cur == mLastTrack) {
1055 delete cur;
1056 mFirstTrack = mLastTrack = NULL;
1057 } else {
1058 while (cur && cur->next != mLastTrack) {
1059 cur = cur->next;
1060 }
1061 if (cur) {
1062 cur->next = NULL;
1063 }
1064 delete mLastTrack;
1065 mLastTrack = cur;
1066 }
1067
1068 return OK;
1069 }
1070
1071 // place things we built elsewhere into their final locations
1072
1073 // put aggregated tx3g data into the metadata
1074 if (mLastTrack->mTx3gFilled > 0) {
1075 ALOGV("Putting %zu bytes of tx3g data into meta data",
1076 mLastTrack->mTx3gFilled);
1077 AMediaFormat_setBuffer(mLastTrack->meta,
1078 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1079 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1080 // drop it now to reduce our footprint
1081 free(mLastTrack->mTx3gBuffer);
1082 mLastTrack->mTx3gBuffer = NULL;
1083 mLastTrack->mTx3gFilled = 0;
1084 mLastTrack->mTx3gSize = 0;
1085 }
1086
1087 const char *mime;
1088 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1089
1090 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1091 void *data;
1092 size_t size;
1093
1094 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
1095 const uint8_t *ptr = (const uint8_t *)data;
1096 const uint8_t profile = ptr[2] >> 1;
1097 const uint8_t bl_compatibility_id = (ptr[4]) >> 4;
1098 bool create_two_tracks = false;
1099
1100 if (bl_compatibility_id && bl_compatibility_id != 15) {
1101 create_two_tracks = true;
1102 }
1103
1104 if (4 == profile || 7 == profile ||
1105 (profile >= 8 && profile < 11 && create_two_tracks)) {
1106 // we need a backward compatible track
1107 ALOGV("Adding new backward compatible track");
1108 Track *track_b = new Track;
1109
1110 track_b->timescale = mLastTrack->timescale;
1111 track_b->sampleTable = mLastTrack->sampleTable;
1112 track_b->includes_expensive_metadata = mLastTrack->includes_expensive_metadata;
1113 track_b->skipTrack = mLastTrack->skipTrack;
1114 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1115 track_b->elst_media_time = mLastTrack->elst_media_time;
1116 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1117 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1118 track_b->elst_initial_empty_edit_ticks = mLastTrack->elst_initial_empty_edit_ticks;
1119 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1120
1121 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1122 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1123 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1124
1125 track_b->meta = AMediaFormat_new();
1126 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1127
1128 mLastTrack->next = track_b;
1129 track_b->next = NULL;
1130
1131 auto id = track_b->meta->mFormat->findEntryByName(AMEDIAFORMAT_KEY_CSD_2);
1132 track_b->meta->mFormat->removeEntryAt(id);
1133
1134 if (4 == profile || 7 == profile || 8 == profile ) {
1135 AMediaFormat_setString(track_b->meta,
1136 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1137 } else if (9 == profile) {
1138 AMediaFormat_setString(track_b->meta,
1139 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1140 } else if (10 == profile) {
1141 AMediaFormat_setString(track_b->meta,
1142 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1143 } // Should never get to else part
1144
1145 mLastTrack = track_b;
1146 }
1147 }
1148 }
1149 } else if (chunk_type == FOURCC("moov")) {
1150 mInitCheck = OK;
1151
1152 return UNKNOWN_ERROR; // Return a dummy error.
1153 }
1154 break;
1155 }
1156
1157 case FOURCC("schm"):
1158 {
1159
1160 *offset += chunk_size;
1161 if (!mLastTrack) {
1162 return ERROR_MALFORMED;
1163 }
1164
1165 uint32_t scheme_type;
1166 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1167 return ERROR_IO;
1168 }
1169 scheme_type = ntohl(scheme_type);
1170 int32_t mode = kCryptoModeUnencrypted;
1171 switch(scheme_type) {
1172 case FOURCC("cbc1"):
1173 {
1174 mode = kCryptoModeAesCbc;
1175 break;
1176 }
1177 case FOURCC("cbcs"):
1178 {
1179 mode = kCryptoModeAesCbc;
1180 mLastTrack->subsample_encryption = true;
1181 break;
1182 }
1183 case FOURCC("cenc"):
1184 {
1185 mode = kCryptoModeAesCtr;
1186 break;
1187 }
1188 case FOURCC("cens"):
1189 {
1190 mode = kCryptoModeAesCtr;
1191 mLastTrack->subsample_encryption = true;
1192 break;
1193 }
1194 }
1195 if (mode != kCryptoModeUnencrypted) {
1196 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1197 }
1198 break;
1199 }
1200
1201
1202 case FOURCC("elst"):
1203 {
1204 *offset += chunk_size;
1205
1206 if (!mLastTrack) {
1207 return ERROR_MALFORMED;
1208 }
1209
1210 // See 14496-12 8.6.6
1211 uint8_t version;
1212 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1213 return ERROR_IO;
1214 }
1215
1216 uint32_t entry_count;
1217 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1218 return ERROR_IO;
1219 }
1220
1221 if (entry_count > 2) {
1222 /* We support a single entry for gapless playback or negating offset for
1223 * reordering B frames, two entries (empty edit) for start offset at the moment.
1224 */
1225 ALOGW("ignoring edit list with %d entries", entry_count);
1226 } else {
1227 off64_t entriesoffset = data_offset + 8;
1228 uint64_t segment_duration;
1229 int64_t media_time;
1230 bool empty_edit_present = false;
1231 for (int i = 0; i < entry_count; ++i) {
1232 switch (version) {
1233 case 0: {
1234 uint32_t sd;
1235 int32_t mt;
1236 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1237 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1238 return ERROR_IO;
1239 }
1240 segment_duration = sd;
1241 media_time = mt;
1242 // 4(segment duration) + 4(media time) + 4(media rate)
1243 entriesoffset += 12;
1244 break;
1245 }
1246 case 1: {
1247 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1248 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1249 return ERROR_IO;
1250 }
1251 // 8(segment duration) + 8(media time) + 4(media rate)
1252 entriesoffset += 20;
1253 break;
1254 }
1255 default:
1256 return ERROR_IO;
1257 break;
1258 }
1259 // Empty edit entry would have to be first entry.
1260 if (media_time == -1 && i == 0) {
1261 empty_edit_present = true;
1262 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1263 /* In movie header timescale, and needs to be converted to media timescale
1264 * after we get that from a track's 'mdhd' atom,
1265 * which at times come after 'elst'.
1266 */
1267 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1268 } else if (media_time >= 0 && i == 0) {
1269 ALOGV("first edit list entry - from gapless playback files");
1270 mLastTrack->elst_media_time = media_time;
1271 mLastTrack->elst_segment_duration = segment_duration;
1272 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1273 segment_duration, media_time);
1274 // media_time is in media timescale as are STTS/CTTS entries.
1275 mLastTrack->elst_shift_start_ticks = media_time;
1276 } else if (empty_edit_present && i == 1) {
1277 // Process second entry only when the first entry was an empty edit entry.
1278 ALOGV("second edit list entry");
1279 mLastTrack->elst_shift_start_ticks = media_time;
1280 } else {
1281 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1282 }
1283 }
1284 // save these for later, because the elst atom might precede
1285 // the atoms that actually gives us the duration and sample rate
1286 // needed to calculate the padding and delay values
1287 mLastTrack->elst_needs_processing = true;
1288 }
1289 break;
1290 }
1291
1292 case FOURCC("frma"):
1293 {
1294 *offset += chunk_size;
1295
1296 uint32_t original_fourcc;
1297 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1298 return ERROR_IO;
1299 }
1300 original_fourcc = ntohl(original_fourcc);
1301 ALOGV("read original format: %d", original_fourcc);
1302
1303 if (mLastTrack == NULL) {
1304 return ERROR_MALFORMED;
1305 }
1306
1307 AMediaFormat_setString(mLastTrack->meta,
1308 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1309 uint32_t num_channels = 0;
1310 uint32_t sample_rate = 0;
1311 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1312 AMediaFormat_setInt32(mLastTrack->meta,
1313 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1314 AMediaFormat_setInt32(mLastTrack->meta,
1315 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1316 }
1317
1318 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1319 off64_t tmpOffset = *offset;
1320 status_t err = parseALACSampleEntry(&tmpOffset);
1321 if (err != OK) {
1322 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1323 return err;
1324 }
1325 *offset = tmpOffset + 8;
1326 }
1327
1328 break;
1329 }
1330
1331 case FOURCC("tenc"):
1332 {
1333 *offset += chunk_size;
1334
1335 if (chunk_size < 32) {
1336 return ERROR_MALFORMED;
1337 }
1338
1339 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1340 // default IV size, 16 bytes default KeyID
1341 // (ISO 23001-7)
1342
1343 uint8_t version;
1344 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1345 < (ssize_t)sizeof(version)) {
1346 return ERROR_IO;
1347 }
1348
1349 uint8_t buf[4];
1350 memset(buf, 0, 4);
1351 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1352 return ERROR_IO;
1353 }
1354
1355 if (mLastTrack == NULL) {
1356 return ERROR_MALFORMED;
1357 }
1358
1359 uint8_t defaultEncryptedByteBlock = 0;
1360 uint8_t defaultSkipByteBlock = 0;
1361 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1362 if (version == 1) {
1363 uint32_t pattern = buf[2];
1364 defaultEncryptedByteBlock = pattern >> 4;
1365 defaultSkipByteBlock = pattern & 0xf;
1366 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1367 // use (1,0) to mean "encrypt everything"
1368 defaultEncryptedByteBlock = 1;
1369 }
1370 } else if (mLastTrack->subsample_encryption) {
1371 ALOGW("subsample_encryption should be version 1");
1372 } else if (defaultAlgorithmId > 1) {
1373 // only 0 (clear) and 1 (AES-128) are valid
1374 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1375 defaultAlgorithmId = 1;
1376 }
1377
1378 memset(buf, 0, 4);
1379 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1380 return ERROR_IO;
1381 }
1382 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1383
1384 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1385 // only unencrypted data must have 0 IV size
1386 return ERROR_MALFORMED;
1387 } else if (defaultIVSize != 0 &&
1388 defaultIVSize != 8 &&
1389 defaultIVSize != 16) {
1390 return ERROR_MALFORMED;
1391 }
1392
1393 uint8_t defaultKeyId[16];
1394
1395 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1396 return ERROR_IO;
1397 }
1398
1399 sp<ABuffer> defaultConstantIv;
1400 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1401
1402 uint8_t ivlength;
1403 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1404 < (ssize_t)sizeof(ivlength)) {
1405 return ERROR_IO;
1406 }
1407
1408 if (ivlength != 8 && ivlength != 16) {
1409 ALOGW("unsupported IV length: %u", ivlength);
1410 return ERROR_MALFORMED;
1411 }
1412
1413 defaultConstantIv = new ABuffer(ivlength);
1414 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1415 < (ssize_t)ivlength) {
1416 return ERROR_IO;
1417 }
1418
1419 defaultConstantIv->setRange(0, ivlength);
1420 }
1421
1422 int32_t tmpAlgorithmId;
1423 if (!AMediaFormat_getInt32(mLastTrack->meta,
1424 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1425 AMediaFormat_setInt32(mLastTrack->meta,
1426 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1427 }
1428
1429 AMediaFormat_setInt32(mLastTrack->meta,
1430 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1431 AMediaFormat_setBuffer(mLastTrack->meta,
1432 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1433 AMediaFormat_setInt32(mLastTrack->meta,
1434 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1435 AMediaFormat_setInt32(mLastTrack->meta,
1436 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1437 if (defaultConstantIv != NULL) {
1438 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1439 defaultConstantIv->data(), defaultConstantIv->size());
1440 }
1441 break;
1442 }
1443
1444 case FOURCC("tkhd"):
1445 {
1446 *offset += chunk_size;
1447
1448 status_t err;
1449 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1450 return err;
1451 }
1452
1453 break;
1454 }
1455
1456 case FOURCC("tref"):
1457 {
1458 off64_t stop_offset = *offset + chunk_size;
1459 *offset = data_offset;
1460 while (*offset < stop_offset) {
1461 status_t err = parseChunk(offset, depth + 1);
1462 if (err != OK) {
1463 return err;
1464 }
1465 }
1466 if (*offset != stop_offset) {
1467 return ERROR_MALFORMED;
1468 }
1469 break;
1470 }
1471
1472 case FOURCC("thmb"):
1473 {
1474 *offset += chunk_size;
1475
1476 if (mLastTrack != NULL) {
1477 // Skip thumbnail track for now since we don't have an
1478 // API to retrieve it yet.
1479 // The thumbnail track can't be accessed by negative index or time,
1480 // because each timed sample has its own corresponding thumbnail
1481 // in the thumbnail track. We'll need a dedicated API to retrieve
1482 // thumbnail at time instead.
1483 mLastTrack->skipTrack = true;
1484 }
1485
1486 break;
1487 }
1488
1489 case FOURCC("pssh"):
1490 {
1491 *offset += chunk_size;
1492
1493 PsshInfo pssh;
1494
1495 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1496 return ERROR_IO;
1497 }
1498
1499 uint32_t psshdatalen = 0;
1500 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1501 return ERROR_IO;
1502 }
1503 pssh.datalen = ntohl(psshdatalen);
1504 ALOGV("pssh data size: %d", pssh.datalen);
1505 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1506 // pssh data length exceeds size of containing box
1507 return ERROR_MALFORMED;
1508 }
1509
1510 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1511 if (pssh.data == NULL) {
1512 return ERROR_MALFORMED;
1513 }
1514 ALOGV("allocated pssh @ %p", pssh.data);
1515 ssize_t requested = (ssize_t) pssh.datalen;
1516 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1517 delete[] pssh.data;
1518 return ERROR_IO;
1519 }
1520 mPssh.push_back(pssh);
1521
1522 break;
1523 }
1524
1525 case FOURCC("mdhd"):
1526 {
1527 *offset += chunk_size;
1528
1529 if (chunk_data_size < 4 || mLastTrack == NULL) {
1530 return ERROR_MALFORMED;
1531 }
1532
1533 uint8_t version;
1534 if (mDataSource->readAt(
1535 data_offset, &version, sizeof(version))
1536 < (ssize_t)sizeof(version)) {
1537 return ERROR_IO;
1538 }
1539
1540 off64_t timescale_offset;
1541
1542 if (version == 1) {
1543 timescale_offset = data_offset + 4 + 16;
1544 } else if (version == 0) {
1545 timescale_offset = data_offset + 4 + 8;
1546 } else {
1547 return ERROR_IO;
1548 }
1549
1550 uint32_t timescale;
1551 if (mDataSource->readAt(
1552 timescale_offset, ×cale, sizeof(timescale))
1553 < (ssize_t)sizeof(timescale)) {
1554 return ERROR_IO;
1555 }
1556
1557 if (!timescale) {
1558 ALOGE("timescale should not be ZERO.");
1559 return ERROR_MALFORMED;
1560 }
1561
1562 mLastTrack->timescale = ntohl(timescale);
1563
1564 // 14496-12 says all ones means indeterminate, but some files seem to use
1565 // 0 instead. We treat both the same.
1566 int64_t duration = 0;
1567 if (version == 1) {
1568 if (mDataSource->readAt(
1569 timescale_offset + 4, &duration, sizeof(duration))
1570 < (ssize_t)sizeof(duration)) {
1571 return ERROR_IO;
1572 }
1573 if (duration != -1) {
1574 duration = ntoh64(duration);
1575 }
1576 } else {
1577 uint32_t duration32;
1578 if (mDataSource->readAt(
1579 timescale_offset + 4, &duration32, sizeof(duration32))
1580 < (ssize_t)sizeof(duration32)) {
1581 return ERROR_IO;
1582 }
1583 if (duration32 != 0xffffffff) {
1584 duration = ntohl(duration32);
1585 }
1586 }
1587 if (duration != 0 && mLastTrack->timescale != 0) {
1588 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1589 if (durationUs < 0 || durationUs > INT64_MAX) {
1590 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1591 (long long) duration, (long long) mLastTrack->timescale);
1592 return ERROR_MALFORMED;
1593 }
1594 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1595 }
1596
1597 uint8_t lang[2];
1598 off64_t lang_offset;
1599 if (version == 1) {
1600 lang_offset = timescale_offset + 4 + 8;
1601 } else if (version == 0) {
1602 lang_offset = timescale_offset + 4 + 4;
1603 } else {
1604 return ERROR_IO;
1605 }
1606
1607 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1608 < (ssize_t)sizeof(lang)) {
1609 return ERROR_IO;
1610 }
1611
1612 // To get the ISO-639-2/T three character language code
1613 // 1 bit pad followed by 3 5-bits characters. Each character
1614 // is packed as the difference between its ASCII value and 0x60.
1615 char lang_code[4];
1616 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1617 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1618 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1619 lang_code[3] = '\0';
1620
1621 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1622
1623 break;
1624 }
1625
1626 case FOURCC("stsd"):
1627 {
1628 uint8_t buffer[8];
1629 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1630 return ERROR_MALFORMED;
1631 }
1632
1633 if (mDataSource->readAt(
1634 data_offset, buffer, 8) < 8) {
1635 return ERROR_IO;
1636 }
1637
1638 if (U32_AT(buffer) != 0) {
1639 // Should be version 0, flags 0.
1640 return ERROR_MALFORMED;
1641 }
1642
1643 uint32_t entry_count = U32_AT(&buffer[4]);
1644
1645 if (entry_count > 1) {
1646 // For 3GPP timed text, there could be multiple tx3g boxes contain
1647 // multiple text display formats. These formats will be used to
1648 // display the timed text.
1649 // For encrypted files, there may also be more than one entry.
1650 const char *mime;
1651
1652 if (mLastTrack == NULL)
1653 return ERROR_MALFORMED;
1654
1655 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1656 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1657 strcasecmp(mime, "application/octet-stream")) {
1658 // For now we only support a single type of media per track.
1659 mLastTrack->skipTrack = true;
1660 *offset += chunk_size;
1661 break;
1662 }
1663 }
1664 off64_t stop_offset = *offset + chunk_size;
1665 *offset = data_offset + 8;
1666 for (uint32_t i = 0; i < entry_count; ++i) {
1667 status_t err = parseChunk(offset, depth + 1);
1668 if (err != OK) {
1669 return err;
1670 }
1671 }
1672
1673 if (*offset != stop_offset) {
1674 return ERROR_MALFORMED;
1675 }
1676 break;
1677 }
1678 case FOURCC("mett"):
1679 {
1680 *offset += chunk_size;
1681
1682 // the absolute minimum size of a compliant mett box is 11 bytes:
1683 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1684 // The resulting mime_format would be invalid at that size though.
1685 if (mLastTrack == NULL || chunk_data_size < 11) {
1686 return ERROR_MALFORMED;
1687 }
1688
1689 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1690 if (buffer.get() == NULL) {
1691 return NO_MEMORY;
1692 }
1693
1694 if (mDataSource->readAt(
1695 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1696 return ERROR_IO;
1697 }
1698
1699 // ISO-14496-12:
1700 // int8 reserved[6]; // should be all zeroes
1701 // int16_t data_reference_index;
1702 // char content_encoding[]; // null terminated, optional (= just the null byte)
1703 // char mime_format[]; // null terminated, mandatory
1704 // optional other boxes
1705 //
1706 // API < 29:
1707 // char mime_format[]; // null terminated
1708 //
1709 // API >= 29
1710 // char mime_format[]; // null terminated
1711 // char mime_format[]; // null terminated
1712
1713 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1714 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1715 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1716 // make it somewhat compatible with the standard. The workaround is to write the
1717 // null-terminated mime_format string twice. This allows compliant parsers to
1718 // read the missing reserved, data_reference_index, and content_encoding fields
1719 // from the first mime_type string. The actual mime_format field would then be
1720 // read correctly from the second string. The non-compliant Android frameworks
1721 // from API 28 and earlier would still be able to read the mime_format correctly
1722 // as it would only read the first null-terminated mime_format string. To enable
1723 // reading metadata tracks generated from both the non-compliant and compliant
1724 // formats, a check needs to be done to see which format is used.
1725 const char *str = (const char*) buffer.get();
1726 size_t string_length = strnlen(str, chunk_data_size);
1727
1728 if (string_length == chunk_data_size - 1) {
1729 // This is likely a pre API 29 file, since it's a single null terminated
1730 // string filling the entire box.
1731 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1732 } else {
1733 // This might be a fully compliant metadata track, a "double mime" compatibility
1734 // track, or anything else, including a single non-terminated string, so we need
1735 // to determine the length of each string we want to parse out of the box.
1736 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1737 if (encoding_length + 8 >= chunk_data_size - 2) {
1738 // the encoding extends to the end of the box, so there's no mime_format
1739 return ERROR_MALFORMED;
1740 }
1741 String8 contentEncoding(str + 8, encoding_length);
1742 String8 mimeFormat(str + 8 + encoding_length + 1,
1743 chunk_data_size - 8 - encoding_length - 1);
1744 AMediaFormat_setString(mLastTrack->meta,
1745 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1746 }
1747 break;
1748 }
1749
1750 case FOURCC("mp4a"):
1751 case FOURCC("enca"):
1752 case FOURCC("samr"):
1753 case FOURCC("sawb"):
1754 case FOURCC("Opus"):
1755 case FOURCC("twos"):
1756 case FOURCC("sowt"):
1757 case FOURCC("alac"):
1758 case FOURCC("fLaC"):
1759 case FOURCC(".mp3"):
1760 case 0x6D730055: // "ms U" mp3 audio
1761 {
1762 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1763
1764 if (chunk_type == FOURCC("alac")) {
1765 off64_t offsetTmp = *offset;
1766 status_t err = parseALACSampleEntry(&offsetTmp);
1767 if (err != OK) {
1768 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1769 return err;
1770 }
1771 }
1772
1773 // Ignore all atoms embedded in QT wave atom
1774 ALOGV("Ignore all atoms embedded in QT wave atom");
1775 *offset += chunk_size;
1776 break;
1777 }
1778
1779 uint8_t buffer[8 + 20];
1780 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1781 // Basic AudioSampleEntry size.
1782 return ERROR_MALFORMED;
1783 }
1784
1785 if (mDataSource->readAt(
1786 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1787 return ERROR_IO;
1788 }
1789
1790 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1791 uint16_t version = U16_AT(&buffer[8]);
1792 uint32_t num_channels = U16_AT(&buffer[16]);
1793
1794 uint16_t sample_size = U16_AT(&buffer[18]);
1795 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1796
1797 if (mLastTrack == NULL)
1798 return ERROR_MALFORMED;
1799
1800 off64_t stop_offset = *offset + chunk_size;
1801 *offset = data_offset + sizeof(buffer);
1802
1803 if (mIsQT) {
1804 if (version == 1) {
1805 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1806 return ERROR_IO;
1807 }
1808
1809 #if 0
1810 U32_AT(buffer); // samples per packet
1811 U32_AT(&buffer[4]); // bytes per packet
1812 U32_AT(&buffer[8]); // bytes per frame
1813 U32_AT(&buffer[12]); // bytes per sample
1814 #endif
1815 *offset += 16;
1816 } else if (version == 2) {
1817 uint8_t v2buffer[36];
1818 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1819 return ERROR_IO;
1820 }
1821
1822 #if 0
1823 U32_AT(v2buffer); // size of struct only
1824 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1825 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1826 U32_AT(&v2buffer[16]); // always 0x7f000000
1827 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1828 U32_AT(&v2buffer[24]); // format specifc flags
1829 U32_AT(&v2buffer[28]); // const bytes per audio packet
1830 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1831 #endif
1832 *offset += 36;
1833 }
1834 }
1835
1836 if (chunk_type != FOURCC("enca")) {
1837 // if the chunk type is enca, we'll get the type from the frma box later
1838 AMediaFormat_setString(mLastTrack->meta,
1839 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1840 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1841
1842 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1843 AMediaFormat_setInt32(mLastTrack->meta,
1844 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1845 if (chunk_type == FOURCC("twos")) {
1846 AMediaFormat_setInt32(mLastTrack->meta,
1847 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1848 }
1849 }
1850 }
1851 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1852 chunk, num_channels, sample_size, sample_rate);
1853 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1854 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1855
1856 if (chunk_type == FOURCC("Opus")) {
1857 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1858 data_offset += sizeof(buffer);
1859 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1860
1861 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1862 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1863 return ERROR_MALFORMED;
1864 }
1865 // Read Opus Header
1866 if (mDataSource->readAt(
1867 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1868 return ERROR_IO;
1869 }
1870
1871 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1872 // http://wiki.xiph.org/OggOpus#ID_Header
1873 strncpy((char *)opusInfo, "OpusHead", 8);
1874
1875 // Version shall be 0 as per mp4 Opus Specific Box
1876 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1877 if (opusInfo[8]) {
1878 return ERROR_MALFORMED;
1879 }
1880 // Force version to 1 as per OpusHead definition
1881 // (http://wiki.xiph.org/OggOpus#ID_Header)
1882 opusInfo[8] = 1;
1883
1884 // Read Opus Specific Box values
1885 size_t opusOffset = 10;
1886 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1887 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1888 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1889
1890 // Convert Opus Specific Box values. ParseOpusHeader expects
1891 // the values in LE, however MP4 stores these values as BE
1892 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1893 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1894 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1895 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1896
1897 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1898 static const int32_t kOpusSampleRate = 48000;
1899 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1900
1901 AMediaFormat_setBuffer(mLastTrack->meta,
1902 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1903 AMediaFormat_setBuffer(mLastTrack->meta,
1904 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1905 AMediaFormat_setBuffer(mLastTrack->meta,
1906 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1907
1908 data_offset += opusInfoSize;
1909 *offset = data_offset;
1910 CHECK_EQ(*offset, stop_offset);
1911 }
1912
1913 if (!mIsQT && chunk_type == FOURCC("alac")) {
1914 data_offset += sizeof(buffer);
1915
1916 status_t err = parseALACSampleEntry(&data_offset);
1917 if (err != OK) {
1918 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1919 return err;
1920 }
1921 *offset = data_offset;
1922 CHECK_EQ(*offset, stop_offset);
1923 }
1924
1925 if (chunk_type == FOURCC("fLaC")) {
1926
1927 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1928 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1929 uint8_t flacInfo[4 + 4 + 34];
1930 // skipping dFla, version
1931 data_offset += sizeof(buffer) + 12;
1932 size_t flacOffset = 4;
1933 // Add flaC header mime type to CSD
1934 strncpy((char *)flacInfo, "fLaC", 4);
1935 if (mDataSource->readAt(
1936 data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1937 (ssize_t)sizeof(flacInfo) - flacOffset) {
1938 return ERROR_IO;
1939 }
1940 data_offset += sizeof(flacInfo) - flacOffset;
1941
1942 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1943 sizeof(flacInfo));
1944 *offset = data_offset;
1945 CHECK_EQ(*offset, stop_offset);
1946 }
1947
1948 while (*offset < stop_offset) {
1949 status_t err = parseChunk(offset, depth + 1);
1950 if (err != OK) {
1951 return err;
1952 }
1953 }
1954
1955 if (*offset != stop_offset) {
1956 return ERROR_MALFORMED;
1957 }
1958 break;
1959 }
1960
1961 case FOURCC("mp4v"):
1962 case FOURCC("encv"):
1963 case FOURCC("s263"):
1964 case FOURCC("H263"):
1965 case FOURCC("h263"):
1966 case FOURCC("avc1"):
1967 case FOURCC("hvc1"):
1968 case FOURCC("hev1"):
1969 case FOURCC("dvav"):
1970 case FOURCC("dva1"):
1971 case FOURCC("dvhe"):
1972 case FOURCC("dvh1"):
1973 case FOURCC("dav1"):
1974 case FOURCC("av01"):
1975 {
1976 uint8_t buffer[78];
1977 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1978 // Basic VideoSampleEntry size.
1979 return ERROR_MALFORMED;
1980 }
1981
1982 if (mDataSource->readAt(
1983 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1984 return ERROR_IO;
1985 }
1986
1987 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1988 uint16_t width = U16_AT(&buffer[6 + 18]);
1989 uint16_t height = U16_AT(&buffer[6 + 20]);
1990
1991 // The video sample is not standard-compliant if it has invalid dimension.
1992 // Use some default width and height value, and
1993 // let the decoder figure out the actual width and height (and thus
1994 // be prepared for INFO_FOMRAT_CHANGED event).
1995 if (width == 0) width = 352;
1996 if (height == 0) height = 288;
1997
1998 // printf("*** coding='%s' width=%d height=%d\n",
1999 // chunk, width, height);
2000
2001 if (mLastTrack == NULL)
2002 return ERROR_MALFORMED;
2003
2004 if (chunk_type != FOURCC("encv")) {
2005 // if the chunk type is encv, we'll get the type from the frma box later
2006 AMediaFormat_setString(mLastTrack->meta,
2007 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2008 }
2009 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2010 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2011
2012 off64_t stop_offset = *offset + chunk_size;
2013 *offset = data_offset + sizeof(buffer);
2014 while (*offset < stop_offset) {
2015 status_t err = parseChunk(offset, depth + 1);
2016 if (err != OK) {
2017 return err;
2018 }
2019 }
2020
2021 if (*offset != stop_offset) {
2022 return ERROR_MALFORMED;
2023 }
2024 break;
2025 }
2026
2027 case FOURCC("stco"):
2028 case FOURCC("co64"):
2029 {
2030 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2031 return ERROR_MALFORMED;
2032 }
2033
2034 status_t err =
2035 mLastTrack->sampleTable->setChunkOffsetParams(
2036 chunk_type, data_offset, chunk_data_size);
2037
2038 *offset += chunk_size;
2039
2040 if (err != OK) {
2041 return err;
2042 }
2043
2044 break;
2045 }
2046
2047 case FOURCC("stsc"):
2048 {
2049 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2050 return ERROR_MALFORMED;
2051
2052 status_t err =
2053 mLastTrack->sampleTable->setSampleToChunkParams(
2054 data_offset, chunk_data_size);
2055
2056 *offset += chunk_size;
2057
2058 if (err != OK) {
2059 return err;
2060 }
2061
2062 break;
2063 }
2064
2065 case FOURCC("stsz"):
2066 case FOURCC("stz2"):
2067 {
2068 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2069 return ERROR_MALFORMED;
2070 }
2071
2072 status_t err =
2073 mLastTrack->sampleTable->setSampleSizeParams(
2074 chunk_type, data_offset, chunk_data_size);
2075
2076 *offset += chunk_size;
2077
2078 if (err != OK) {
2079 return err;
2080 }
2081
2082 adjustRawDefaultFrameSize();
2083
2084 size_t max_size;
2085 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2086
2087 if (err != OK) {
2088 return err;
2089 }
2090
2091 if (max_size != 0) {
2092 // Assume that a given buffer only contains at most 10 chunks,
2093 // each chunk originally prefixed with a 2 byte length will
2094 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2095 // and thus will grow by 2 bytes per chunk.
2096 if (max_size > SIZE_MAX - 10 * 2) {
2097 ALOGE("max sample size too big: %zu", max_size);
2098 return ERROR_MALFORMED;
2099 }
2100 AMediaFormat_setInt32(mLastTrack->meta,
2101 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2102 } else {
2103 // No size was specified. Pick a conservatively large size.
2104 uint32_t width, height;
2105 if (!AMediaFormat_getInt32(mLastTrack->meta,
2106 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2107 !AMediaFormat_getInt32(mLastTrack->meta,
2108 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2109 ALOGE("No width or height, assuming worst case 1080p");
2110 width = 1920;
2111 height = 1080;
2112 } else {
2113 // A resolution was specified, check that it's not too big. The values below
2114 // were chosen so that the calculations below don't cause overflows, they're
2115 // not indicating that resolutions up to 32kx32k are actually supported.
2116 if (width > 32768 || height > 32768) {
2117 ALOGE("can't support %u x %u video", width, height);
2118 return ERROR_MALFORMED;
2119 }
2120 }
2121
2122 const char *mime;
2123 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2124 if (!strncmp(mime, "audio/", 6)) {
2125 // for audio, use 128KB
2126 max_size = 1024 * 128;
2127 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2128 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2129 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2130 // AVC & HEVC requires compression ratio of at least 2, and uses
2131 // macroblocks
2132 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2133 } else {
2134 // For all other formats there is no minimum compression
2135 // ratio. Use compression ratio of 1.
2136 max_size = width * height * 3 / 2;
2137 }
2138 // HACK: allow 10% overhead
2139 // TODO: read sample size from traf atom for fragmented MPEG4.
2140 max_size += max_size / 10;
2141 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2142 }
2143
2144 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2145 // mimetype) previously obtained, so don't cache them.
2146 const char *mime;
2147 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2148 // Calculate average frame rate.
2149 if (!strncasecmp("video/", mime, 6)) {
2150 size_t nSamples = mLastTrack->sampleTable->countSamples();
2151 if (nSamples == 0) {
2152 int32_t trackId;
2153 if (AMediaFormat_getInt32(mLastTrack->meta,
2154 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2155 for (size_t i = 0; i < mTrex.size(); i++) {
2156 Trex *t = &mTrex.editItemAt(i);
2157 if (t->track_ID == (uint32_t) trackId) {
2158 if (t->default_sample_duration > 0) {
2159 int32_t frameRate =
2160 mLastTrack->timescale / t->default_sample_duration;
2161 AMediaFormat_setInt32(mLastTrack->meta,
2162 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2163 }
2164 break;
2165 }
2166 }
2167 }
2168 } else {
2169 int64_t durationUs;
2170 if (AMediaFormat_getInt64(mLastTrack->meta,
2171 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2172 if (durationUs > 0) {
2173 int32_t frameRate = (nSamples * 1000000LL +
2174 (durationUs >> 1)) / durationUs;
2175 AMediaFormat_setInt32(mLastTrack->meta,
2176 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2177 }
2178 }
2179 ALOGV("setting frame count %zu", nSamples);
2180 AMediaFormat_setInt32(mLastTrack->meta,
2181 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2182 }
2183 }
2184
2185 break;
2186 }
2187
2188 case FOURCC("stts"):
2189 {
2190 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2191 return ERROR_MALFORMED;
2192
2193 *offset += chunk_size;
2194
2195 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2196 char chunk[5];
2197 MakeFourCCString(mPath[depth - 1], chunk);
2198 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2199 break;
2200 }
2201
2202 status_t err =
2203 mLastTrack->sampleTable->setTimeToSampleParams(
2204 data_offset, chunk_data_size);
2205
2206 if (err != OK) {
2207 return err;
2208 }
2209
2210 break;
2211 }
2212
2213 case FOURCC("ctts"):
2214 {
2215 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2216 return ERROR_MALFORMED;
2217
2218 *offset += chunk_size;
2219
2220 status_t err =
2221 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2222 data_offset, chunk_data_size);
2223
2224 if (err != OK) {
2225 return err;
2226 }
2227
2228 break;
2229 }
2230
2231 case FOURCC("stss"):
2232 {
2233 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2234 return ERROR_MALFORMED;
2235
2236 *offset += chunk_size;
2237
2238 status_t err =
2239 mLastTrack->sampleTable->setSyncSampleParams(
2240 data_offset, chunk_data_size);
2241
2242 if (err != OK) {
2243 return err;
2244 }
2245
2246 break;
2247 }
2248
2249 // \xA9xyz
2250 case FOURCC("\251xyz"):
2251 {
2252 *offset += chunk_size;
2253
2254 // Best case the total data length inside "\xA9xyz" box would
2255 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2256 // where "\x00\x05" is the text string length with value = 5,
2257 // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2258 // location (string) value with longitude = 0 and latitude = 0.
2259 // Since some devices encountered in the wild omit the trailing
2260 // slash, we'll allow that.
2261 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2262 return ERROR_MALFORMED;
2263 }
2264
2265 uint16_t len;
2266 if (!mDataSource->getUInt16(data_offset, &len)) {
2267 return ERROR_IO;
2268 }
2269
2270 // allow "+0+0" without trailing slash
2271 if (len < 4 || len > chunk_data_size - 4) {
2272 return ERROR_MALFORMED;
2273 }
2274 // The location string following the language code is formatted
2275 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2276 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2277 // and to add a terminating 0.
2278 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2279 if (!buffer) {
2280 return NO_MEMORY;
2281 }
2282
2283 if (mDataSource->readAt(
2284 data_offset + 4, &buffer[0], len) < len) {
2285 return ERROR_IO;
2286 }
2287
2288 len = strlen(&buffer[0]);
2289 if (len < 4) {
2290 return ERROR_MALFORMED;
2291 }
2292 // Add a trailing slash if there wasn't one.
2293 if (buffer[len - 1] != '/') {
2294 buffer[len] = '/';
2295 }
2296 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2297 break;
2298 }
2299
2300 case FOURCC("esds"):
2301 {
2302 *offset += chunk_size;
2303
2304 if (chunk_data_size < 4) {
2305 return ERROR_MALFORMED;
2306 }
2307
2308 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2309 uint8_t *buffer = tmp.get();
2310 if (buffer == NULL) {
2311 return -ENOMEM;
2312 }
2313
2314 if (mDataSource->readAt(
2315 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2316 return ERROR_IO;
2317 }
2318
2319 if (U32_AT(buffer) != 0) {
2320 // Should be version 0, flags 0.
2321 return ERROR_MALFORMED;
2322 }
2323
2324 if (mLastTrack == NULL)
2325 return ERROR_MALFORMED;
2326
2327 AMediaFormat_setBuffer(mLastTrack->meta,
2328 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2329
2330 if (mPath.size() >= 2
2331 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2332 // Information from the ESDS must be relied on for proper
2333 // setup of sample rate and channel count for MPEG4 Audio.
2334 // The generic header appears to only contain generic
2335 // information...
2336
2337 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2338 &buffer[4], chunk_data_size - 4);
2339
2340 if (err != OK) {
2341 return err;
2342 }
2343 }
2344 if (mPath.size() >= 2
2345 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2346 // Check if the video is MPEG2
2347 ESDS esds(&buffer[4], chunk_data_size - 4);
2348
2349 uint8_t objectTypeIndication;
2350 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2351 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2352 AMediaFormat_setString(mLastTrack->meta,
2353 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2354 }
2355 }
2356 }
2357 break;
2358 }
2359
2360 case FOURCC("btrt"):
2361 {
2362 *offset += chunk_size;
2363 if (mLastTrack == NULL) {
2364 return ERROR_MALFORMED;
2365 }
2366
2367 uint8_t buffer[12];
2368 if (chunk_data_size != sizeof(buffer)) {
2369 return ERROR_MALFORMED;
2370 }
2371
2372 if (mDataSource->readAt(
2373 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2374 return ERROR_IO;
2375 }
2376
2377 uint32_t maxBitrate = U32_AT(&buffer[4]);
2378 uint32_t avgBitrate = U32_AT(&buffer[8]);
2379 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2380 AMediaFormat_setInt32(mLastTrack->meta,
2381 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2382 }
2383 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2384 AMediaFormat_setInt32(mLastTrack->meta,
2385 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2386 }
2387 break;
2388 }
2389
2390 case FOURCC("avcC"):
2391 {
2392 *offset += chunk_size;
2393
2394 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2395
2396 if (buffer.get() == NULL) {
2397 ALOGE("b/28471206");
2398 return NO_MEMORY;
2399 }
2400
2401 if (mDataSource->readAt(
2402 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2403 return ERROR_IO;
2404 }
2405
2406 if (mLastTrack == NULL)
2407 return ERROR_MALFORMED;
2408
2409 AMediaFormat_setBuffer(mLastTrack->meta,
2410 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2411
2412 break;
2413 }
2414 case FOURCC("hvcC"):
2415 {
2416 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2417
2418 if (buffer.get() == NULL) {
2419 ALOGE("b/28471206");
2420 return NO_MEMORY;
2421 }
2422
2423 if (mDataSource->readAt(
2424 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2425 return ERROR_IO;
2426 }
2427
2428 if (mLastTrack == NULL)
2429 return ERROR_MALFORMED;
2430
2431 AMediaFormat_setBuffer(mLastTrack->meta,
2432 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2433
2434 *offset += chunk_size;
2435 break;
2436 }
2437 case FOURCC("av1C"):
2438 {
2439 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2440
2441 if (buffer.get() == NULL) {
2442 ALOGE("b/28471206");
2443 return NO_MEMORY;
2444 }
2445
2446 if (mDataSource->readAt(
2447 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2448 return ERROR_IO;
2449 }
2450
2451 if (mLastTrack == NULL)
2452 return ERROR_MALFORMED;
2453
2454 AMediaFormat_setBuffer(mLastTrack->meta,
2455 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2456
2457 *offset += chunk_size;
2458 break;
2459 }
2460 case FOURCC("dvcC"):
2461 case FOURCC("dvvC"): {
2462
2463 CHECK_EQ(chunk_data_size, 24);
2464
2465 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2466
2467 if (buffer.get() == NULL) {
2468 ALOGE("b/28471206");
2469 return NO_MEMORY;
2470 }
2471
2472 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2473 return ERROR_IO;
2474 }
2475
2476 if (mLastTrack == NULL)
2477 return ERROR_MALFORMED;
2478
2479 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2480 buffer.get(), chunk_data_size);
2481 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2482 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2483
2484 *offset += chunk_size;
2485 break;
2486 }
2487 case FOURCC("d263"):
2488 {
2489 *offset += chunk_size;
2490 /*
2491 * d263 contains a fixed 7 bytes part:
2492 * vendor - 4 bytes
2493 * version - 1 byte
2494 * level - 1 byte
2495 * profile - 1 byte
2496 * optionally, "d263" box itself may contain a 16-byte
2497 * bit rate box (bitr)
2498 * average bit rate - 4 bytes
2499 * max bit rate - 4 bytes
2500 */
2501 char buffer[23];
2502 if (chunk_data_size != 7 &&
2503 chunk_data_size != 23) {
2504 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2505 return ERROR_MALFORMED;
2506 }
2507
2508 if (mDataSource->readAt(
2509 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2510 return ERROR_IO;
2511 }
2512
2513 if (mLastTrack == NULL)
2514 return ERROR_MALFORMED;
2515
2516 AMediaFormat_setBuffer(mLastTrack->meta,
2517 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2518
2519 break;
2520 }
2521
2522 case FOURCC("meta"):
2523 {
2524 off64_t stop_offset = *offset + chunk_size;
2525 *offset = data_offset;
2526 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2527 if (!isParsingMetaKeys) {
2528 uint8_t buffer[4];
2529 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2530 *offset = stop_offset;
2531 return ERROR_MALFORMED;
2532 }
2533
2534 if (mDataSource->readAt(
2535 data_offset, buffer, 4) < 4) {
2536 *offset = stop_offset;
2537 return ERROR_IO;
2538 }
2539
2540 if (U32_AT(buffer) != 0) {
2541 // Should be version 0, flags 0.
2542
2543 // If it's not, let's assume this is one of those
2544 // apparently malformed chunks that don't have flags
2545 // and completely different semantics than what's
2546 // in the MPEG4 specs and skip it.
2547 *offset = stop_offset;
2548 return OK;
2549 }
2550 *offset += sizeof(buffer);
2551 }
2552
2553 while (*offset < stop_offset) {
2554 status_t err = parseChunk(offset, depth + 1);
2555 if (err != OK) {
2556 return err;
2557 }
2558 }
2559
2560 if (*offset != stop_offset) {
2561 return ERROR_MALFORMED;
2562 }
2563 break;
2564 }
2565
2566 case FOURCC("iloc"):
2567 case FOURCC("iinf"):
2568 case FOURCC("iprp"):
2569 case FOURCC("pitm"):
2570 case FOURCC("idat"):
2571 case FOURCC("iref"):
2572 case FOURCC("ipro"):
2573 {
2574 if (mIsHeif) {
2575 if (mItemTable == NULL) {
2576 mItemTable = new ItemTable(mDataSource);
2577 }
2578 status_t err = mItemTable->parse(
2579 chunk_type, data_offset, chunk_data_size);
2580 if (err != OK) {
2581 return err;
2582 }
2583 }
2584 *offset += chunk_size;
2585 break;
2586 }
2587
2588 case FOURCC("mean"):
2589 case FOURCC("name"):
2590 case FOURCC("data"):
2591 {
2592 *offset += chunk_size;
2593
2594 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2595 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2596
2597 if (err != OK) {
2598 return err;
2599 }
2600 }
2601
2602 break;
2603 }
2604
2605 case FOURCC("mvhd"):
2606 {
2607 *offset += chunk_size;
2608
2609 if (depth != 1) {
2610 ALOGE("mvhd: depth %d", depth);
2611 return ERROR_MALFORMED;
2612 }
2613 if (chunk_data_size < 32) {
2614 return ERROR_MALFORMED;
2615 }
2616
2617 uint8_t header[32];
2618 if (mDataSource->readAt(
2619 data_offset, header, sizeof(header))
2620 < (ssize_t)sizeof(header)) {
2621 return ERROR_IO;
2622 }
2623
2624 uint64_t creationTime;
2625 uint64_t duration = 0;
2626 if (header[0] == 1) {
2627 creationTime = U64_AT(&header[4]);
2628 mHeaderTimescale = U32_AT(&header[20]);
2629 duration = U64_AT(&header[24]);
2630 if (duration == 0xffffffffffffffff) {
2631 duration = 0;
2632 }
2633 } else if (header[0] != 0) {
2634 return ERROR_MALFORMED;
2635 } else {
2636 creationTime = U32_AT(&header[4]);
2637 mHeaderTimescale = U32_AT(&header[12]);
2638 uint32_t d32 = U32_AT(&header[16]);
2639 if (d32 == 0xffffffff) {
2640 d32 = 0;
2641 }
2642 duration = d32;
2643 }
2644 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2645 AMediaFormat_setInt64(mFileMetaData,
2646 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2647 }
2648
2649 String8 s;
2650 if (convertTimeToDate(creationTime, &s)) {
2651 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2652 }
2653
2654 break;
2655 }
2656
2657 case FOURCC("mehd"):
2658 {
2659 *offset += chunk_size;
2660
2661 if (chunk_data_size < 8) {
2662 return ERROR_MALFORMED;
2663 }
2664
2665 uint8_t flags[4];
2666 if (mDataSource->readAt(
2667 data_offset, flags, sizeof(flags))
2668 < (ssize_t)sizeof(flags)) {
2669 return ERROR_IO;
2670 }
2671
2672 uint64_t duration = 0;
2673 if (flags[0] == 1) {
2674 // 64 bit
2675 if (chunk_data_size < 12) {
2676 return ERROR_MALFORMED;
2677 }
2678 mDataSource->getUInt64(data_offset + 4, &duration);
2679 if (duration == 0xffffffffffffffff) {
2680 duration = 0;
2681 }
2682 } else if (flags[0] == 0) {
2683 // 32 bit
2684 uint32_t d32;
2685 mDataSource->getUInt32(data_offset + 4, &d32);
2686 if (d32 == 0xffffffff) {
2687 d32 = 0;
2688 }
2689 duration = d32;
2690 } else {
2691 return ERROR_MALFORMED;
2692 }
2693
2694 if (duration != 0 && mHeaderTimescale != 0) {
2695 AMediaFormat_setInt64(mFileMetaData,
2696 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2697 }
2698
2699 break;
2700 }
2701
2702 case FOURCC("mdat"):
2703 {
2704 mMdatFound = true;
2705
2706 *offset += chunk_size;
2707 break;
2708 }
2709
2710 case FOURCC("hdlr"):
2711 {
2712 *offset += chunk_size;
2713
2714 if (underQTMetaPath(mPath, 3)) {
2715 break;
2716 }
2717
2718 uint32_t buffer;
2719 if (mDataSource->readAt(
2720 data_offset + 8, &buffer, 4) < 4) {
2721 return ERROR_IO;
2722 }
2723
2724 uint32_t type = ntohl(buffer);
2725 // For the 3GPP file format, the handler-type within the 'hdlr' box
2726 // shall be 'text'. We also want to support 'sbtl' handler type
2727 // for a practical reason as various MPEG4 containers use it.
2728 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2729 if (mLastTrack != NULL) {
2730 AMediaFormat_setString(mLastTrack->meta,
2731 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2732 }
2733 }
2734
2735 break;
2736 }
2737
2738 case FOURCC("keys"):
2739 {
2740 *offset += chunk_size;
2741
2742 if (underQTMetaPath(mPath, 3)) {
2743 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2744 if (err != OK) {
2745 return err;
2746 }
2747 }
2748 break;
2749 }
2750
2751 case FOURCC("trex"):
2752 {
2753 *offset += chunk_size;
2754
2755 if (chunk_data_size < 24) {
2756 return ERROR_IO;
2757 }
2758 Trex trex;
2759 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2760 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2761 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2762 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2763 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2764 return ERROR_IO;
2765 }
2766 mTrex.add(trex);
2767 break;
2768 }
2769
2770 case FOURCC("tx3g"):
2771 {
2772 if (mLastTrack == NULL)
2773 return ERROR_MALFORMED;
2774
2775 // complain about ridiculous chunks
2776 if (chunk_size > kMaxAtomSize) {
2777 return ERROR_MALFORMED;
2778 }
2779
2780 // complain about empty atoms
2781 if (chunk_data_size <= 0) {
2782 ALOGE("b/124330204");
2783 android_errorWriteLog(0x534e4554, "124330204");
2784 return ERROR_MALFORMED;
2785 }
2786
2787 // should fill buffer based on "data_offset" and "chunk_data_size"
2788 // instead of *offset and chunk_size;
2789 // but we've been feeding the extra data to consumers for multiple releases and
2790 // if those apps are compensating for it, we'd break them with such a change
2791 //
2792
2793 if (mLastTrack->mTx3gBuffer == NULL) {
2794 mLastTrack->mTx3gSize = 0;
2795 mLastTrack->mTx3gFilled = 0;
2796 }
2797 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2798 size_t growth = kTx3gGrowth;
2799 if (growth < chunk_size) {
2800 growth = chunk_size;
2801 }
2802 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2803 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2804 ALOGE("b/124330204 - too much space");
2805 android_errorWriteLog(0x534e4554, "124330204");
2806 return ERROR_MALFORMED;
2807 }
2808 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2809 mLastTrack->mTx3gSize + growth);
2810 if (updated == NULL) {
2811 return ERROR_MALFORMED;
2812 }
2813 mLastTrack->mTx3gBuffer = updated;
2814 mLastTrack->mTx3gSize += growth;
2815 }
2816
2817 if ((size_t)(mDataSource->readAt(*offset,
2818 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2819 chunk_size))
2820 < chunk_size) {
2821
2822 // advance read pointer so we don't end up reading this again
2823 *offset += chunk_size;
2824 return ERROR_IO;
2825 }
2826
2827 mLastTrack->mTx3gFilled += chunk_size;
2828 *offset += chunk_size;
2829 break;
2830 }
2831
2832 case FOURCC("covr"):
2833 {
2834 *offset += chunk_size;
2835
2836 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2837 chunk_data_size, data_offset);
2838
2839 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2840 return ERROR_MALFORMED;
2841 }
2842 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2843 if (buffer.get() == NULL) {
2844 ALOGE("b/28471206");
2845 return NO_MEMORY;
2846 }
2847 if (mDataSource->readAt(
2848 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2849 return ERROR_IO;
2850 }
2851 const int kSkipBytesOfDataBox = 16;
2852 if (chunk_data_size <= kSkipBytesOfDataBox) {
2853 return ERROR_MALFORMED;
2854 }
2855
2856 AMediaFormat_setBuffer(mFileMetaData,
2857 AMEDIAFORMAT_KEY_ALBUMART,
2858 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2859
2860 break;
2861 }
2862
2863 case FOURCC("colr"):
2864 {
2865 *offset += chunk_size;
2866 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2867 // ignore otherwise
2868 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
2869 status_t err = parseColorInfo(data_offset, chunk_data_size);
2870 if (err != OK) {
2871 return err;
2872 }
2873 }
2874
2875 break;
2876 }
2877
2878 case FOURCC("titl"):
2879 case FOURCC("perf"):
2880 case FOURCC("auth"):
2881 case FOURCC("gnre"):
2882 case FOURCC("albm"):
2883 case FOURCC("yrrc"):
2884 {
2885 *offset += chunk_size;
2886
2887 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2888
2889 if (err != OK) {
2890 return err;
2891 }
2892
2893 break;
2894 }
2895
2896 case FOURCC("ID32"):
2897 {
2898 *offset += chunk_size;
2899
2900 if (chunk_data_size < 6) {
2901 return ERROR_MALFORMED;
2902 }
2903
2904 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
2905
2906 break;
2907 }
2908
2909 case FOURCC("----"):
2910 {
2911 mLastCommentMean.clear();
2912 mLastCommentName.clear();
2913 mLastCommentData.clear();
2914 *offset += chunk_size;
2915 break;
2916 }
2917
2918 case FOURCC("sidx"):
2919 {
2920 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2921 if (err != OK) {
2922 return err;
2923 }
2924 *offset += chunk_size;
2925 return UNKNOWN_ERROR; // stop parsing after sidx
2926 }
2927
2928 case FOURCC("ac-3"):
2929 {
2930 *offset += chunk_size;
2931 // bypass ac-3 if parse fail
2932 if (parseAC3SpecificBox(data_offset) != OK) {
2933 if (mLastTrack != NULL) {
2934 ALOGW("Fail to parse ac-3");
2935 mLastTrack->skipTrack = true;
2936 }
2937 }
2938 return OK;
2939 }
2940
2941 case FOURCC("ec-3"):
2942 {
2943 *offset += chunk_size;
2944 // bypass ec-3 if parse fail
2945 if (parseEAC3SpecificBox(data_offset) != OK) {
2946 if (mLastTrack != NULL) {
2947 ALOGW("Fail to parse ec-3");
2948 mLastTrack->skipTrack = true;
2949 }
2950 }
2951 return OK;
2952 }
2953
2954 case FOURCC("ac-4"):
2955 {
2956 *offset += chunk_size;
2957 // bypass ac-4 if parse fail
2958 if (parseAC4SpecificBox(data_offset) != OK) {
2959 if (mLastTrack != NULL) {
2960 ALOGW("Fail to parse ac-4");
2961 mLastTrack->skipTrack = true;
2962 }
2963 }
2964 return OK;
2965 }
2966
2967 case FOURCC("ftyp"):
2968 {
2969 if (chunk_data_size < 8 || depth != 0) {
2970 return ERROR_MALFORMED;
2971 }
2972
2973 off64_t stop_offset = *offset + chunk_size;
2974 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2975 std::set<uint32_t> brandSet;
2976 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2977 if (i == 1) {
2978 // Skip this index, it refers to the minorVersion,
2979 // not a brand.
2980 continue;
2981 }
2982
2983 uint32_t brand;
2984 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2985 return ERROR_MALFORMED;
2986 }
2987
2988 brand = ntohl(brand);
2989 brandSet.insert(brand);
2990 }
2991
2992 if (brandSet.count(FOURCC("qt ")) > 0) {
2993 mIsQT = true;
2994 } else {
2995 if (brandSet.count(FOURCC("mif1")) > 0
2996 && brandSet.count(FOURCC("heic")) > 0) {
2997 ALOGV("identified HEIF image");
2998
2999 mIsHeif = true;
3000 brandSet.erase(FOURCC("mif1"));
3001 brandSet.erase(FOURCC("heic"));
3002 }
3003
3004 if (!brandSet.empty()) {
3005 // This means that the file should have moov box.
3006 // It could be any iso files (mp4, heifs, etc.)
3007 mHasMoovBox = true;
3008 if (mIsHeif) {
3009 ALOGV("identified HEIF image with other tracks");
3010 }
3011 }
3012 }
3013
3014 *offset = stop_offset;
3015
3016 break;
3017 }
3018
3019 default:
3020 {
3021 // check if we're parsing 'ilst' for meta keys
3022 // if so, treat type as a number (key-id).
3023 if (underQTMetaPath(mPath, 3)) {
3024 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3025 if (err != OK) {
3026 return err;
3027 }
3028 }
3029
3030 *offset += chunk_size;
3031 break;
3032 }
3033 }
3034
3035 return OK;
3036 }
3037
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3038 status_t MPEG4Extractor::parseChannelCountSampleRate(
3039 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3040 // skip 16 bytes:
3041 // + 6-byte reserved,
3042 // + 2-byte data reference index,
3043 // + 8-byte reserved
3044 *offset += 16;
3045 if (!mDataSource->getUInt16(*offset, channelCount)) {
3046 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3047 return ERROR_MALFORMED;
3048 }
3049 // skip 8 bytes:
3050 // + 2-byte channelCount,
3051 // + 2-byte sample size,
3052 // + 4-byte reserved
3053 *offset += 8;
3054 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3055 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3056 return ERROR_MALFORMED;
3057 }
3058 // skip 4 bytes:
3059 // + 2-byte sampleRate,
3060 // + 2-byte reserved
3061 *offset += 4;
3062 return OK;
3063 }
3064
parseAC4SpecificBox(off64_t offset)3065 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3066 if (mLastTrack == NULL) {
3067 return ERROR_MALFORMED;
3068 }
3069
3070 uint16_t sampleRate, channelCount;
3071 status_t status;
3072 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3073 return status;
3074 }
3075 uint32_t size;
3076 // + 4-byte size
3077 // + 4-byte type
3078 // + 3-byte payload
3079 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3080 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3081 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3082 return ERROR_MALFORMED;
3083 }
3084
3085 // + 4-byte size
3086 offset += 4;
3087 uint32_t type;
3088 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3089 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3090 return ERROR_MALFORMED;
3091 }
3092
3093 // + 4-byte type
3094 offset += 4;
3095 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3096 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3097 ssize_t dsiSize = size - 8; // size of box - size and type fields
3098 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3099 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3100 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3101 return ERROR_MALFORMED;
3102 }
3103 // + size-byte payload
3104 offset += dsiSize;
3105 ABitReader br(chunk, dsiSize);
3106 AC4DSIParser parser(br);
3107 if (!parser.parse()){
3108 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3109 return ERROR_MALFORMED;
3110 }
3111
3112 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3113 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3114 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3115
3116 AudioPresentationCollection presentations;
3117 // translate the AC4 presentation information to audio presentations for this track
3118 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3119 if (!ac4Presentations.empty()) {
3120 for (const auto& ac4Presentation : ac4Presentations) {
3121 auto& presentation = ac4Presentation.second;
3122 if (!presentation.mEnabled) {
3123 continue;
3124 }
3125 AudioPresentationV1 ap;
3126 ap.mPresentationId = presentation.mGroupIndex;
3127 ap.mProgramId = presentation.mProgramID;
3128 ap.mLanguage = presentation.mLanguage;
3129 if (presentation.mPreVirtualized) {
3130 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3131 } else {
3132 switch (presentation.mChannelMode) {
3133 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3134 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3135 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3136 break;
3137 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3138 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3139 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3140 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3141 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3142 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3143 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3144 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3145 break;
3146 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3147 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3148 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3149 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3150 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3151 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3152 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3153 ap.mMasteringIndication = MASTERED_FOR_3D;
3154 break;
3155 default:
3156 ALOGE("Invalid channel mode in AC4 presentation");
3157 return ERROR_MALFORMED;
3158 }
3159 }
3160
3161 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3162 AC4Parser::AC4Presentation::kVisuallyImpaired);
3163 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3164 AC4Parser::AC4Presentation::kVoiceOver);
3165 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3166 if (!ap.mLanguage.empty()) {
3167 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3168 }
3169 presentations.push_back(std::move(ap));
3170 }
3171 }
3172
3173 if (presentations.empty()) {
3174 // Clear audio presentation info in metadata.
3175 AMediaFormat_setBuffer(
3176 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3177 } else {
3178 std::ostringstream outStream(std::ios::out);
3179 serializeAudioPresentations(presentations, &outStream);
3180 AMediaFormat_setBuffer(
3181 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3182 outStream.str().data(), outStream.str().size());
3183 }
3184 return OK;
3185 }
3186
parseEAC3SpecificBox(off64_t offset)3187 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3188 if (mLastTrack == NULL) {
3189 return ERROR_MALFORMED;
3190 }
3191
3192 uint16_t sampleRate, channels;
3193 status_t status;
3194 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3195 return status;
3196 }
3197 uint32_t size;
3198 // + 4-byte size
3199 // + 4-byte type
3200 // + 3-byte payload
3201 const uint32_t kEAC3SpecificBoxMinSize = 11;
3202 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3203 // calculated from the required bits read below as well as the maximum number of independent
3204 // and dependant sub streams you can have
3205 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3206 if (!mDataSource->getUInt32(offset, &size) ||
3207 size < kEAC3SpecificBoxMinSize ||
3208 size > kEAC3SpecificBoxMaxSize) {
3209 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3210 return ERROR_MALFORMED;
3211 }
3212
3213 offset += 4;
3214 uint32_t type;
3215 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3216 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3217 return ERROR_MALFORMED;
3218 }
3219
3220 offset += 4;
3221 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3222 if (chunk == NULL) {
3223 return ERROR_MALFORMED;
3224 }
3225
3226 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3227 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3228 delete[] chunk;
3229 return ERROR_MALFORMED;
3230 }
3231
3232 ABitReader br(chunk, size);
3233 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3234 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3235
3236 if (br.numBitsLeft() < 16) {
3237 delete[] chunk;
3238 return ERROR_MALFORMED;
3239 }
3240 unsigned data_rate = br.getBits(13);
3241 ALOGV("EAC3 data rate = %d", data_rate);
3242
3243 unsigned num_ind_sub = br.getBits(3) + 1;
3244 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3245 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3246 delete[] chunk;
3247 return ERROR_MALFORMED;
3248 }
3249
3250 unsigned channelCount = 0;
3251 for (unsigned i = 0; i < num_ind_sub; i++) {
3252 unsigned fscod = br.getBits(2);
3253 if (fscod == 3) {
3254 ALOGE("Incorrect fscod (3) in EAC3 header");
3255 delete[] chunk;
3256 return ERROR_MALFORMED;
3257 }
3258 unsigned boxSampleRate = sampleRateTable[fscod];
3259 if (boxSampleRate != sampleRate) {
3260 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3261 boxSampleRate, sampleRate);
3262 delete[] chunk;
3263 return ERROR_MALFORMED;
3264 }
3265
3266 unsigned bsid = br.getBits(5);
3267 if (bsid == 9 || bsid == 10) {
3268 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3269 } else if (bsid > 16) {
3270 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3271 delete[] chunk;
3272 return ERROR_MALFORMED;
3273 }
3274
3275 // skip
3276 br.skipBits(2);
3277 unsigned bsmod = br.getBits(3);
3278 unsigned acmod = br.getBits(3);
3279 unsigned lfeon = br.getBits(1);
3280 // we currently only support the first stream
3281 if (i == 0)
3282 channelCount = channelCountTable[acmod] + lfeon;
3283 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3284
3285 br.skipBits(3);
3286 unsigned num_dep_sub = br.getBits(4);
3287 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3288 if (num_dep_sub != 0) {
3289 if (br.numBitsLeft() < 9) {
3290 delete[] chunk;
3291 return ERROR_MALFORMED;
3292 }
3293 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3294 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3295 unsigned chan_loc = br.getBits(9);
3296 unsigned mask = 1;
3297 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3298 if ((chan_loc & mask) != 0) {
3299 // we currently only support the first stream
3300 if (i == 0) {
3301 channelCount++;
3302 // these are 2 channels in the mask
3303 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3304 channelCount++;
3305 }
3306 }
3307 ALOGV(" %s", chan_loc_tbl[j]);
3308 }
3309 }
3310 } else {
3311 if (br.numBitsLeft() == 0) {
3312 delete[] chunk;
3313 return ERROR_MALFORMED;
3314 }
3315 br.skipBits(1);
3316 }
3317 }
3318
3319 if (br.numBitsLeft() != 0) {
3320 if (br.numBitsLeft() < 8) {
3321 delete[] chunk;
3322 return ERROR_MALFORMED;
3323 }
3324 unsigned mask = br.getBits(8);
3325 for (unsigned i = 0; i < 8; i++) {
3326 if (((0x1 << i) && mask) == 0)
3327 continue;
3328
3329 if (br.numBitsLeft() < 8) {
3330 delete[] chunk;
3331 return ERROR_MALFORMED;
3332 }
3333 switch (i) {
3334 case 0: {
3335 unsigned complexity = br.getBits(8);
3336 ALOGV("Found a JOC stream with complexity = %d", complexity);
3337 }break;
3338 default: {
3339 br.skipBits(8);
3340 }break;
3341 }
3342 }
3343 }
3344 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3345 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3346 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3347
3348 delete[] chunk;
3349 return OK;
3350 }
3351
parseAC3SpecificBox(off64_t offset)3352 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3353 if (mLastTrack == NULL) {
3354 return ERROR_MALFORMED;
3355 }
3356
3357 uint16_t sampleRate, channels;
3358 status_t status;
3359 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3360 return status;
3361 }
3362 uint32_t size;
3363 // + 4-byte size
3364 // + 4-byte type
3365 // + 3-byte payload
3366 const uint32_t kAC3SpecificBoxSize = 11;
3367 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3368 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3369 return ERROR_MALFORMED;
3370 }
3371
3372 offset += 4;
3373 uint32_t type;
3374 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3375 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3376 return ERROR_MALFORMED;
3377 }
3378
3379 offset += 4;
3380 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3381 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3382 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3383 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3384 return ERROR_MALFORMED;
3385 }
3386
3387 ABitReader br(chunk, sizeof(chunk));
3388 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3389 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3390
3391 unsigned fscod = br.getBits(2);
3392 if (fscod == 3) {
3393 ALOGE("Incorrect fscod (3) in AC3 header");
3394 return ERROR_MALFORMED;
3395 }
3396 unsigned boxSampleRate = sampleRateTable[fscod];
3397 if (boxSampleRate != sampleRate) {
3398 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3399 boxSampleRate, sampleRate);
3400 return ERROR_MALFORMED;
3401 }
3402
3403 unsigned bsid = br.getBits(5);
3404 if (bsid > 8) {
3405 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3406 return ERROR_MALFORMED;
3407 }
3408
3409 // skip
3410 unsigned bsmod __unused = br.getBits(3);
3411
3412 unsigned acmod = br.getBits(3);
3413 unsigned lfeon = br.getBits(1);
3414 unsigned channelCount = channelCountTable[acmod] + lfeon;
3415
3416 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3417 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3418 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3419 return OK;
3420 }
3421
parseALACSampleEntry(off64_t * offset)3422 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3423 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3424 // Store ALAC magic cookie (decoder needs it).
3425 uint8_t alacInfo[12];
3426 off64_t data_offset = *offset;
3427
3428 if (mDataSource->readAt(
3429 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3430 return ERROR_IO;
3431 }
3432 uint32_t size = U32_AT(&alacInfo[0]);
3433 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3434 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3435 (U32_AT(&alacInfo[8]) != 0)) {
3436 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3437 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3438 return ERROR_MALFORMED;
3439 }
3440 data_offset += sizeof(alacInfo);
3441 uint8_t cookie[size - sizeof(alacInfo)];
3442 if (mDataSource->readAt(
3443 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3444 return ERROR_IO;
3445 }
3446
3447 uint8_t bitsPerSample = cookie[5];
3448 AMediaFormat_setInt32(mLastTrack->meta,
3449 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3450 AMediaFormat_setInt32(mLastTrack->meta,
3451 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3452 AMediaFormat_setInt32(mLastTrack->meta,
3453 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3454 AMediaFormat_setBuffer(mLastTrack->meta,
3455 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3456 data_offset += sizeof(cookie);
3457 *offset = data_offset;
3458 return OK;
3459 }
3460
parseSegmentIndex(off64_t offset,size_t size)3461 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3462 ALOGV("MPEG4Extractor::parseSegmentIndex");
3463
3464 if (size < 12) {
3465 return -EINVAL;
3466 }
3467
3468 uint32_t flags;
3469 if (!mDataSource->getUInt32(offset, &flags)) {
3470 return ERROR_MALFORMED;
3471 }
3472
3473 uint32_t version = flags >> 24;
3474 flags &= 0xffffff;
3475
3476 ALOGV("sidx version %d", version);
3477
3478 uint32_t referenceId;
3479 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3480 return ERROR_MALFORMED;
3481 }
3482
3483 uint32_t timeScale;
3484 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3485 return ERROR_MALFORMED;
3486 }
3487 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3488 if (timeScale == 0)
3489 return ERROR_MALFORMED;
3490
3491 uint64_t earliestPresentationTime;
3492 uint64_t firstOffset;
3493
3494 offset += 12;
3495 size -= 12;
3496
3497 if (version == 0) {
3498 if (size < 8) {
3499 return -EINVAL;
3500 }
3501 uint32_t tmp;
3502 if (!mDataSource->getUInt32(offset, &tmp)) {
3503 return ERROR_MALFORMED;
3504 }
3505 earliestPresentationTime = tmp;
3506 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3507 return ERROR_MALFORMED;
3508 }
3509 firstOffset = tmp;
3510 offset += 8;
3511 size -= 8;
3512 } else {
3513 if (size < 16) {
3514 return -EINVAL;
3515 }
3516 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3517 return ERROR_MALFORMED;
3518 }
3519 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3520 return ERROR_MALFORMED;
3521 }
3522 offset += 16;
3523 size -= 16;
3524 }
3525 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3526
3527 if (size < 4) {
3528 return -EINVAL;
3529 }
3530
3531 uint16_t referenceCount;
3532 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3533 return ERROR_MALFORMED;
3534 }
3535 offset += 4;
3536 size -= 4;
3537 ALOGV("refcount: %d", referenceCount);
3538
3539 if (size < referenceCount * 12) {
3540 return -EINVAL;
3541 }
3542
3543 uint64_t total_duration = 0;
3544 for (unsigned int i = 0; i < referenceCount; i++) {
3545 uint32_t d1, d2, d3;
3546
3547 if (!mDataSource->getUInt32(offset, &d1) || // size
3548 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3549 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3550 return ERROR_MALFORMED;
3551 }
3552
3553 if (d1 & 0x80000000) {
3554 ALOGW("sub-sidx boxes not supported yet");
3555 }
3556 bool sap = d3 & 0x80000000;
3557 uint32_t saptype = (d3 >> 28) & 7;
3558 if (!sap || (saptype != 1 && saptype != 2)) {
3559 // type 1 and 2 are sync samples
3560 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3561 }
3562 total_duration += d2;
3563 offset += 12;
3564 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3565 SidxEntry se;
3566 se.mSize = d1 & 0x7fffffff;
3567 se.mDurationUs = 1000000LL * d2 / timeScale;
3568 mSidxEntries.add(se);
3569 }
3570
3571 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3572
3573 if (mLastTrack == NULL)
3574 return ERROR_MALFORMED;
3575
3576 int64_t metaDuration;
3577 if (!AMediaFormat_getInt64(mLastTrack->meta,
3578 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3579 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3580 }
3581 return OK;
3582 }
3583
parseQTMetaKey(off64_t offset,size_t size)3584 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3585 if (size < 8) {
3586 return ERROR_MALFORMED;
3587 }
3588
3589 uint32_t count;
3590 if (!mDataSource->getUInt32(offset + 4, &count)) {
3591 return ERROR_MALFORMED;
3592 }
3593
3594 if (mMetaKeyMap.size() > 0) {
3595 ALOGW("'keys' atom seen again, discarding existing entries");
3596 mMetaKeyMap.clear();
3597 }
3598
3599 off64_t keyOffset = offset + 8;
3600 off64_t stopOffset = offset + size;
3601 for (size_t i = 1; i <= count; i++) {
3602 if (keyOffset + 8 > stopOffset) {
3603 return ERROR_MALFORMED;
3604 }
3605
3606 uint32_t keySize;
3607 if (!mDataSource->getUInt32(keyOffset, &keySize)
3608 || keySize < 8
3609 || keyOffset + keySize > stopOffset) {
3610 return ERROR_MALFORMED;
3611 }
3612
3613 uint32_t type;
3614 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3615 || type != FOURCC("mdta")) {
3616 return ERROR_MALFORMED;
3617 }
3618
3619 keySize -= 8;
3620 keyOffset += 8;
3621
3622 auto keyData = heapbuffer<uint8_t>(keySize);
3623 if (keyData.get() == NULL) {
3624 return ERROR_MALFORMED;
3625 }
3626 if (mDataSource->readAt(
3627 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3628 return ERROR_MALFORMED;
3629 }
3630
3631 AString key((const char *)keyData.get(), keySize);
3632 mMetaKeyMap.add(i, key);
3633
3634 keyOffset += keySize;
3635 }
3636 return OK;
3637 }
3638
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3639 status_t MPEG4Extractor::parseQTMetaVal(
3640 int32_t keyId, off64_t offset, size_t size) {
3641 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3642 if (index < 0) {
3643 // corresponding key is not present, ignore
3644 return ERROR_MALFORMED;
3645 }
3646
3647 if (size <= 16) {
3648 return ERROR_MALFORMED;
3649 }
3650 uint32_t dataSize;
3651 if (!mDataSource->getUInt32(offset, &dataSize)
3652 || dataSize > size || dataSize <= 16) {
3653 return ERROR_MALFORMED;
3654 }
3655 uint32_t atomFourCC;
3656 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3657 || atomFourCC != FOURCC("data")) {
3658 return ERROR_MALFORMED;
3659 }
3660 uint32_t dataType;
3661 if (!mDataSource->getUInt32(offset + 8, &dataType)
3662 || ((dataType & 0xff000000) != 0)) {
3663 // not well-known type
3664 return ERROR_MALFORMED;
3665 }
3666
3667 dataSize -= 16;
3668 offset += 16;
3669
3670 if (dataType == 23 && dataSize >= 4) {
3671 // BE Float32
3672 uint32_t val;
3673 if (!mDataSource->getUInt32(offset, &val)) {
3674 return ERROR_MALFORMED;
3675 }
3676 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3677 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3678 }
3679 } else if (dataType == 67 && dataSize >= 4) {
3680 // BE signed int32
3681 uint32_t val;
3682 if (!mDataSource->getUInt32(offset, &val)) {
3683 return ERROR_MALFORMED;
3684 }
3685 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3686 AMediaFormat_setInt32(mFileMetaData,
3687 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3688 }
3689 } else {
3690 // add more keys if needed
3691 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3692 }
3693
3694 return OK;
3695 }
3696
parseTrackHeader(off64_t data_offset,off64_t data_size)3697 status_t MPEG4Extractor::parseTrackHeader(
3698 off64_t data_offset, off64_t data_size) {
3699 if (data_size < 4) {
3700 return ERROR_MALFORMED;
3701 }
3702
3703 uint8_t version;
3704 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3705 return ERROR_IO;
3706 }
3707
3708 size_t dynSize = (version == 1) ? 36 : 24;
3709
3710 uint8_t buffer[36 + 60];
3711
3712 if (data_size != (off64_t)dynSize + 60) {
3713 return ERROR_MALFORMED;
3714 }
3715
3716 if (mDataSource->readAt(
3717 data_offset, buffer, data_size) < (ssize_t)data_size) {
3718 return ERROR_IO;
3719 }
3720
3721 uint64_t ctime __unused, mtime __unused, duration __unused;
3722 int32_t id;
3723
3724 if (version == 1) {
3725 ctime = U64_AT(&buffer[4]);
3726 mtime = U64_AT(&buffer[12]);
3727 id = U32_AT(&buffer[20]);
3728 duration = U64_AT(&buffer[28]);
3729 } else if (version == 0) {
3730 ctime = U32_AT(&buffer[4]);
3731 mtime = U32_AT(&buffer[8]);
3732 id = U32_AT(&buffer[12]);
3733 duration = U32_AT(&buffer[20]);
3734 } else {
3735 return ERROR_UNSUPPORTED;
3736 }
3737
3738 if (mLastTrack == NULL)
3739 return ERROR_MALFORMED;
3740
3741 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3742
3743 size_t matrixOffset = dynSize + 16;
3744 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3745 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3746 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3747 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3748
3749 #if 0
3750 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3751 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3752
3753 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3754 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3755 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3756 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3757 #endif
3758
3759 uint32_t rotationDegrees;
3760
3761 static const int32_t kFixedOne = 0x10000;
3762 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3763 // Identity, no rotation
3764 rotationDegrees = 0;
3765 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3766 rotationDegrees = 90;
3767 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3768 rotationDegrees = 270;
3769 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3770 rotationDegrees = 180;
3771 } else {
3772 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3773 rotationDegrees = 0;
3774 }
3775
3776 if (rotationDegrees != 0) {
3777 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3778 }
3779
3780 // Handle presentation display size, which could be different
3781 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3782 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3783 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3784 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3785 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3786
3787 return OK;
3788 }
3789
parseITunesMetaData(off64_t offset,size_t size)3790 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3791 if (size == 0) {
3792 return OK;
3793 }
3794
3795 if (size < 4 || size == SIZE_MAX) {
3796 return ERROR_MALFORMED;
3797 }
3798
3799 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3800 if (buffer == NULL) {
3801 return ERROR_MALFORMED;
3802 }
3803 if (mDataSource->readAt(
3804 offset, buffer, size) != (ssize_t)size) {
3805 delete[] buffer;
3806 buffer = NULL;
3807
3808 return ERROR_IO;
3809 }
3810
3811 uint32_t flags = U32_AT(buffer);
3812
3813 const char *metadataKey = nullptr;
3814 char chunk[5];
3815 MakeFourCCString(mPath[4], chunk);
3816 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3817 switch ((int32_t)mPath[4]) {
3818 case FOURCC("\251alb"):
3819 {
3820 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
3821 break;
3822 }
3823 case FOURCC("\251ART"):
3824 {
3825 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
3826 break;
3827 }
3828 case FOURCC("aART"):
3829 {
3830 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
3831 break;
3832 }
3833 case FOURCC("\251day"):
3834 {
3835 metadataKey = AMEDIAFORMAT_KEY_YEAR;
3836 break;
3837 }
3838 case FOURCC("\251nam"):
3839 {
3840 metadataKey = AMEDIAFORMAT_KEY_TITLE;
3841 break;
3842 }
3843 case FOURCC("\251wrt"):
3844 {
3845 // various open source taggers agree that the "©wrt" tag is for composer, not writer
3846 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
3847 break;
3848 }
3849 case FOURCC("covr"):
3850 {
3851 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
3852 break;
3853 }
3854 case FOURCC("gnre"):
3855 case FOURCC("\251gen"):
3856 {
3857 metadataKey = AMEDIAFORMAT_KEY_GENRE;
3858 break;
3859 }
3860 case FOURCC("cpil"):
3861 {
3862 if (size == 9 && flags == 21) {
3863 char tmp[16];
3864 sprintf(tmp, "%d",
3865 (int)buffer[size - 1]);
3866
3867 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
3868 }
3869 break;
3870 }
3871 case FOURCC("trkn"):
3872 {
3873 if (size == 16 && flags == 0) {
3874 char tmp[16];
3875 uint16_t* pTrack = (uint16_t*)&buffer[10];
3876 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3877 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3878
3879 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3880 }
3881 break;
3882 }
3883 case FOURCC("disk"):
3884 {
3885 if ((size == 14 || size == 16) && flags == 0) {
3886 char tmp[16];
3887 uint16_t* pDisc = (uint16_t*)&buffer[10];
3888 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3889 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3890
3891 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
3892 }
3893 break;
3894 }
3895 case FOURCC("----"):
3896 {
3897 buffer[size] = '\0';
3898 switch (mPath[5]) {
3899 case FOURCC("mean"):
3900 mLastCommentMean.setTo((const char *)buffer + 4);
3901 break;
3902 case FOURCC("name"):
3903 mLastCommentName.setTo((const char *)buffer + 4);
3904 break;
3905 case FOURCC("data"):
3906 if (size < 8) {
3907 delete[] buffer;
3908 buffer = NULL;
3909 ALOGE("b/24346430");
3910 return ERROR_MALFORMED;
3911 }
3912 mLastCommentData.setTo((const char *)buffer + 8);
3913 break;
3914 }
3915
3916 // Once we have a set of mean/name/data info, go ahead and process
3917 // it to see if its something we are interested in. Whether or not
3918 // were are interested in the specific tag, make sure to clear out
3919 // the set so we can be ready to process another tuple should one
3920 // show up later in the file.
3921 if ((mLastCommentMean.length() != 0) &&
3922 (mLastCommentName.length() != 0) &&
3923 (mLastCommentData.length() != 0)) {
3924
3925 if (mLastCommentMean == "com.apple.iTunes"
3926 && mLastCommentName == "iTunSMPB") {
3927 int32_t delay, padding;
3928 if (sscanf(mLastCommentData,
3929 " %*x %x %x %*x", &delay, &padding) == 2) {
3930 if (mLastTrack == NULL) {
3931 delete[] buffer;
3932 return ERROR_MALFORMED;
3933 }
3934
3935 AMediaFormat_setInt32(mLastTrack->meta,
3936 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
3937 AMediaFormat_setInt32(mLastTrack->meta,
3938 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
3939 }
3940 }
3941
3942 mLastCommentMean.clear();
3943 mLastCommentName.clear();
3944 mLastCommentData.clear();
3945 }
3946 break;
3947 }
3948
3949 default:
3950 break;
3951 }
3952
3953 void *tmpData;
3954 size_t tmpDataSize;
3955 const char *s;
3956 if (size >= 8 && metadataKey &&
3957 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
3958 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
3959 if (!strcmp(metadataKey, "albumart")) {
3960 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
3961 buffer + 8, size - 8);
3962 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
3963 if (flags == 0) {
3964 // uint8_t genre code, iTunes genre codes are
3965 // the standard id3 codes, except they start
3966 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3967 // We use standard id3 numbering, so subtract 1.
3968 int genrecode = (int)buffer[size - 1];
3969 genrecode--;
3970 if (genrecode < 0) {
3971 genrecode = 255; // reserved for 'unknown genre'
3972 }
3973 char genre[10];
3974 sprintf(genre, "%d", genrecode);
3975
3976 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
3977 } else if (flags == 1) {
3978 // custom genre string
3979 buffer[size] = '\0';
3980
3981 AMediaFormat_setString(mFileMetaData,
3982 metadataKey, (const char *)buffer + 8);
3983 }
3984 } else {
3985 buffer[size] = '\0';
3986
3987 AMediaFormat_setString(mFileMetaData,
3988 metadataKey, (const char *)buffer + 8);
3989 }
3990 }
3991
3992 delete[] buffer;
3993 buffer = NULL;
3994
3995 return OK;
3996 }
3997
parseColorInfo(off64_t offset,size_t size)3998 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3999 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4000 return ERROR_MALFORMED;
4001 }
4002
4003 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4004 if (buffer == NULL) {
4005 return ERROR_MALFORMED;
4006 }
4007 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4008 delete[] buffer;
4009 buffer = NULL;
4010
4011 return ERROR_IO;
4012 }
4013
4014 int32_t type = U32_AT(&buffer[0]);
4015 if ((type == FOURCC("nclx") && size >= 11)
4016 || (type == FOURCC("nclc") && size >= 10)) {
4017 // only store the first color specification
4018 int32_t existingColor;
4019 if (!AMediaFormat_getInt32(mLastTrack->meta,
4020 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4021 int32_t primaries = U16_AT(&buffer[4]);
4022 int32_t isotransfer = U16_AT(&buffer[6]);
4023 int32_t coeffs = U16_AT(&buffer[8]);
4024 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4025
4026 int32_t range = 0;
4027 int32_t standard = 0;
4028 int32_t transfer = 0;
4029 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4030 primaries, isotransfer, coeffs, fullRange,
4031 &range, &standard, &transfer);
4032
4033 if (range != 0) {
4034 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4035 }
4036 if (standard != 0) {
4037 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4038 }
4039 if (transfer != 0) {
4040 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4041 }
4042 }
4043 }
4044
4045 delete[] buffer;
4046 buffer = NULL;
4047
4048 return OK;
4049 }
4050
parse3GPPMetaData(off64_t offset,size_t size,int depth)4051 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4052 if (size < 4 || size == SIZE_MAX) {
4053 return ERROR_MALFORMED;
4054 }
4055
4056 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4057 if (buffer == NULL) {
4058 return ERROR_MALFORMED;
4059 }
4060 if (mDataSource->readAt(
4061 offset, buffer, size) != (ssize_t)size) {
4062 delete[] buffer;
4063 buffer = NULL;
4064
4065 return ERROR_IO;
4066 }
4067
4068 const char *metadataKey = nullptr;
4069 switch (mPath[depth]) {
4070 case FOURCC("titl"):
4071 {
4072 metadataKey = "title";
4073 break;
4074 }
4075 case FOURCC("perf"):
4076 {
4077 metadataKey = "artist";
4078 break;
4079 }
4080 case FOURCC("auth"):
4081 {
4082 metadataKey = "writer";
4083 break;
4084 }
4085 case FOURCC("gnre"):
4086 {
4087 metadataKey = "genre";
4088 break;
4089 }
4090 case FOURCC("albm"):
4091 {
4092 if (buffer[size - 1] != '\0') {
4093 char tmp[4];
4094 sprintf(tmp, "%u", buffer[size - 1]);
4095
4096 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4097 }
4098
4099 metadataKey = "album";
4100 break;
4101 }
4102 case FOURCC("yrrc"):
4103 {
4104 if (size < 6) {
4105 delete[] buffer;
4106 buffer = NULL;
4107 ALOGE("b/62133227");
4108 android_errorWriteLog(0x534e4554, "62133227");
4109 return ERROR_MALFORMED;
4110 }
4111 char tmp[5];
4112 uint16_t year = U16_AT(&buffer[4]);
4113
4114 if (year < 10000) {
4115 sprintf(tmp, "%u", year);
4116
4117 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4118 }
4119 break;
4120 }
4121
4122 default:
4123 break;
4124 }
4125
4126 if (metadataKey) {
4127 bool isUTF8 = true; // Common case
4128 char16_t *framedata = NULL;
4129 int len16 = 0; // Number of UTF-16 characters
4130
4131 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4132 if (size < 6) {
4133 delete[] buffer;
4134 buffer = NULL;
4135 return ERROR_MALFORMED;
4136 }
4137
4138 if (size - 6 >= 4) {
4139 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4140 framedata = (char16_t *)(buffer + 6);
4141 if (0xfffe == *framedata) {
4142 // endianness marker (BOM) doesn't match host endianness
4143 for (int i = 0; i < len16; i++) {
4144 framedata[i] = bswap_16(framedata[i]);
4145 }
4146 // BOM is now swapped to 0xfeff, we will execute next block too
4147 }
4148
4149 if (0xfeff == *framedata) {
4150 // Remove the BOM
4151 framedata++;
4152 len16--;
4153 isUTF8 = false;
4154 }
4155 // else normal non-zero-length UTF-8 string
4156 // we can't handle UTF-16 without BOM as there is no other
4157 // indication of encoding.
4158 }
4159
4160 if (isUTF8) {
4161 buffer[size] = 0;
4162 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4163 } else {
4164 // Convert from UTF-16 string to UTF-8 string.
4165 String8 tmpUTF8str(framedata, len16);
4166 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4167 }
4168 }
4169
4170 delete[] buffer;
4171 buffer = NULL;
4172
4173 return OK;
4174 }
4175
parseID3v2MetaData(off64_t offset,uint64_t size)4176 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4177 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4178 if (buffer == NULL) {
4179 return;
4180 }
4181 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4182 delete[] buffer;
4183 buffer = NULL;
4184 return;
4185 }
4186
4187 ID3 id3(buffer, size, true /* ignorev1 */);
4188 delete[] buffer;
4189
4190 if (id3.isValid()) {
4191 struct Map {
4192 const char *key;
4193 const char *tag1;
4194 const char *tag2;
4195 };
4196 static const Map kMap[] = {
4197 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4198 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4199 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4200 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4201 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4202 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4203 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4204 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4205 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4206 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4207 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4208 };
4209 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4210
4211 for (size_t i = 0; i < kNumMapEntries; ++i) {
4212 const char *ss;
4213 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4214 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4215 if (it->done()) {
4216 delete it;
4217 it = new ID3::Iterator(id3, kMap[i].tag2);
4218 }
4219
4220 if (it->done()) {
4221 delete it;
4222 continue;
4223 }
4224
4225 String8 s;
4226 it->getString(&s);
4227 delete it;
4228
4229 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4230 }
4231 }
4232
4233 size_t dataSize;
4234 String8 mime;
4235 const void *data = id3.getAlbumArt(&dataSize, &mime);
4236
4237 if (data) {
4238 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4239 }
4240 }
4241 }
4242
getTrack(size_t index)4243 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4244 status_t err;
4245 if ((err = readMetaData()) != OK) {
4246 return NULL;
4247 }
4248
4249 Track *track = mFirstTrack;
4250 while (index > 0) {
4251 if (track == NULL) {
4252 return NULL;
4253 }
4254
4255 track = track->next;
4256 --index;
4257 }
4258
4259 if (track == NULL) {
4260 return NULL;
4261 }
4262
4263
4264 Trex *trex = NULL;
4265 int32_t trackId;
4266 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4267 for (size_t i = 0; i < mTrex.size(); i++) {
4268 Trex *t = &mTrex.editItemAt(i);
4269 if (t->track_ID == (uint32_t) trackId) {
4270 trex = t;
4271 break;
4272 }
4273 }
4274 } else {
4275 ALOGE("b/21657957");
4276 return NULL;
4277 }
4278
4279 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4280
4281 const char *mime;
4282 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4283 return NULL;
4284 }
4285
4286 sp<ItemTable> itemTable;
4287 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4288 void *data;
4289 size_t size;
4290 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4291 return NULL;
4292 }
4293
4294 const uint8_t *ptr = (const uint8_t *)data;
4295
4296 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4297 return NULL;
4298 }
4299 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4300 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4301 void *data;
4302 size_t size;
4303 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4304 return NULL;
4305 }
4306
4307 const uint8_t *ptr = (const uint8_t *)data;
4308
4309 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4310 return NULL;
4311 }
4312 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4313 itemTable = mItemTable;
4314 }
4315 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4316 void *data;
4317 size_t size;
4318 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4319 return NULL;
4320 }
4321
4322 const uint8_t *ptr = (const uint8_t *)data;
4323
4324 // dv_major.dv_minor Should be 1.0 or 2.1
4325 if (size != 24 || ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1))) {
4326 return NULL;
4327 }
4328 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4329 void *data;
4330 size_t size;
4331 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4332 return NULL;
4333 }
4334
4335 const uint8_t *ptr = (const uint8_t *)data;
4336
4337 if (size < 5 || ptr[0] != 0x81) { // configurationVersion == 1
4338 return NULL;
4339 }
4340 }
4341
4342 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4343
4344 uint64_t elst_initial_empty_edit_ticks = 0;
4345 if (mHeaderTimescale != 0) {
4346 // Convert empty_edit_ticks from movie timescale to media timescale.
4347 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4348 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4349 &elst_initial_empty_edit_ticks_mul) ||
4350 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4351 &elst_initial_empty_edit_ticks_add)) {
4352 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4353 return nullptr;
4354 }
4355 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4356 }
4357 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4358 elst_initial_empty_edit_ticks);
4359
4360 MPEG4Source* source =
4361 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4362 mSidxEntries, trex, mMoofOffset, itemTable,
4363 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4364 if (source->init() != OK) {
4365 delete source;
4366 return NULL;
4367 }
4368 return source;
4369 }
4370
4371 // static
verifyTrack(Track * track)4372 status_t MPEG4Extractor::verifyTrack(Track *track) {
4373 const char *mime;
4374 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4375
4376 void *data;
4377 size_t size;
4378 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4379 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4380 return ERROR_MALFORMED;
4381 }
4382 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4383 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4384 return ERROR_MALFORMED;
4385 }
4386 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4387 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4388 return ERROR_MALFORMED;
4389 }
4390 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4391 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4392 return ERROR_MALFORMED;
4393 }
4394 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4395 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4396 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4397 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4398 return ERROR_MALFORMED;
4399 }
4400 }
4401
4402 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4403 // Make sure we have all the metadata we need.
4404 ALOGE("stbl atom missing/invalid.");
4405 return ERROR_MALFORMED;
4406 }
4407
4408 if (track->timescale == 0) {
4409 ALOGE("timescale invalid.");
4410 return ERROR_MALFORMED;
4411 }
4412
4413 return OK;
4414 }
4415
// MPEG-4 audio object type (AOT) identifiers.
// Only the values referenced by this file are active; the remaining entries
// are kept commented out as a reference for the numbering.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   // Main profile
    AOT_AAC_LC              = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  // (reserved)
    // AOT_RSVD_11          = 11,  // (reserved)
    // AOT_TTSI             = 12,  // TTSI Object
    // AOT_MAIN_SYNTH       = 13,  // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,  // Error Resilient (ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  // (reserved)
    // AOT_ER_AAC_LTP       = 19,  // Error Resilient (ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,  // Error Resilient (ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  // Error Resilient (ER) TwinVQ object
    AOT_ER_BSAC             = 22,  // Error Resilient (ER) BSAC object
    AOT_ER_AAC_LD           = 23,  // Error Resilient (ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  // Error Resilient (ER) CELP object
    // AOT_ER_HVXC          = 25,  // Error Resilient (ER) HVXC object
    // AOT_ER_HILN          = 26,  // Error Resilient (ER) HILN object
    // AOT_ER_PARA          = 27,  // Error Resilient (ER) Parametric object
    // AOT_RSVD_28          = 28,  // might become SSC
    AOT_PS                  = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  // MPEG Surround

    AOT_ESCAPE              = 31,  // Signals that the AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  // might become DST
    // AOT_RSVD_36          = 36,  // might become ALS
    // AOT_AAC_SLS          = 37,  // AAC + SLS
    // AOT_SLS              = 38,  // SLS
    // AOT_ER_AAC_ELD       = 39,  // AAC Enhanced Low Delay

    AOT_USAC                = 42,  // USAC
    // AOT_SAOC             = 43,  // SAOC
    // AOT_LD_MPEGS         = 44,  // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  // Interim AOT for Rsvd50
};
4467
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4468 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4469 const void *esds_data, size_t esds_size) {
4470 ESDS esds(esds_data, esds_size);
4471
4472 uint8_t objectTypeIndication;
4473 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4474 return ERROR_MALFORMED;
4475 }
4476
4477 if (objectTypeIndication == 0xe1) {
4478 // This isn't MPEG4 audio at all, it's QCELP 14k...
4479 if (mLastTrack == NULL)
4480 return ERROR_MALFORMED;
4481
4482 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4483 return OK;
4484 }
4485
4486 if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4487 // mp3 audio
4488 AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4489 return OK;
4490 }
4491
4492 if (mLastTrack != NULL) {
4493 uint32_t maxBitrate = 0;
4494 uint32_t avgBitrate = 0;
4495 esds.getBitRate(&maxBitrate, &avgBitrate);
4496 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4497 AMediaFormat_setInt32(mLastTrack->meta,
4498 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4499 }
4500 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4501 AMediaFormat_setInt32(mLastTrack->meta,
4502 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4503 }
4504 }
4505
4506 const uint8_t *csd;
4507 size_t csd_size;
4508 if (esds.getCodecSpecificInfo(
4509 (const void **)&csd, &csd_size) != OK) {
4510 return ERROR_MALFORMED;
4511 }
4512
4513 if (kUseHexDump) {
4514 printf("ESD of size %zu\n", csd_size);
4515 hexdump(csd, csd_size);
4516 }
4517
4518 if (csd_size == 0) {
4519 // There's no further information, i.e. no codec specific data
4520 // Let's assume that the information provided in the mpeg4 headers
4521 // is accurate and hope for the best.
4522
4523 return OK;
4524 }
4525
4526 if (csd_size < 2) {
4527 return ERROR_MALFORMED;
4528 }
4529
4530 if (objectTypeIndication == 0xdd) {
4531 // vorbis audio
4532 if (csd[0] != 0x02) {
4533 return ERROR_MALFORMED;
4534 }
4535
4536 // codecInfo starts with two lengths, len1 and len2, that are
4537 // "Xiph-style-lacing encoded"..
4538
4539 size_t offset = 1;
4540 size_t len1 = 0;
4541 while (offset < csd_size && csd[offset] == 0xff) {
4542 if (__builtin_add_overflow(len1, 0xff, &len1)) {
4543 return ERROR_MALFORMED;
4544 }
4545 ++offset;
4546 }
4547 if (offset >= csd_size) {
4548 return ERROR_MALFORMED;
4549 }
4550 if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4551 return ERROR_MALFORMED;
4552 }
4553 ++offset;
4554 if (len1 == 0) {
4555 return ERROR_MALFORMED;
4556 }
4557
4558 size_t len2 = 0;
4559 while (offset < csd_size && csd[offset] == 0xff) {
4560 if (__builtin_add_overflow(len2, 0xff, &len2)) {
4561 return ERROR_MALFORMED;
4562 }
4563 ++offset;
4564 }
4565 if (offset >= csd_size) {
4566 return ERROR_MALFORMED;
4567 }
4568 if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4569 return ERROR_MALFORMED;
4570 }
4571 ++offset;
4572 if (len2 == 0) {
4573 return ERROR_MALFORMED;
4574 }
4575 if (offset >= csd_size || csd[offset] != 0x01) {
4576 return ERROR_MALFORMED;
4577 }
4578 // formerly kKeyVorbisInfo
4579 AMediaFormat_setBuffer(mLastTrack->meta,
4580 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4581
4582 if (__builtin_add_overflow(offset, len1, &offset) ||
4583 offset >= csd_size || csd[offset] != 0x03) {
4584 return ERROR_MALFORMED;
4585 }
4586
4587 if (__builtin_add_overflow(offset, len2, &offset) ||
4588 offset >= csd_size || csd[offset] != 0x05) {
4589 return ERROR_MALFORMED;
4590 }
4591
4592 // formerly kKeyVorbisBooks
4593 AMediaFormat_setBuffer(mLastTrack->meta,
4594 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4595 AMediaFormat_setString(mLastTrack->meta,
4596 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4597
4598 return OK;
4599 }
4600
4601 static uint32_t kSamplingRate[] = {
4602 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4603 16000, 12000, 11025, 8000, 7350
4604 };
4605
4606 ABitReader br(csd, csd_size);
4607 uint32_t objectType = br.getBits(5);
4608
4609 if (objectType == AOT_ESCAPE) { // AAC-ELD => additional 6 bits
4610 objectType = 32 + br.getBits(6);
4611 }
4612
4613 if (mLastTrack == NULL)
4614 return ERROR_MALFORMED;
4615
4616 //keep AOT type
4617 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4618
4619 uint32_t freqIndex = br.getBits(4);
4620
4621 int32_t sampleRate = 0;
4622 int32_t numChannels = 0;
4623 if (freqIndex == 15) {
4624 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4625 sampleRate = br.getBits(24);
4626 numChannels = br.getBits(4);
4627 } else {
4628 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4629 numChannels = br.getBits(4);
4630
4631 if (freqIndex == 13 || freqIndex == 14) {
4632 return ERROR_MALFORMED;
4633 }
4634
4635 sampleRate = kSamplingRate[freqIndex];
4636 }
4637
4638 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4639 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4640 uint32_t extFreqIndex = br.getBits(4);
4641 int32_t extSampleRate __unused;
4642 if (extFreqIndex == 15) {
4643 if (csd_size < 8) {
4644 return ERROR_MALFORMED;
4645 }
4646 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4647 extSampleRate = br.getBits(24);
4648 } else {
4649 if (extFreqIndex == 13 || extFreqIndex == 14) {
4650 return ERROR_MALFORMED;
4651 }
4652 extSampleRate = kSamplingRate[extFreqIndex];
4653 }
4654 //TODO: save the extension sampling rate value in meta data =>
4655 // AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4656 }
4657
4658 switch (numChannels) {
4659 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4660 case 0:
4661 case 1:// FC
4662 case 2:// FL FR
4663 case 3:// FC, FL FR
4664 case 4:// FC, FL FR, RC
4665 case 5:// FC, FL FR, SL SR
4666 case 6:// FC, FL FR, SL SR, LFE
4667 //numChannels already contains the right value
4668 break;
4669 case 11:// FC, FL FR, SL SR, RC, LFE
4670 numChannels = 7;
4671 break;
4672 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4673 case 12:// FC, FL FR, SL SR, RL RR, LFE
4674 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
4675 numChannels = 8;
4676 break;
4677 default:
4678 return ERROR_UNSUPPORTED;
4679 }
4680
4681 {
4682 if (objectType == AOT_SBR || objectType == AOT_PS) {
4683 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4684 objectType = br.getBits(5);
4685
4686 if (objectType == AOT_ESCAPE) {
4687 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4688 objectType = 32 + br.getBits(6);
4689 }
4690 }
4691 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4692 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4693 objectType == AOT_ER_BSAC) {
4694 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4695 const int32_t frameLengthFlag __unused = br.getBits(1);
4696
4697 const int32_t dependsOnCoreCoder = br.getBits(1);
4698
4699 if (dependsOnCoreCoder ) {
4700 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4701 const int32_t coreCoderDelay __unused = br.getBits(14);
4702 }
4703
4704 int32_t extensionFlag = -1;
4705 if (br.numBitsLeft() > 0) {
4706 extensionFlag = br.getBits(1);
4707 } else {
4708 switch (objectType) {
4709 // 14496-3 4.5.1.1 extensionFlag
4710 case AOT_AAC_LC:
4711 extensionFlag = 0;
4712 break;
4713 case AOT_ER_AAC_LC:
4714 case AOT_ER_AAC_SCAL:
4715 case AOT_ER_BSAC:
4716 case AOT_ER_AAC_LD:
4717 extensionFlag = 1;
4718 break;
4719 default:
4720 return ERROR_MALFORMED;
4721 break;
4722 }
4723 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4724 extensionFlag, objectType);
4725 }
4726
4727 if (numChannels == 0) {
4728 int32_t channelsEffectiveNum = 0;
4729 int32_t channelsNum = 0;
4730 if (br.numBitsLeft() < 32) {
4731 return ERROR_MALFORMED;
4732 }
4733 const int32_t ElementInstanceTag __unused = br.getBits(4);
4734 const int32_t Profile __unused = br.getBits(2);
4735 const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
4736 const int32_t NumFrontChannelElements = br.getBits(4);
4737 const int32_t NumSideChannelElements = br.getBits(4);
4738 const int32_t NumBackChannelElements = br.getBits(4);
4739 const int32_t NumLfeChannelElements = br.getBits(2);
4740 const int32_t NumAssocDataElements __unused = br.getBits(3);
4741 const int32_t NumValidCcElements __unused = br.getBits(4);
4742
4743 const int32_t MonoMixdownPresent = br.getBits(1);
4744
4745 if (MonoMixdownPresent != 0) {
4746 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4747 const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
4748 }
4749
4750 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4751 const int32_t StereoMixdownPresent = br.getBits(1);
4752 if (StereoMixdownPresent != 0) {
4753 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4754 const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
4755 }
4756
4757 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4758 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4759 if (MatrixMixdownIndexPresent != 0) {
4760 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4761 const int32_t MatrixMixdownIndex __unused = br.getBits(2);
4762 const int32_t PseudoSurroundEnable __unused = br.getBits(1);
4763 }
4764
4765 int i;
4766 for (i=0; i < NumFrontChannelElements; i++) {
4767 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4768 const int32_t FrontElementIsCpe = br.getBits(1);
4769 const int32_t FrontElementTagSelect __unused = br.getBits(4);
4770 channelsNum += FrontElementIsCpe ? 2 : 1;
4771 }
4772
4773 for (i=0; i < NumSideChannelElements; i++) {
4774 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4775 const int32_t SideElementIsCpe = br.getBits(1);
4776 const int32_t SideElementTagSelect __unused = br.getBits(4);
4777 channelsNum += SideElementIsCpe ? 2 : 1;
4778 }
4779
4780 for (i=0; i < NumBackChannelElements; i++) {
4781 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4782 const int32_t BackElementIsCpe = br.getBits(1);
4783 const int32_t BackElementTagSelect __unused = br.getBits(4);
4784 channelsNum += BackElementIsCpe ? 2 : 1;
4785 }
4786 channelsEffectiveNum = channelsNum;
4787
4788 for (i=0; i < NumLfeChannelElements; i++) {
4789 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4790 const int32_t LfeElementTagSelect __unused = br.getBits(4);
4791 channelsNum += 1;
4792 }
4793 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4794 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4795 numChannels = channelsNum;
4796 }
4797 }
4798 }
4799
4800 if (numChannels == 0) {
4801 return ERROR_UNSUPPORTED;
4802 }
4803
4804 if (mLastTrack == NULL)
4805 return ERROR_MALFORMED;
4806
4807 int32_t prevSampleRate;
4808 CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
4809
4810 if (prevSampleRate != sampleRate) {
4811 ALOGV("mpeg4 audio sample rate different from previous setting. "
4812 "was: %d, now: %d", prevSampleRate, sampleRate);
4813 }
4814
4815 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
4816
4817 int32_t prevChannelCount;
4818 CHECK(AMediaFormat_getInt32(mLastTrack->meta,
4819 AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
4820
4821 if (prevChannelCount != numChannels) {
4822 ALOGV("mpeg4 audio channel count different from previous setting. "
4823 "was: %d, now: %d", prevChannelCount, numChannels);
4824 }
4825
4826 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
4827
4828 return OK;
4829 }
4830
adjustRawDefaultFrameSize()4831 void MPEG4Extractor::adjustRawDefaultFrameSize() {
4832 int32_t chanCount = 0;
4833 int32_t bitWidth = 0;
4834 const char *mimeStr = NULL;
4835
4836 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
4837 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
4838 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
4839 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
4840 // samplesize in stsz may not right , so updade default samplesize
4841 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
4842 }
4843 }
4844
4845 ////////////////////////////////////////////////////////////////////////////////
4846
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)4847 MPEG4Source::MPEG4Source(
4848 AMediaFormat *format,
4849 DataSourceHelper *dataSource,
4850 int32_t timeScale,
4851 const sp<SampleTable> &sampleTable,
4852 Vector<SidxEntry> &sidx,
4853 const Trex *trex,
4854 off64_t firstMoofOffset,
4855 const sp<ItemTable> &itemTable,
4856 uint64_t elstShiftStartTicks,
4857 uint64_t elstInitialEmptyEditTicks)
4858 : mFormat(format),
4859 mDataSource(dataSource),
4860 mTimescale(timeScale),
4861 mSampleTable(sampleTable),
4862 mCurrentSampleIndex(0),
4863 mCurrentFragmentIndex(0),
4864 mSegments(sidx),
4865 mTrex(trex),
4866 mFirstMoofOffset(firstMoofOffset),
4867 mCurrentMoofOffset(firstMoofOffset),
4868 mCurrentMoofSize(0),
4869 mNextMoofOffset(-1),
4870 mCurrentTime(0),
4871 mDefaultEncryptedByteBlock(0),
4872 mDefaultSkipByteBlock(0),
4873 mCurrentSampleInfoAllocSize(0),
4874 mCurrentSampleInfoSizes(NULL),
4875 mCurrentSampleInfoOffsetsAllocSize(0),
4876 mCurrentSampleInfoOffsets(NULL),
4877 mIsAVC(false),
4878 mIsHEVC(false),
4879 mIsDolbyVision(false),
4880 mIsAC4(false),
4881 mIsPcm(false),
4882 mNALLengthSize(0),
4883 mStarted(false),
4884 mBuffer(NULL),
4885 mSrcBuffer(NULL),
4886 mIsHeif(itemTable != NULL),
4887 mItemTable(itemTable),
4888 mElstShiftStartTicks(elstShiftStartTicks),
4889 mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
4890
4891 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
4892
4893 AMediaFormat_getInt32(mFormat,
4894 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
4895 mDefaultIVSize = 0;
4896 AMediaFormat_getInt32(mFormat,
4897 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
4898 void *key;
4899 size_t keysize;
4900 if (AMediaFormat_getBuffer(mFormat,
4901 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
4902 CHECK(keysize <= 16);
4903 memset(mCryptoKey, 0, 16);
4904 memcpy(mCryptoKey, key, keysize);
4905 }
4906
4907 AMediaFormat_getInt32(mFormat,
4908 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
4909 AMediaFormat_getInt32(mFormat,
4910 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
4911
4912 const char *mime;
4913 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
4914 CHECK(success);
4915
4916 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
4917 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
4918 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
4919 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
4920 mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
4921
4922 if (mIsAVC) {
4923 void *data;
4924 size_t size;
4925 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
4926
4927 const uint8_t *ptr = (const uint8_t *)data;
4928
4929 CHECK(size >= 7);
4930 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4931
4932 // The number of bytes used to encode the length of a NAL unit.
4933 mNALLengthSize = 1 + (ptr[4] & 3);
4934 } else if (mIsHEVC) {
4935 void *data;
4936 size_t size;
4937 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
4938
4939 const uint8_t *ptr = (const uint8_t *)data;
4940
4941 CHECK(size >= 22);
4942 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4943
4944 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
4945 } else if (mIsDolbyVision) {
4946 ALOGV("%s DolbyVision stream detected", __FUNCTION__);
4947 void *data;
4948 size_t size;
4949 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
4950
4951 const uint8_t *ptr = (const uint8_t *)data;
4952
4953 CHECK(size == 24);
4954
4955 // dv_major.dv_minor Should be 1.0 or 2.1
4956 CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
4957
4958 const uint8_t profile = ptr[2] >> 1;
4959 // profile == (unknown,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
4960 // profile == (10) --> AV1
4961 if (profile > 1 && profile < 9) {
4962 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
4963
4964 const uint8_t *ptr = (const uint8_t *)data;
4965
4966 CHECK(size >= 22);
4967 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4968
4969 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
4970 } else if (10 == profile) {
4971 /* AV1 profile nothing to do */
4972 } else {
4973 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
4974 const uint8_t *ptr = (const uint8_t *)data;
4975
4976 CHECK(size >= 7);
4977 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4978 // The number of bytes used to encode the length of a NAL unit.
4979 mNALLengthSize = 1 + (ptr[4] & 3);
4980 }
4981 }
4982
4983 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
4984 mIsAudio = !strncasecmp(mime, "audio/", 6);
4985
4986 int32_t aacObjectType = -1;
4987
4988 if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
4989 mIsUsac = (aacObjectType == AOT_USAC);
4990 }
4991
4992 if (mIsPcm) {
4993 int32_t numChannels = 0;
4994 int32_t bitsPerSample = 0;
4995 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
4996 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
4997
4998 int32_t bytesPerSample = bitsPerSample >> 3;
4999 int32_t pcmSampleSize = bytesPerSample * numChannels;
5000
5001 size_t maxSampleSize;
5002 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5003 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5004 || bitsPerSample != 16) {
5005 // Not supported
5006 mIsPcm = false;
5007 } else {
5008 AMediaFormat_setInt32(mFormat,
5009 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5010 }
5011 }
5012
5013 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5014 }
5015
init()5016 status_t MPEG4Source::init() {
5017 if (mFirstMoofOffset != 0) {
5018 off64_t offset = mFirstMoofOffset;
5019 return parseChunk(&offset);
5020 }
5021 return OK;
5022 }
5023
~MPEG4Source()5024 MPEG4Source::~MPEG4Source() {
5025 if (mStarted) {
5026 stop();
5027 }
5028 free(mCurrentSampleInfoSizes);
5029 free(mCurrentSampleInfoOffsets);
5030 }
5031
start()5032 media_status_t MPEG4Source::start() {
5033 Mutex::Autolock autoLock(mLock);
5034
5035 CHECK(!mStarted);
5036
5037 int32_t tmp;
5038 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5039 size_t max_size = tmp;
5040
5041 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5042 // If you see the message below for a valid input stream: increase the limit
5043 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5044 if (max_size > kMaxBufferSize) {
5045 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5046 return AMEDIA_ERROR_MALFORMED;
5047 }
5048 if (max_size == 0) {
5049 ALOGE("zero max input size");
5050 return AMEDIA_ERROR_MALFORMED;
5051 }
5052
5053 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5054 const size_t kInitialBuffers = 2;
5055 const size_t kMaxBuffers = 8;
5056 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5057 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5058 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5059 if (mSrcBuffer == NULL) {
5060 // file probably specified a bad max size
5061 return AMEDIA_ERROR_MALFORMED;
5062 }
5063
5064 mStarted = true;
5065
5066 return AMEDIA_OK;
5067 }
5068
stop()5069 media_status_t MPEG4Source::stop() {
5070 Mutex::Autolock autoLock(mLock);
5071
5072 CHECK(mStarted);
5073
5074 if (mBuffer != NULL) {
5075 mBuffer->release();
5076 mBuffer = NULL;
5077 }
5078
5079 delete[] mSrcBuffer;
5080 mSrcBuffer = NULL;
5081
5082 mStarted = false;
5083 mCurrentSampleIndex = 0;
5084
5085 return AMEDIA_OK;
5086 }
5087
parseChunk(off64_t * offset)5088 status_t MPEG4Source::parseChunk(off64_t *offset) {
5089 uint32_t hdr[2];
5090 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5091 return ERROR_IO;
5092 }
5093 uint64_t chunk_size = ntohl(hdr[0]);
5094 uint32_t chunk_type = ntohl(hdr[1]);
5095 off64_t data_offset = *offset + 8;
5096
5097 if (chunk_size == 1) {
5098 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5099 return ERROR_IO;
5100 }
5101 chunk_size = ntoh64(chunk_size);
5102 data_offset += 8;
5103
5104 if (chunk_size < 16) {
5105 // The smallest valid chunk is 16 bytes long in this case.
5106 return ERROR_MALFORMED;
5107 }
5108 } else if (chunk_size < 8) {
5109 // The smallest valid chunk is 8 bytes long.
5110 return ERROR_MALFORMED;
5111 }
5112
5113 char chunk[5];
5114 MakeFourCCString(chunk_type, chunk);
5115 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5116
5117 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5118
5119 switch(chunk_type) {
5120
5121 case FOURCC("traf"):
5122 case FOURCC("moof"): {
5123 off64_t stop_offset = *offset + chunk_size;
5124 *offset = data_offset;
5125 if (chunk_type == FOURCC("moof")) {
5126 mCurrentMoofSize = chunk_data_size;
5127 }
5128 while (*offset < stop_offset) {
5129 status_t err = parseChunk(offset);
5130 if (err != OK) {
5131 return err;
5132 }
5133 }
5134 if (chunk_type == FOURCC("moof")) {
5135 // *offset points to the box following this moof. Find the next moof from there.
5136
5137 while (true) {
5138 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5139 // no more box to the end of file.
5140 break;
5141 }
5142 chunk_size = ntohl(hdr[0]);
5143 chunk_type = ntohl(hdr[1]);
5144 if (chunk_size == 1) {
5145 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5146 // which is defined in 4.2 Object Structure.
5147 // When chunk_size==1, 8 bytes follows as "largesize".
5148 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5149 return ERROR_IO;
5150 }
5151 chunk_size = ntoh64(chunk_size);
5152 if (chunk_size < 16) {
5153 // The smallest valid chunk is 16 bytes long in this case.
5154 return ERROR_MALFORMED;
5155 }
5156 } else if (chunk_size == 0) {
5157 // next box extends to end of file.
5158 } else if (chunk_size < 8) {
5159 // The smallest valid chunk is 8 bytes long in this case.
5160 return ERROR_MALFORMED;
5161 }
5162
5163 if (chunk_type == FOURCC("moof")) {
5164 mNextMoofOffset = *offset;
5165 break;
5166 } else if (chunk_type == FOURCC("mdat")) {
5167 parseChunk(offset);
5168 continue;
5169 } else if (chunk_size == 0) {
5170 break;
5171 }
5172 *offset += chunk_size;
5173 }
5174 }
5175 break;
5176 }
5177
5178 case FOURCC("tfhd"): {
5179 status_t err;
5180 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5181 return err;
5182 }
5183 *offset += chunk_size;
5184 break;
5185 }
5186
5187 case FOURCC("trun"): {
5188 status_t err;
5189 if (mLastParsedTrackId == mTrackId) {
5190 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5191 return err;
5192 }
5193 }
5194
5195 *offset += chunk_size;
5196 break;
5197 }
5198
5199 case FOURCC("saiz"): {
5200 status_t err;
5201 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5202 return err;
5203 }
5204 *offset += chunk_size;
5205 break;
5206 }
5207 case FOURCC("saio"): {
5208 status_t err;
5209 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5210 != OK) {
5211 return err;
5212 }
5213 *offset += chunk_size;
5214 break;
5215 }
5216
5217 case FOURCC("senc"): {
5218 status_t err;
5219 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5220 return err;
5221 }
5222 *offset += chunk_size;
5223 break;
5224 }
5225
5226 case FOURCC("mdat"): {
5227 // parse DRM info if present
5228 ALOGV("MPEG4Source::parseChunk mdat");
5229 // if saiz/saoi was previously observed, do something with the sampleinfos
5230 status_t err = OK;
5231 auto kv = mDrmOffsets.lower_bound(*offset);
5232 if (kv != mDrmOffsets.end()) {
5233 auto drmoffset = kv->first;
5234 auto flags = kv->second;
5235 mDrmOffsets.erase(kv);
5236 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5237 chunk_size, drmoffset, *offset);
5238 if (chunk_size >= drmoffset - *offset) {
5239 err = parseClearEncryptedSizes(drmoffset, false, flags,
5240 chunk_size - (drmoffset - *offset));
5241 }
5242 }
5243 if (err != OK) {
5244 return err;
5245 }
5246 *offset += chunk_size;
5247 break;
5248 }
5249
5250 default: {
5251 *offset += chunk_size;
5252 break;
5253 }
5254 }
5255 return OK;
5256 }
5257
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5258 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5259 off64_t offset, off64_t size) {
5260 ALOGV("parseSampleAuxiliaryInformationSizes");
5261 if (size < 9) {
5262 return -EINVAL;
5263 }
5264 // 14496-12 8.7.12
5265 uint8_t version;
5266 if (mDataSource->readAt(
5267 offset, &version, sizeof(version))
5268 < (ssize_t)sizeof(version)) {
5269 return ERROR_IO;
5270 }
5271
5272 if (version != 0) {
5273 return ERROR_UNSUPPORTED;
5274 }
5275 offset++;
5276 size--;
5277
5278 uint32_t flags;
5279 if (!mDataSource->getUInt24(offset, &flags)) {
5280 return ERROR_IO;
5281 }
5282 offset += 3;
5283 size -= 3;
5284
5285 if (flags & 1) {
5286 if (size < 13) {
5287 return -EINVAL;
5288 }
5289 uint32_t tmp;
5290 if (!mDataSource->getUInt32(offset, &tmp)) {
5291 return ERROR_MALFORMED;
5292 }
5293 mCurrentAuxInfoType = tmp;
5294 offset += 4;
5295 size -= 4;
5296 if (!mDataSource->getUInt32(offset, &tmp)) {
5297 return ERROR_MALFORMED;
5298 }
5299 mCurrentAuxInfoTypeParameter = tmp;
5300 offset += 4;
5301 size -= 4;
5302 }
5303
5304 uint8_t defsize;
5305 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5306 return ERROR_MALFORMED;
5307 }
5308 mCurrentDefaultSampleInfoSize = defsize;
5309 offset++;
5310 size--;
5311
5312 uint32_t smplcnt;
5313 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5314 return ERROR_MALFORMED;
5315 }
5316 mCurrentSampleInfoCount = smplcnt;
5317 offset += 4;
5318 size -= 4;
5319 if (mCurrentDefaultSampleInfoSize != 0) {
5320 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5321 return OK;
5322 }
5323 if(smplcnt > size) {
5324 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5325 android_errorWriteLog(0x534e4554, "124525515");
5326 return -EINVAL;
5327 }
5328 if (smplcnt > mCurrentSampleInfoAllocSize) {
5329 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5330 if (newPtr == NULL) {
5331 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5332 return NO_MEMORY;
5333 }
5334 mCurrentSampleInfoSizes = newPtr;
5335 mCurrentSampleInfoAllocSize = smplcnt;
5336 }
5337
5338 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5339 return OK;
5340 }
5341
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5342 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5343 off64_t offset, off64_t size) {
5344 ALOGV("parseSampleAuxiliaryInformationOffsets");
5345 if (size < 8) {
5346 return -EINVAL;
5347 }
5348 // 14496-12 8.7.13
5349 uint8_t version;
5350 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5351 return ERROR_IO;
5352 }
5353 offset++;
5354 size--;
5355
5356 uint32_t flags;
5357 if (!mDataSource->getUInt24(offset, &flags)) {
5358 return ERROR_IO;
5359 }
5360 offset += 3;
5361 size -= 3;
5362
5363 uint32_t entrycount;
5364 if (!mDataSource->getUInt32(offset, &entrycount)) {
5365 return ERROR_IO;
5366 }
5367 offset += 4;
5368 size -= 4;
5369 if (entrycount == 0) {
5370 return OK;
5371 }
5372 if (entrycount > UINT32_MAX / 8) {
5373 return ERROR_MALFORMED;
5374 }
5375
5376 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5377 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5378 if (newPtr == NULL) {
5379 ALOGE("failed to realloc %u -> %u",
5380 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5381 return NO_MEMORY;
5382 }
5383 mCurrentSampleInfoOffsets = newPtr;
5384 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5385 }
5386 mCurrentSampleInfoOffsetCount = entrycount;
5387
5388 if (mCurrentSampleInfoOffsets == NULL) {
5389 return OK;
5390 }
5391
5392 for (size_t i = 0; i < entrycount; i++) {
5393 if (version == 0) {
5394 if (size < 4) {
5395 ALOGW("b/124526959");
5396 android_errorWriteLog(0x534e4554, "124526959");
5397 return -EINVAL;
5398 }
5399 uint32_t tmp;
5400 if (!mDataSource->getUInt32(offset, &tmp)) {
5401 return ERROR_IO;
5402 }
5403 mCurrentSampleInfoOffsets[i] = tmp;
5404 offset += 4;
5405 size -= 4;
5406 } else {
5407 if (size < 8) {
5408 ALOGW("b/124526959");
5409 android_errorWriteLog(0x534e4554, "124526959");
5410 return -EINVAL;
5411 }
5412 uint64_t tmp;
5413 if (!mDataSource->getUInt64(offset, &tmp)) {
5414 return ERROR_IO;
5415 }
5416 mCurrentSampleInfoOffsets[i] = tmp;
5417 offset += 8;
5418 size -= 8;
5419 }
5420 }
5421
5422 // parse clear/encrypted data
5423
5424 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5425
5426 drmoffset += mCurrentMoofOffset;
5427 mDrmOffsets[drmoffset] = flags;
5428 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5429
5430 return OK;
5431 }
5432
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5433 status_t MPEG4Source::parseClearEncryptedSizes(
5434 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5435
5436 int32_t ivlength;
5437 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5438 return ERROR_MALFORMED;
5439 }
5440
5441 // only 0, 8 and 16 byte initialization vectors are supported
5442 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5443 ALOGW("unsupported IV length: %d", ivlength);
5444 return ERROR_MALFORMED;
5445 }
5446
5447 uint32_t sampleCount = mCurrentSampleInfoCount;
5448 if (isSampleEncryption) {
5449 if (size < 4) {
5450 return ERROR_MALFORMED;
5451 }
5452 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5453 return ERROR_IO;
5454 }
5455 offset += 4;
5456 size -= 4;
5457 }
5458
5459 // read CencSampleAuxiliaryDataFormats
5460 for (size_t i = 0; i < sampleCount; i++) {
5461 if (i >= mCurrentSamples.size()) {
5462 ALOGW("too few samples");
5463 break;
5464 }
5465 Sample *smpl = &mCurrentSamples.editItemAt(i);
5466 if (!smpl->clearsizes.isEmpty()) {
5467 continue;
5468 }
5469
5470 memset(smpl->iv, 0, 16);
5471 if (size < ivlength) {
5472 return ERROR_MALFORMED;
5473 }
5474 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5475 return ERROR_IO;
5476 }
5477
5478 offset += ivlength;
5479 size -= ivlength;
5480
5481 bool readSubsamples;
5482 if (isSampleEncryption) {
5483 readSubsamples = flags & 2;
5484 } else {
5485 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5486 if (smplinfosize == 0) {
5487 smplinfosize = mCurrentSampleInfoSizes[i];
5488 }
5489 readSubsamples = smplinfosize > ivlength;
5490 }
5491
5492 if (readSubsamples) {
5493 uint16_t numsubsamples;
5494 if (size < 2) {
5495 return ERROR_MALFORMED;
5496 }
5497 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5498 return ERROR_IO;
5499 }
5500 offset += 2;
5501 size -= 2;
5502 for (size_t j = 0; j < numsubsamples; j++) {
5503 uint16_t numclear;
5504 uint32_t numencrypted;
5505 if (size < 6) {
5506 return ERROR_MALFORMED;
5507 }
5508 if (!mDataSource->getUInt16(offset, &numclear)) {
5509 return ERROR_IO;
5510 }
5511 offset += 2;
5512 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5513 return ERROR_IO;
5514 }
5515 offset += 4;
5516 size -= 6;
5517 smpl->clearsizes.add(numclear);
5518 smpl->encryptedsizes.add(numencrypted);
5519 }
5520 } else {
5521 smpl->clearsizes.add(0);
5522 smpl->encryptedsizes.add(smpl->size);
5523 }
5524 }
5525
5526 return OK;
5527 }
5528
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5529 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5530 uint32_t flags;
5531 if (chunk_data_size < 4) {
5532 return ERROR_MALFORMED;
5533 }
5534 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5535 return ERROR_MALFORMED;
5536 }
5537 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5538 }
5539
parseTrackFragmentHeader(off64_t offset,off64_t size)5540 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5541
5542 if (size < 8) {
5543 return -EINVAL;
5544 }
5545
5546 uint32_t flags;
5547 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5548 return ERROR_MALFORMED;
5549 }
5550
5551 if (flags & 0xff000000) {
5552 return -EINVAL;
5553 }
5554
5555 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5556 return ERROR_MALFORMED;
5557 }
5558
5559 if (mLastParsedTrackId != mTrackId) {
5560 // this is not the right track, skip it
5561 return OK;
5562 }
5563
5564 mTrackFragmentHeaderInfo.mFlags = flags;
5565 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5566 offset += 8;
5567 size -= 8;
5568
5569 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5570
5571 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5572 if (size < 8) {
5573 return -EINVAL;
5574 }
5575
5576 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5577 return ERROR_MALFORMED;
5578 }
5579 offset += 8;
5580 size -= 8;
5581 }
5582
5583 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5584 if (size < 4) {
5585 return -EINVAL;
5586 }
5587
5588 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5589 return ERROR_MALFORMED;
5590 }
5591 offset += 4;
5592 size -= 4;
5593 }
5594
5595 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5596 if (size < 4) {
5597 return -EINVAL;
5598 }
5599
5600 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5601 return ERROR_MALFORMED;
5602 }
5603 offset += 4;
5604 size -= 4;
5605 }
5606
5607 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5608 if (size < 4) {
5609 return -EINVAL;
5610 }
5611
5612 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5613 return ERROR_MALFORMED;
5614 }
5615 offset += 4;
5616 size -= 4;
5617 }
5618
5619 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5620 if (size < 4) {
5621 return -EINVAL;
5622 }
5623
5624 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5625 return ERROR_MALFORMED;
5626 }
5627 offset += 4;
5628 size -= 4;
5629 }
5630
5631 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5632 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5633 }
5634
5635 mTrackFragmentHeaderInfo.mDataOffset = 0;
5636 return OK;
5637 }
5638
parseTrackFragmentRun(off64_t offset,off64_t size)5639 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5640
5641 ALOGV("MPEG4Source::parseTrackFragmentRun");
5642 if (size < 8) {
5643 return -EINVAL;
5644 }
5645
5646 enum {
5647 kDataOffsetPresent = 0x01,
5648 kFirstSampleFlagsPresent = 0x04,
5649 kSampleDurationPresent = 0x100,
5650 kSampleSizePresent = 0x200,
5651 kSampleFlagsPresent = 0x400,
5652 kSampleCompositionTimeOffsetPresent = 0x800,
5653 };
5654
5655 uint32_t flags;
5656 if (!mDataSource->getUInt32(offset, &flags)) {
5657 return ERROR_MALFORMED;
5658 }
5659 // |version| only affects SampleCompositionTimeOffset field.
5660 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5661 // Otherwise, SampleCompositionTimeOffset is int32_t.
5662 // Sample.compositionOffset is defined as int32_t.
5663 uint8_t version = flags >> 24;
5664 flags &= 0xffffff;
5665 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5666
5667 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5668 // These two shall not be used together.
5669 return -EINVAL;
5670 }
5671
5672 uint32_t sampleCount;
5673 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5674 return ERROR_MALFORMED;
5675 }
5676 offset += 8;
5677 size -= 8;
5678
5679 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5680
5681 uint32_t firstSampleFlags = 0;
5682
5683 if (flags & kDataOffsetPresent) {
5684 if (size < 4) {
5685 return -EINVAL;
5686 }
5687
5688 int32_t dataOffsetDelta;
5689 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5690 return ERROR_MALFORMED;
5691 }
5692
5693 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5694
5695 offset += 4;
5696 size -= 4;
5697 }
5698
5699 if (flags & kFirstSampleFlagsPresent) {
5700 if (size < 4) {
5701 return -EINVAL;
5702 }
5703
5704 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5705 return ERROR_MALFORMED;
5706 }
5707 offset += 4;
5708 size -= 4;
5709 }
5710
5711 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5712 sampleCtsOffset = 0;
5713
5714 size_t bytesPerSample = 0;
5715 if (flags & kSampleDurationPresent) {
5716 bytesPerSample += 4;
5717 } else if (mTrackFragmentHeaderInfo.mFlags
5718 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5719 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5720 } else if (mTrex) {
5721 sampleDuration = mTrex->default_sample_duration;
5722 }
5723
5724 if (flags & kSampleSizePresent) {
5725 bytesPerSample += 4;
5726 } else {
5727 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5728 #ifdef VERY_VERY_VERBOSE_LOGGING
5729 // We don't expect this, but also want to avoid spamming the log if
5730 // we hit this case.
5731 if (!(mTrackFragmentHeaderInfo.mFlags
5732 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5733 ALOGW("No sample size specified");
5734 }
5735 #endif
5736 }
5737
5738 if (flags & kSampleFlagsPresent) {
5739 bytesPerSample += 4;
5740 } else {
5741 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5742 #ifdef VERY_VERY_VERBOSE_LOGGING
5743 // We don't expect this, but also want to avoid spamming the log if
5744 // we hit this case.
5745 if (!(mTrackFragmentHeaderInfo.mFlags
5746 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5747 ALOGW("No sample flags specified");
5748 }
5749 #endif
5750 }
5751
5752 if (flags & kSampleCompositionTimeOffsetPresent) {
5753 bytesPerSample += 4;
5754 } else {
5755 sampleCtsOffset = 0;
5756 }
5757
5758 if (bytesPerSample != 0) {
5759 if (size < (off64_t)sampleCount * bytesPerSample) {
5760 return -EINVAL;
5761 }
5762 } else {
5763 if (sampleDuration == 0) {
5764 ALOGW("b/123389881 sampleDuration == 0");
5765 android_errorWriteLog(0x534e4554, "124389881 zero");
5766 return -EINVAL;
5767 }
5768
5769 // apply some sanity (vs strict legality) checks
5770 //
5771 static constexpr uint32_t kMaxTrunSampleCount = 10000;
5772 if (sampleCount > kMaxTrunSampleCount) {
5773 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
5774 sampleCount, kMaxTrunSampleCount);
5775 android_errorWriteLog(0x534e4554, "124389881 count");
5776 return -EINVAL;
5777 }
5778 }
5779
5780 Sample tmp;
5781 for (uint32_t i = 0; i < sampleCount; ++i) {
5782 if (flags & kSampleDurationPresent) {
5783 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5784 return ERROR_MALFORMED;
5785 }
5786 offset += 4;
5787 }
5788
5789 if (flags & kSampleSizePresent) {
5790 if (!mDataSource->getUInt32(offset, &sampleSize)) {
5791 return ERROR_MALFORMED;
5792 }
5793 offset += 4;
5794 }
5795
5796 if (flags & kSampleFlagsPresent) {
5797 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
5798 return ERROR_MALFORMED;
5799 }
5800 offset += 4;
5801 }
5802
5803 if (flags & kSampleCompositionTimeOffsetPresent) {
5804 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
5805 return ERROR_MALFORMED;
5806 }
5807 offset += 4;
5808 }
5809
5810 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
5811 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
5812 dataOffset, sampleSize, sampleDuration,
5813 (flags & kFirstSampleFlagsPresent) && i == 0
5814 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
5815 tmp.offset = dataOffset;
5816 tmp.size = sampleSize;
5817 tmp.duration = sampleDuration;
5818 tmp.compositionOffset = sampleCtsOffset;
5819 memset(tmp.iv, 0, sizeof(tmp.iv));
5820 if (mCurrentSamples.add(tmp) < 0) {
5821 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
5822 android_errorWriteLog(0x534e4554, "124389881 allocation");
5823 mCurrentSamples.clear();
5824 return NO_MEMORY;
5825 }
5826
5827 dataOffset += sampleSize;
5828 }
5829
5830 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
5831
5832 return OK;
5833 }
5834
getFormat(AMediaFormat * meta)5835 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
5836 Mutex::Autolock autoLock(mLock);
5837 AMediaFormat_copy(meta, mFormat);
5838 return AMEDIA_OK;
5839 }
5840
parseNALSize(const uint8_t * data) const5841 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
5842 switch (mNALLengthSize) {
5843 case 1:
5844 return *data;
5845 case 2:
5846 return U16_AT(data);
5847 case 3:
5848 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
5849 case 4:
5850 return U32_AT(data);
5851 }
5852
5853 // This cannot happen, mNALLengthSize springs to life by adding 1 to
5854 // a 2-bit integer.
5855 CHECK(!"Should not be here.");
5856
5857 return 0;
5858 }
5859
parseHEVCLayerId(const uint8_t * data,size_t size)5860 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
5861 if (data == nullptr || size < mNALLengthSize + 2) {
5862 return -1;
5863 }
5864
5865 // HEVC NAL-header (16-bit)
5866 // 1 6 6 3
5867 // |-|uuuuuu|------|iii|
5868 // ^ ^
5869 // NAL_type layer_id + 1
5870 //
5871 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
5872 enum {
5873 TSA_N = 2,
5874 TSA_R = 3,
5875 STSA_N = 4,
5876 STSA_R = 5,
5877 };
5878
5879 data += mNALLengthSize;
5880 uint16_t nalHeader = data[0] << 8 | data[1];
5881
5882 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
5883 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
5884 int32_t layerIdPlusOne = nalHeader & 0x7u;
5885 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
5886 return layerIdPlusOne - 1;
5887 }
5888 return 0;
5889 }
5890
read(MediaBufferHelper ** out,const ReadOptions * options)5891 media_status_t MPEG4Source::read(
5892 MediaBufferHelper **out, const ReadOptions *options) {
5893 Mutex::Autolock autoLock(mLock);
5894
5895 CHECK(mStarted);
5896
5897 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
5898 *out = nullptr;
5899 return AMEDIA_ERROR_WOULD_BLOCK;
5900 }
5901
5902 if (mFirstMoofOffset > 0) {
5903 return fragmentedRead(out, options);
5904 }
5905
5906 *out = NULL;
5907
5908 int64_t targetSampleTimeUs = -1;
5909
5910 int64_t seekTimeUs;
5911 ReadOptions::SeekMode mode;
5912
5913 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5914 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
5915 if (mIsHeif) {
5916 CHECK(mSampleTable == NULL);
5917 CHECK(mItemTable != NULL);
5918 int32_t imageIndex;
5919 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
5920 return AMEDIA_ERROR_MALFORMED;
5921 }
5922
5923 status_t err;
5924 if (seekTimeUs >= 0) {
5925 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
5926 } else {
5927 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
5928 }
5929 if (err != OK) {
5930 return AMEDIA_ERROR_UNKNOWN;
5931 }
5932 } else {
5933 uint32_t findFlags = 0;
5934 switch (mode) {
5935 case ReadOptions::SEEK_PREVIOUS_SYNC:
5936 findFlags = SampleTable::kFlagBefore;
5937 break;
5938 case ReadOptions::SEEK_NEXT_SYNC:
5939 findFlags = SampleTable::kFlagAfter;
5940 break;
5941 case ReadOptions::SEEK_CLOSEST_SYNC:
5942 case ReadOptions::SEEK_CLOSEST:
5943 findFlags = SampleTable::kFlagClosest;
5944 break;
5945 case ReadOptions::SEEK_FRAME_INDEX:
5946 findFlags = SampleTable::kFlagFrameIndex;
5947 break;
5948 default:
5949 CHECK(!"Should not be here.");
5950 break;
5951 }
5952 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
5953 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
5954 if (mElstInitialEmptyEditTicks > 0) {
5955 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
5956 mTimescale;
5957 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
5958 * Hence, lower bound on seekTimeUs is 0.
5959 */
5960 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
5961 }
5962 if (mElstShiftStartTicks > 0) {
5963 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
5964 seekTimeUs += elstShiftStartUs;
5965 }
5966 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
5967 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
5968 elstShiftStartUs);
5969 }
5970
5971 uint32_t sampleIndex;
5972 status_t err = mSampleTable->findSampleAtTime(
5973 seekTimeUs, 1000000, mTimescale,
5974 &sampleIndex, findFlags);
5975
5976 if (mode == ReadOptions::SEEK_CLOSEST
5977 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5978 // We found the closest sample already, now we want the sync
5979 // sample preceding it (or the sample itself of course), even
5980 // if the subsequent sync sample is closer.
5981 findFlags = SampleTable::kFlagBefore;
5982 }
5983
5984 uint32_t syncSampleIndex = sampleIndex;
5985 // assume every non-USAC audio sample is a sync sample. This works around
5986 // seek issues with files that were incorrectly written with an
5987 // empty or single-sample stss block for the audio track
5988 if (err == OK && (!mIsAudio || mIsUsac)) {
5989 err = mSampleTable->findSyncSampleNear(
5990 sampleIndex, &syncSampleIndex, findFlags);
5991 }
5992
5993 uint64_t sampleTime;
5994 if (err == OK) {
5995 err = mSampleTable->getMetaDataForSample(
5996 sampleIndex, NULL, NULL, &sampleTime);
5997 }
5998
5999 if (err != OK) {
6000 if (err == ERROR_OUT_OF_RANGE) {
6001 // An attempt to seek past the end of the stream would
6002 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6003 // this all the way to the MediaPlayer would cause abnormal
6004 // termination. Legacy behaviour appears to be to behave as if
6005 // we had seeked to the end of stream, ending normally.
6006 return AMEDIA_ERROR_END_OF_STREAM;
6007 }
6008 ALOGV("end of stream");
6009 return AMEDIA_ERROR_UNKNOWN;
6010 }
6011
6012 if (mode == ReadOptions::SEEK_CLOSEST
6013 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6014 if (mElstInitialEmptyEditTicks > 0) {
6015 sampleTime += mElstInitialEmptyEditTicks;
6016 }
6017 if (mElstShiftStartTicks > 0){
6018 if (sampleTime > mElstShiftStartTicks) {
6019 sampleTime -= mElstShiftStartTicks;
6020 } else {
6021 sampleTime = 0;
6022 }
6023 }
6024 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6025 }
6026
6027 #if 0
6028 uint32_t syncSampleTime;
6029 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6030 syncSampleIndex, NULL, NULL, &syncSampleTime));
6031
6032 ALOGI("seek to time %lld us => sample at time %lld us, "
6033 "sync sample at time %lld us",
6034 seekTimeUs,
6035 sampleTime * 1000000ll / mTimescale,
6036 syncSampleTime * 1000000ll / mTimescale);
6037 #endif
6038
6039 mCurrentSampleIndex = syncSampleIndex;
6040 }
6041
6042 if (mBuffer != NULL) {
6043 mBuffer->release();
6044 mBuffer = NULL;
6045 }
6046
6047 // fall through
6048 }
6049
6050 off64_t offset = 0;
6051 size_t size = 0;
6052 int64_t cts;
6053 uint64_t stts;
6054 bool isSyncSample;
6055 bool newBuffer = false;
6056 if (mBuffer == NULL) {
6057 newBuffer = true;
6058
6059 status_t err;
6060 if (!mIsHeif) {
6061 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6062 (uint64_t*)&cts, &isSyncSample, &stts);
6063 if(err == OK) {
6064 if (mElstInitialEmptyEditTicks > 0) {
6065 cts += mElstInitialEmptyEditTicks;
6066 }
6067 if (mElstShiftStartTicks > 0) {
6068 // cts can be negative. for example, initial audio samples for gapless playback.
6069 cts -= (int64_t)mElstShiftStartTicks;
6070 }
6071 }
6072 } else {
6073 err = mItemTable->getImageOffsetAndSize(
6074 options && options->getSeekTo(&seekTimeUs, &mode) ?
6075 &mCurrentSampleIndex : NULL, &offset, &size);
6076
6077 cts = stts = 0;
6078 isSyncSample = 0;
6079 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6080 }
6081
6082 if (err != OK) {
6083 if (err == ERROR_END_OF_STREAM) {
6084 return AMEDIA_ERROR_END_OF_STREAM;
6085 }
6086 return AMEDIA_ERROR_UNKNOWN;
6087 }
6088
6089 err = mBufferGroup->acquire_buffer(&mBuffer);
6090
6091 if (err != OK) {
6092 CHECK(mBuffer == NULL);
6093 return AMEDIA_ERROR_UNKNOWN;
6094 }
6095 if (size > mBuffer->size()) {
6096 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6097 mBuffer->release();
6098 mBuffer = NULL;
6099 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6100 }
6101 }
6102
6103 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6104 if (newBuffer) {
6105 if (mIsPcm) {
6106 // The twos' PCM block reader assumes that all samples has the same size.
6107
6108 uint32_t samplesToRead = mSampleTable->getLastSampleIndexInChunk()
6109 - mCurrentSampleIndex + 1;
6110 if (samplesToRead > kMaxPcmFrameSize) {
6111 samplesToRead = kMaxPcmFrameSize;
6112 }
6113
6114 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6115 samplesToRead, size, mCurrentSampleIndex,
6116 mSampleTable->getLastSampleIndexInChunk());
6117
6118 size_t totalSize = samplesToRead * size;
6119 uint8_t* buf = (uint8_t *)mBuffer->data();
6120 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6121 if (bytesRead < (ssize_t)totalSize) {
6122 mBuffer->release();
6123 mBuffer = NULL;
6124
6125 return AMEDIA_ERROR_IO;
6126 }
6127
6128 AMediaFormat *meta = mBuffer->meta_data();
6129 AMediaFormat_clear(meta);
6130 AMediaFormat_setInt64(
6131 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6132 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6133
6134 int32_t byteOrder = 0;
6135 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6136 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6137
6138 if (isGetBigEndian && byteOrder == 1) {
6139 // Big-endian -> little-endian
6140 uint16_t *dstData = (uint16_t *)buf;
6141 uint16_t *srcData = (uint16_t *)buf;
6142
6143 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6144 dstData[j] = ntohs(srcData[j]);
6145 }
6146 }
6147
6148 mCurrentSampleIndex += samplesToRead;
6149 mBuffer->set_range(0, totalSize);
6150 } else {
6151 ssize_t num_bytes_read =
6152 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6153
6154 if (num_bytes_read < (ssize_t)size) {
6155 mBuffer->release();
6156 mBuffer = NULL;
6157
6158 return AMEDIA_ERROR_IO;
6159 }
6160
6161 CHECK(mBuffer != NULL);
6162 mBuffer->set_range(0, size);
6163 AMediaFormat *meta = mBuffer->meta_data();
6164 AMediaFormat_clear(meta);
6165 AMediaFormat_setInt64(
6166 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6167 AMediaFormat_setInt64(
6168 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6169
6170 if (targetSampleTimeUs >= 0) {
6171 AMediaFormat_setInt64(
6172 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6173 }
6174
6175 if (isSyncSample) {
6176 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6177 }
6178
6179 ++mCurrentSampleIndex;
6180 }
6181 }
6182
6183 *out = mBuffer;
6184 mBuffer = NULL;
6185
6186 return AMEDIA_OK;
6187
6188 } else if (mIsAC4) {
6189 CHECK(mBuffer != NULL);
6190 // Make sure there is enough space to write the sync header and the raw frame
6191 if (mBuffer->range_length() < (7 + size)) {
6192 mBuffer->release();
6193 mBuffer = NULL;
6194
6195 return AMEDIA_ERROR_IO;
6196 }
6197
6198 uint8_t *dstData = (uint8_t *)mBuffer->data();
6199 size_t dstOffset = 0;
6200 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6201 // AC40 sync word, meaning no CRC at the end of the frame
6202 dstData[dstOffset++] = 0xAC;
6203 dstData[dstOffset++] = 0x40;
6204 dstData[dstOffset++] = 0xFF;
6205 dstData[dstOffset++] = 0xFF;
6206 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6207 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6208 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6209
6210 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6211 if (numBytesRead != (ssize_t)size) {
6212 mBuffer->release();
6213 mBuffer = NULL;
6214
6215 return AMEDIA_ERROR_IO;
6216 }
6217
6218 mBuffer->set_range(0, dstOffset + size);
6219 AMediaFormat *meta = mBuffer->meta_data();
6220 AMediaFormat_clear(meta);
6221 AMediaFormat_setInt64(
6222 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6223 AMediaFormat_setInt64(
6224 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6225
6226 if (targetSampleTimeUs >= 0) {
6227 AMediaFormat_setInt64(
6228 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6229 }
6230
6231 if (isSyncSample) {
6232 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6233 }
6234
6235 ++mCurrentSampleIndex;
6236
6237 *out = mBuffer;
6238 mBuffer = NULL;
6239
6240 return AMEDIA_OK;
6241 } else {
6242 // Whole NAL units are returned but each fragment is prefixed by
6243 // the start code (0x00 00 00 01).
6244 ssize_t num_bytes_read = 0;
6245 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6246
6247 if (num_bytes_read < (ssize_t)size) {
6248 mBuffer->release();
6249 mBuffer = NULL;
6250
6251 return AMEDIA_ERROR_IO;
6252 }
6253
6254 uint8_t *dstData = (uint8_t *)mBuffer->data();
6255 size_t srcOffset = 0;
6256 size_t dstOffset = 0;
6257
6258 while (srcOffset < size) {
6259 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6260 size_t nalLength = 0;
6261 if (!isMalFormed) {
6262 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6263 srcOffset += mNALLengthSize;
6264 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6265 }
6266
6267 if (isMalFormed) {
6268 //if nallength abnormal,ignore it.
6269 ALOGW("abnormal nallength, ignore this NAL");
6270 srcOffset = size;
6271 break;
6272 }
6273
6274 if (nalLength == 0) {
6275 continue;
6276 }
6277
6278 if (dstOffset > SIZE_MAX - 4 ||
6279 dstOffset + 4 > SIZE_MAX - nalLength ||
6280 dstOffset + 4 + nalLength > mBuffer->size()) {
6281 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6282 android_errorWriteLog(0x534e4554, "27208621");
6283 mBuffer->release();
6284 mBuffer = NULL;
6285 return AMEDIA_ERROR_MALFORMED;
6286 }
6287
6288 dstData[dstOffset++] = 0;
6289 dstData[dstOffset++] = 0;
6290 dstData[dstOffset++] = 0;
6291 dstData[dstOffset++] = 1;
6292 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6293 srcOffset += nalLength;
6294 dstOffset += nalLength;
6295 }
6296 CHECK_EQ(srcOffset, size);
6297 CHECK(mBuffer != NULL);
6298 mBuffer->set_range(0, dstOffset);
6299
6300 AMediaFormat *meta = mBuffer->meta_data();
6301 AMediaFormat_clear(meta);
6302 AMediaFormat_setInt64(
6303 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6304 AMediaFormat_setInt64(
6305 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6306
6307 if (targetSampleTimeUs >= 0) {
6308 AMediaFormat_setInt64(
6309 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6310 }
6311
6312 if (mIsAVC) {
6313 uint32_t layerId = FindAVCLayerId(
6314 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6315 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6316 } else if (mIsHEVC) {
6317 int32_t layerId = parseHEVCLayerId(
6318 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6319 if (layerId >= 0) {
6320 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6321 }
6322 }
6323
6324 if (isSyncSample) {
6325 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6326 }
6327
6328 ++mCurrentSampleIndex;
6329
6330 *out = mBuffer;
6331 mBuffer = NULL;
6332
6333 return AMEDIA_OK;
6334 }
6335 }
6336
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6337 media_status_t MPEG4Source::fragmentedRead(
6338 MediaBufferHelper **out, const ReadOptions *options) {
6339
6340 ALOGV("MPEG4Source::fragmentedRead");
6341
6342 CHECK(mStarted);
6343
6344 *out = NULL;
6345
6346 int64_t targetSampleTimeUs = -1;
6347
6348 int64_t seekTimeUs;
6349 ReadOptions::SeekMode mode;
6350 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6351 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6352 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6353 if (mElstInitialEmptyEditTicks > 0) {
6354 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6355 mTimescale;
6356 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6357 * Hence, lower bound on seekTimeUs is 0.
6358 */
6359 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6360 }
6361 if (mElstShiftStartTicks > 0){
6362 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6363 seekTimeUs += elstShiftStartUs;
6364 }
6365 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6366 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6367 elstShiftStartUs);
6368
6369 int numSidxEntries = mSegments.size();
6370 if (numSidxEntries != 0) {
6371 int64_t totalTime = 0;
6372 off64_t totalOffset = mFirstMoofOffset;
6373 for (int i = 0; i < numSidxEntries; i++) {
6374 const SidxEntry *se = &mSegments[i];
6375 if (totalTime + se->mDurationUs > seekTimeUs) {
6376 // The requested time is somewhere in this segment
6377 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6378 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6379 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6380 // requested next sync, or closest sync and it was closer to the end of
6381 // this segment
6382 totalTime += se->mDurationUs;
6383 totalOffset += se->mSize;
6384 }
6385 break;
6386 }
6387 totalTime += se->mDurationUs;
6388 totalOffset += se->mSize;
6389 }
6390 mCurrentMoofOffset = totalOffset;
6391 mNextMoofOffset = -1;
6392 mCurrentSamples.clear();
6393 mCurrentSampleIndex = 0;
6394 status_t err = parseChunk(&totalOffset);
6395 if (err != OK) {
6396 return AMEDIA_ERROR_UNKNOWN;
6397 }
6398 mCurrentTime = totalTime * mTimescale / 1000000ll;
6399 } else {
6400 // without sidx boxes, we can only seek to 0
6401 mCurrentMoofOffset = mFirstMoofOffset;
6402 mNextMoofOffset = -1;
6403 mCurrentSamples.clear();
6404 mCurrentSampleIndex = 0;
6405 off64_t tmp = mCurrentMoofOffset;
6406 status_t err = parseChunk(&tmp);
6407 if (err != OK) {
6408 return AMEDIA_ERROR_UNKNOWN;
6409 }
6410 mCurrentTime = 0;
6411 }
6412
6413 if (mBuffer != NULL) {
6414 mBuffer->release();
6415 mBuffer = NULL;
6416 }
6417
6418 // fall through
6419 }
6420
6421 off64_t offset = 0;
6422 size_t size = 0;
6423 int64_t cts = 0;
6424 bool isSyncSample = false;
6425 bool newBuffer = false;
6426 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6427 newBuffer = true;
6428
6429 if (mBuffer != NULL) {
6430 mBuffer->release();
6431 mBuffer = NULL;
6432 }
6433 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6434 // move to next fragment if there is one
6435 if (mNextMoofOffset <= mCurrentMoofOffset) {
6436 return AMEDIA_ERROR_END_OF_STREAM;
6437 }
6438 off64_t nextMoof = mNextMoofOffset;
6439 mCurrentMoofOffset = nextMoof;
6440 mCurrentSamples.clear();
6441 mCurrentSampleIndex = 0;
6442 status_t err = parseChunk(&nextMoof);
6443 if (err != OK) {
6444 return AMEDIA_ERROR_UNKNOWN;
6445 }
6446 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6447 return AMEDIA_ERROR_END_OF_STREAM;
6448 }
6449 }
6450
6451 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6452 offset = smpl->offset;
6453 size = smpl->size;
6454 cts = mCurrentTime + smpl->compositionOffset;
6455
6456 if (mElstInitialEmptyEditTicks > 0) {
6457 cts += mElstInitialEmptyEditTicks;
6458 }
6459 if (mElstShiftStartTicks > 0) {
6460 // cts can be negative. for example, initial audio samples for gapless playback.
6461 cts -= (int64_t)mElstShiftStartTicks;
6462 }
6463
6464 mCurrentTime += smpl->duration;
6465 isSyncSample = (mCurrentSampleIndex == 0);
6466
6467 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6468
6469 if (err != OK) {
6470 CHECK(mBuffer == NULL);
6471 ALOGV("acquire_buffer returned %d", err);
6472 return AMEDIA_ERROR_UNKNOWN;
6473 }
6474 if (size > mBuffer->size()) {
6475 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6476 mBuffer->release();
6477 mBuffer = NULL;
6478 return AMEDIA_ERROR_UNKNOWN;
6479 }
6480 }
6481
6482 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6483 AMediaFormat *bufmeta = mBuffer->meta_data();
6484 AMediaFormat_clear(bufmeta);
6485 if (smpl->encryptedsizes.size()) {
6486 // store clear/encrypted lengths in metadata
6487 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6488 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
6489 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6490 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
6491 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6492 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6493 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6494 AMediaFormat_setInt32(bufmeta,
6495 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6496 AMediaFormat_setInt32(bufmeta,
6497 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6498
6499 void *iv = NULL;
6500 size_t ivlength = 0;
6501 if (!AMediaFormat_getBuffer(mFormat,
6502 "crypto-iv", &iv, &ivlength)) {
6503 iv = (void *) smpl->iv;
6504 ivlength = 16; // use 16 or the actual size?
6505 }
6506 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6507 }
6508
6509 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6510 if (newBuffer) {
6511 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6512 mBuffer->release();
6513 mBuffer = NULL;
6514
6515 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6516 return AMEDIA_ERROR_MALFORMED;
6517 }
6518
6519 ssize_t num_bytes_read =
6520 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6521
6522 if (num_bytes_read < (ssize_t)size) {
6523 mBuffer->release();
6524 mBuffer = NULL;
6525
6526 ALOGE("i/o error");
6527 return AMEDIA_ERROR_IO;
6528 }
6529
6530 CHECK(mBuffer != NULL);
6531 mBuffer->set_range(0, size);
6532 AMediaFormat_setInt64(bufmeta,
6533 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6534 AMediaFormat_setInt64(bufmeta,
6535 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6536
6537 if (targetSampleTimeUs >= 0) {
6538 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6539 }
6540
6541 if (mIsAVC) {
6542 uint32_t layerId = FindAVCLayerId(
6543 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6544 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6545 } else if (mIsHEVC) {
6546 int32_t layerId = parseHEVCLayerId(
6547 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6548 if (layerId >= 0) {
6549 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6550 }
6551 }
6552
6553 if (isSyncSample) {
6554 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6555 }
6556
6557 ++mCurrentSampleIndex;
6558 }
6559
6560 *out = mBuffer;
6561 mBuffer = NULL;
6562
6563 return AMEDIA_OK;
6564
6565 } else {
6566 ALOGV("whole NAL");
6567 // Whole NAL units are returned but each fragment is prefixed by
6568 // the start code (0x00 00 00 01).
6569 ssize_t num_bytes_read = 0;
6570 void *data = NULL;
6571 bool isMalFormed = false;
6572 int32_t max_size;
6573 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6574 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6575 isMalFormed = true;
6576 } else {
6577 data = mSrcBuffer;
6578 }
6579
6580 if (isMalFormed || data == NULL) {
6581 ALOGE("isMalFormed size %zu", size);
6582 if (mBuffer != NULL) {
6583 mBuffer->release();
6584 mBuffer = NULL;
6585 }
6586 return AMEDIA_ERROR_MALFORMED;
6587 }
6588 num_bytes_read = mDataSource->readAt(offset, data, size);
6589
6590 if (num_bytes_read < (ssize_t)size) {
6591 mBuffer->release();
6592 mBuffer = NULL;
6593
6594 ALOGE("i/o error");
6595 return AMEDIA_ERROR_IO;
6596 }
6597
6598 uint8_t *dstData = (uint8_t *)mBuffer->data();
6599 size_t srcOffset = 0;
6600 size_t dstOffset = 0;
6601
6602 while (srcOffset < size) {
6603 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6604 size_t nalLength = 0;
6605 if (!isMalFormed) {
6606 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6607 srcOffset += mNALLengthSize;
6608 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6609 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6610 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6611 }
6612
6613 if (isMalFormed) {
6614 ALOGE("Video is malformed; nalLength %zu", nalLength);
6615 mBuffer->release();
6616 mBuffer = NULL;
6617 return AMEDIA_ERROR_MALFORMED;
6618 }
6619
6620 if (nalLength == 0) {
6621 continue;
6622 }
6623
6624 if (dstOffset > SIZE_MAX - 4 ||
6625 dstOffset + 4 > SIZE_MAX - nalLength ||
6626 dstOffset + 4 + nalLength > mBuffer->size()) {
6627 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6628 android_errorWriteLog(0x534e4554, "26365349");
6629 mBuffer->release();
6630 mBuffer = NULL;
6631 return AMEDIA_ERROR_MALFORMED;
6632 }
6633
6634 dstData[dstOffset++] = 0;
6635 dstData[dstOffset++] = 0;
6636 dstData[dstOffset++] = 0;
6637 dstData[dstOffset++] = 1;
6638 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6639 srcOffset += nalLength;
6640 dstOffset += nalLength;
6641 }
6642 CHECK_EQ(srcOffset, size);
6643 CHECK(mBuffer != NULL);
6644 mBuffer->set_range(0, dstOffset);
6645
6646 AMediaFormat *bufmeta = mBuffer->meta_data();
6647 AMediaFormat_setInt64(bufmeta,
6648 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6649 AMediaFormat_setInt64(bufmeta,
6650 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6651
6652 if (targetSampleTimeUs >= 0) {
6653 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6654 }
6655
6656 if (isSyncSample) {
6657 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6658 }
6659
6660 ++mCurrentSampleIndex;
6661
6662 *out = mBuffer;
6663 mBuffer = NULL;
6664
6665 return AMEDIA_OK;
6666 }
6667
6668 return AMEDIA_OK;
6669 }
6670
findTrackByMimePrefix(const char * mimePrefix)6671 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6672 const char *mimePrefix) {
6673 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6674 const char *mime;
6675 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6676 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6677 return track;
6678 }
6679 }
6680
6681 return NULL;
6682 }
6683
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6684 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6685 uint8_t header[8];
6686
6687 ssize_t n = source->readAt(4, header, sizeof(header));
6688 if (n < (ssize_t)sizeof(header)) {
6689 return false;
6690 }
6691
6692 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6693 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6694 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6695 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6696 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6697 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6698 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6699 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
6700 *confidence = 0.4;
6701
6702 return true;
6703 }
6704
6705 return false;
6706 }
6707
isCompatibleBrand(uint32_t fourcc)6708 static bool isCompatibleBrand(uint32_t fourcc) {
6709 static const uint32_t kCompatibleBrands[] = {
6710 FOURCC("isom"),
6711 FOURCC("iso2"),
6712 FOURCC("avc1"),
6713 FOURCC("hvc1"),
6714 FOURCC("hev1"),
6715 FOURCC("av01"),
6716 FOURCC("3gp4"),
6717 FOURCC("mp41"),
6718 FOURCC("mp42"),
6719 FOURCC("dash"),
6720 FOURCC("nvr1"),
6721
6722 // Won't promise that the following file types can be played.
6723 // Just give these file types a chance.
6724 FOURCC("qt "), // Apple's QuickTime
6725 FOURCC("MSNV"), // Sony's PSP
6726 FOURCC("wmf "),
6727
6728 FOURCC("3g2a"), // 3GPP2
6729 FOURCC("3g2b"),
6730 FOURCC("mif1"), // HEIF image
6731 FOURCC("heic"), // HEIF image
6732 FOURCC("msf1"), // HEIF image sequence
6733 FOURCC("hevc"), // HEIF image sequence
6734 };
6735
6736 for (size_t i = 0;
6737 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6738 ++i) {
6739 if (kCompatibleBrands[i] == fourcc) {
6740 return true;
6741 }
6742 }
6743
6744 return false;
6745 }
6746
6747 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6748 // compatible brand is present.
6749 // Also try to identify where this file's metadata ends
6750 // (end of the 'moov' atom) and report it to the caller as part of
6751 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)6752 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
6753 // We scan up to 128 bytes to identify this file as an MP4.
6754 static const off64_t kMaxScanOffset = 128ll;
6755
6756 off64_t offset = 0ll;
6757 bool foundGoodFileType = false;
6758 off64_t moovAtomEndOffset = -1ll;
6759 bool done = false;
6760
6761 while (!done && offset < kMaxScanOffset) {
6762 uint32_t hdr[2];
6763 if (source->readAt(offset, hdr, 8) < 8) {
6764 return false;
6765 }
6766
6767 uint64_t chunkSize = ntohl(hdr[0]);
6768 uint32_t chunkType = ntohl(hdr[1]);
6769 off64_t chunkDataOffset = offset + 8;
6770
6771 if (chunkSize == 1) {
6772 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
6773 return false;
6774 }
6775
6776 chunkSize = ntoh64(chunkSize);
6777 chunkDataOffset += 8;
6778
6779 if (chunkSize < 16) {
6780 // The smallest valid chunk is 16 bytes long in this case.
6781 return false;
6782 }
6783 if (chunkSize > INT64_MAX) {
6784 // reject overly large chunk sizes that could
6785 // be interpreted as negative
6786 ALOGE("chunk size too large");
6787 return false;
6788 }
6789
6790 } else if (chunkSize < 8) {
6791 // The smallest valid chunk is 8 bytes long.
6792 return false;
6793 }
6794
6795 // (data_offset - offset) is either 8 or 16
6796 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
6797 if (chunkDataSize < 0) {
6798 ALOGE("b/23540914");
6799 return false;
6800 }
6801
6802 char chunkstring[5];
6803 MakeFourCCString(chunkType, chunkstring);
6804 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
6805 chunkstring, chunkSize, (long long)offset);
6806 switch (chunkType) {
6807 case FOURCC("ftyp"):
6808 {
6809 if (chunkDataSize < 8) {
6810 return false;
6811 }
6812
6813 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
6814 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
6815 if (i == 1) {
6816 // Skip this index, it refers to the minorVersion,
6817 // not a brand.
6818 continue;
6819 }
6820
6821 uint32_t brand;
6822 if (source->readAt(
6823 chunkDataOffset + 4 * i, &brand, 4) < 4) {
6824 return false;
6825 }
6826
6827 brand = ntohl(brand);
6828
6829 if (isCompatibleBrand(brand)) {
6830 foundGoodFileType = true;
6831 break;
6832 }
6833 }
6834
6835 if (!foundGoodFileType) {
6836 return false;
6837 }
6838
6839 break;
6840 }
6841
6842 case FOURCC("moov"):
6843 {
6844 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
6845 ALOGE("chunk size + offset would overflow");
6846 return false;
6847 }
6848
6849 done = true;
6850 break;
6851 }
6852
6853 default:
6854 break;
6855 }
6856
6857 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
6858 ALOGE("chunk size + offset would overflow");
6859 return false;
6860 }
6861 }
6862
6863 if (!foundGoodFileType) {
6864 return false;
6865 }
6866
6867 *confidence = 0.4f;
6868
6869 return true;
6870 }
6871
CreateExtractor(CDataSource * source,void *)6872 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
6873 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
6874 }
6875
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)6876 static CreatorFunc Sniff(
6877 CDataSource *source, float *confidence, void **,
6878 FreeMetaFunc *) {
6879 DataSourceHelper helper(source);
6880 if (BetterSniffMPEG4(&helper, confidence)) {
6881 return CreateExtractor;
6882 }
6883
6884 if (LegacySniffMPEG4(&helper, confidence)) {
6885 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
6886 return CreateExtractor;
6887 }
6888
6889 return NULL;
6890 }
6891
// NULL-terminated list of file extensions passed to the extractor definition
// returned by GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    NULL  // terminator
};
6906
6907 extern "C" {
6908 // This is the only symbol that needs to be exported
6909 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()6910 ExtractorDef GETEXTRACTORDEF() {
6911 return {
6912 EXTRACTORDEF_VERSION,
6913 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
6914 2, // version
6915 "MP4 Extractor",
6916 { .v3 = {Sniff, extensions} },
6917 };
6918 }
6919
6920 } // extern "C"
6921
6922 } // namespace android
6923