1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <memory>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include <log/log.h>
28 #include <utils/Log.h>
29
30 #include "AC4Parser.h"
31 #include "MPEG4Extractor.h"
32 #include "SampleTable.h"
33 #include "ItemTable.h"
34 #include "include/ESDS.h"
35
36 #include <media/DataSourceBase.h>
37 #include <media/ExtractorUtils.h>
38 #include <media/stagefright/foundation/ABitReader.h>
39 #include <media/stagefright/foundation/ABuffer.h>
40 #include <media/stagefright/foundation/ADebug.h>
41 #include <media/stagefright/foundation/AMessage.h>
42 #include <media/stagefright/foundation/AudioPresentationInfo.h>
43 #include <media/stagefright/foundation/AUtils.h>
44 #include <media/stagefright/foundation/ByteUtils.h>
45 #include <media/stagefright/foundation/ColorUtils.h>
46 #include <media/stagefright/foundation/avc_utils.h>
47 #include <media/stagefright/foundation/hexdump.h>
48 #include <media/stagefright/foundation/OpusHeader.h>
49 #include <media/stagefright/MediaBufferGroup.h>
50 #include <media/stagefright/MediaDefs.h>
51 #include <media/stagefright/MetaDataBase.h>
52 #include <utils/String8.h>
53
54 #include <byteswap.h>
55 #include "include/ID3.h"
56
57 #ifndef UINT32_MAX
58 #define UINT32_MAX (4294967295U)
59 #endif
60
61 #define ALAC_SPECIFIC_INFO_SIZE (36)
62
63 namespace android {
64
enum {
    // Upper bound, in bytes, on the track header chunk handed back to callers.
    kMaxTrackHeaderSize = 32,

    // Largest atom payload this extractor accepts. The spec permits bigger
    // atoms, but anything above this limit is not allowed here.
    kMaxAtomSize = 64 << 20,  // 64 MiB
};
73
74 class MPEG4Source : public MediaTrackHelper {
75 static const size_t kMaxPcmFrameSize = 8192;
76 public:
77 // Caller retains ownership of both "dataSource" and "sampleTable".
78 MPEG4Source(AMediaFormat *format,
79 DataSourceHelper *dataSource,
80 int32_t timeScale,
81 const sp<SampleTable> &sampleTable,
82 Vector<SidxEntry> &sidx,
83 const Trex *trex,
84 off64_t firstMoofOffset,
85 const sp<ItemTable> &itemTable,
86 uint64_t elstShiftStartTicks);
87 virtual status_t init();
88
89 virtual media_status_t start();
90 virtual media_status_t stop();
91
92 virtual media_status_t getFormat(AMediaFormat *);
93
94 virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
supportsNonBlockingRead()95 bool supportsNonBlockingRead() override { return true; }
96 virtual media_status_t fragmentedRead(
97 MediaBufferHelper **buffer, const ReadOptions *options = NULL);
98
99 virtual ~MPEG4Source();
100
101 private:
102 Mutex mLock;
103
104 AMediaFormat *mFormat;
105 DataSourceHelper *mDataSource;
106 int32_t mTimescale;
107 sp<SampleTable> mSampleTable;
108 uint32_t mCurrentSampleIndex;
109 uint32_t mCurrentFragmentIndex;
110 Vector<SidxEntry> &mSegments;
111 const Trex *mTrex;
112 off64_t mFirstMoofOffset;
113 off64_t mCurrentMoofOffset;
114 off64_t mNextMoofOffset;
115 uint32_t mCurrentTime; // in media timescale ticks
116 int32_t mLastParsedTrackId;
117 int32_t mTrackId;
118
119 int32_t mCryptoMode; // passed in from extractor
120 int32_t mDefaultIVSize; // passed in from extractor
121 uint8_t mCryptoKey[16]; // passed in from extractor
122 int32_t mDefaultEncryptedByteBlock;
123 int32_t mDefaultSkipByteBlock;
124 uint32_t mCurrentAuxInfoType;
125 uint32_t mCurrentAuxInfoTypeParameter;
126 int32_t mCurrentDefaultSampleInfoSize;
127 uint32_t mCurrentSampleInfoCount;
128 uint32_t mCurrentSampleInfoAllocSize;
129 uint8_t* mCurrentSampleInfoSizes;
130 uint32_t mCurrentSampleInfoOffsetCount;
131 uint32_t mCurrentSampleInfoOffsetsAllocSize;
132 uint64_t* mCurrentSampleInfoOffsets;
133
134 bool mIsAVC;
135 bool mIsHEVC;
136 bool mIsAC4;
137 bool mIsPcm;
138 size_t mNALLengthSize;
139
140 bool mStarted;
141
142 MediaBufferHelper *mBuffer;
143
144 uint8_t *mSrcBuffer;
145
146 bool mIsHeif;
147 bool mIsAudio;
148 sp<ItemTable> mItemTable;
149
150 // Start offset from composition time to presentation time.
151 // Support shift only for video tracks through mElstShiftStartTicks for now.
152 uint64_t mElstShiftStartTicks;
153
154 size_t parseNALSize(const uint8_t *data) const;
155 status_t parseChunk(off64_t *offset);
156 status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
157 status_t parseTrackFragmentRun(off64_t offset, off64_t size);
158 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
159 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
160 status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
161 status_t parseSampleEncryption(off64_t offset);
162 // returns -1 for invalid layer ID
163 int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
164
165 struct TrackFragmentHeaderInfo {
166 enum Flags {
167 kBaseDataOffsetPresent = 0x01,
168 kSampleDescriptionIndexPresent = 0x02,
169 kDefaultSampleDurationPresent = 0x08,
170 kDefaultSampleSizePresent = 0x10,
171 kDefaultSampleFlagsPresent = 0x20,
172 kDurationIsEmpty = 0x10000,
173 };
174
175 uint32_t mTrackID;
176 uint32_t mFlags;
177 uint64_t mBaseDataOffset;
178 uint32_t mSampleDescriptionIndex;
179 uint32_t mDefaultSampleDuration;
180 uint32_t mDefaultSampleSize;
181 uint32_t mDefaultSampleFlags;
182
183 uint64_t mDataOffset;
184 };
185 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
186
187 struct Sample {
188 off64_t offset;
189 size_t size;
190 uint32_t duration;
191 int32_t compositionOffset;
192 uint8_t iv[16];
193 Vector<size_t> clearsizes;
194 Vector<size_t> encryptedsizes;
195 };
196 Vector<Sample> mCurrentSamples;
197
198 MPEG4Source(const MPEG4Source &);
199 MPEG4Source &operator=(const MPEG4Source &);
200 };
201
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track.
// Sources may be wrapped multiple times to cover all tracks, i.e. each
// CachedRangedDataSource caches the sampletable metadata for a single track.
208
209 class CachedRangedDataSource : public DataSourceHelper {
210 public:
211 explicit CachedRangedDataSource(DataSourceHelper *source);
212 virtual ~CachedRangedDataSource();
213
214 ssize_t readAt(off64_t offset, void *data, size_t size) override;
215 status_t getSize(off64_t *size) override;
216 uint32_t flags() override;
217
218 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
219
220
221 private:
222 Mutex mLock;
223
224 DataSourceHelper *mSource;
225 bool mOwnsDataSource;
226 off64_t mCachedOffset;
227 size_t mCachedSize;
228 uint8_t *mCache;
229
230 void clearCache();
231
232 CachedRangedDataSource(const CachedRangedDataSource &);
233 CachedRangedDataSource &operator=(const CachedRangedDataSource &);
234 };
235
CachedRangedDataSource(DataSourceHelper * source)236 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
237 : DataSourceHelper(source),
238 mSource(source),
239 mOwnsDataSource(false),
240 mCachedOffset(0),
241 mCachedSize(0),
242 mCache(NULL) {
243 }
244
~CachedRangedDataSource()245 CachedRangedDataSource::~CachedRangedDataSource() {
246 clearCache();
247 if (mOwnsDataSource) {
248 delete mSource;
249 }
250 }
251
clearCache()252 void CachedRangedDataSource::clearCache() {
253 if (mCache) {
254 free(mCache);
255 mCache = NULL;
256 }
257
258 mCachedOffset = 0;
259 mCachedSize = 0;
260 }
261
readAt(off64_t offset,void * data,size_t size)262 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
263 Mutex::Autolock autoLock(mLock);
264
265 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
266 memcpy(data, &mCache[offset - mCachedOffset], size);
267 return size;
268 }
269
270 return mSource->readAt(offset, data, size);
271 }
272
getSize(off64_t * size)273 status_t CachedRangedDataSource::getSize(off64_t *size) {
274 return mSource->getSize(size);
275 }
276
flags()277 uint32_t CachedRangedDataSource::flags() {
278 return mSource->flags();
279 }
280
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)281 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
282 size_t size,
283 bool assumeSourceOwnershipOnSuccess) {
284 Mutex::Autolock autoLock(mLock);
285
286 clearCache();
287
288 mCache = (uint8_t *)malloc(size);
289
290 if (mCache == NULL) {
291 return -ENOMEM;
292 }
293
294 mCachedOffset = offset;
295 mCachedSize = size;
296
297 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
298
299 if (err < (ssize_t)size) {
300 clearCache();
301
302 return ERROR_IO;
303 }
304 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
305 return OK;
306 }
307
308 ////////////////////////////////////////////////////////////////////////////////
309
// Debug switch: when true, parseChunk() prints every chunk it finds to stdout
// and hexdumps up to the first 256 bytes of it.
static const bool kUseHexDump = false;
311
FourCC2MIME(uint32_t fourcc)312 static const char *FourCC2MIME(uint32_t fourcc) {
313 switch (fourcc) {
314 case FOURCC("mp4a"):
315 return MEDIA_MIMETYPE_AUDIO_AAC;
316
317 case FOURCC("samr"):
318 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
319
320 case FOURCC("sawb"):
321 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
322
323 case FOURCC("ec-3"):
324 return MEDIA_MIMETYPE_AUDIO_EAC3;
325
326 case FOURCC("mp4v"):
327 return MEDIA_MIMETYPE_VIDEO_MPEG4;
328
329 case FOURCC("s263"):
330 case FOURCC("h263"):
331 case FOURCC("H263"):
332 return MEDIA_MIMETYPE_VIDEO_H263;
333
334 case FOURCC("avc1"):
335 return MEDIA_MIMETYPE_VIDEO_AVC;
336
337 case FOURCC("hvc1"):
338 case FOURCC("hev1"):
339 return MEDIA_MIMETYPE_VIDEO_HEVC;
340 case FOURCC("ac-4"):
341 return MEDIA_MIMETYPE_AUDIO_AC4;
342 case FOURCC("Opus"):
343 return MEDIA_MIMETYPE_AUDIO_OPUS;
344
345 case FOURCC("twos"):
346 case FOURCC("sowt"):
347 return MEDIA_MIMETYPE_AUDIO_RAW;
348 case FOURCC("alac"):
349 return MEDIA_MIMETYPE_AUDIO_ALAC;
350 case FOURCC("fLaC"):
351 return MEDIA_MIMETYPE_AUDIO_FLAC;
352 case FOURCC("av01"):
353 return MEDIA_MIMETYPE_VIDEO_AV1;
354 case FOURCC(".mp3"):
355 case 0x6D730055: // "ms U" mp3 audio
356 return MEDIA_MIMETYPE_AUDIO_MPEG;
357 default:
358 ALOGW("Unknown fourcc: %c%c%c%c",
359 (fourcc >> 24) & 0xff,
360 (fourcc >> 16) & 0xff,
361 (fourcc >> 8) & 0xff,
362 fourcc & 0xff
363 );
364 return "application/octet-stream";
365 }
366 }
367
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)368 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
369 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
370 // AMR NB audio is always mono, 8kHz
371 *channels = 1;
372 *rate = 8000;
373 return true;
374 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
375 // AMR WB audio is always mono, 16kHz
376 *channels = 1;
377 *rate = 16000;
378 return true;
379 }
380 return false;
381 }
382
MPEG4Extractor(DataSourceHelper * source,const char * mime)383 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
384 : mMoofOffset(0),
385 mMoofFound(false),
386 mMdatFound(false),
387 mDataSource(source),
388 mInitCheck(NO_INIT),
389 mHeaderTimescale(0),
390 mIsQT(false),
391 mIsHeif(false),
392 mHasMoovBox(false),
393 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
394 mFirstTrack(NULL),
395 mLastTrack(NULL) {
396 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
397 mFileMetaData = AMediaFormat_new();
398 }
399
~MPEG4Extractor()400 MPEG4Extractor::~MPEG4Extractor() {
401 Track *track = mFirstTrack;
402 while (track) {
403 Track *next = track->next;
404
405 delete track;
406 track = next;
407 }
408 mFirstTrack = mLastTrack = NULL;
409
410 for (size_t i = 0; i < mPssh.size(); i++) {
411 delete [] mPssh[i].data;
412 }
413 mPssh.clear();
414
415 delete mDataSource;
416 AMediaFormat_delete(mFileMetaData);
417 }
418
flags() const419 uint32_t MPEG4Extractor::flags() const {
420 return CAN_PAUSE |
421 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
422 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
423 }
424
getMetaData(AMediaFormat * meta)425 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
426 status_t err;
427 if ((err = readMetaData()) != OK) {
428 return AMEDIA_ERROR_UNKNOWN;
429 }
430 AMediaFormat_copy(meta, mFileMetaData);
431 return AMEDIA_OK;
432 }
433
countTracks()434 size_t MPEG4Extractor::countTracks() {
435 status_t err;
436 if ((err = readMetaData()) != OK) {
437 ALOGV("MPEG4Extractor::countTracks: no tracks");
438 return 0;
439 }
440
441 size_t n = 0;
442 Track *track = mFirstTrack;
443 while (track) {
444 ++n;
445 track = track->next;
446 }
447
448 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
449 return n;
450 }
451
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)452 media_status_t MPEG4Extractor::getTrackMetaData(
453 AMediaFormat *meta,
454 size_t index, uint32_t flags) {
455 status_t err;
456 if ((err = readMetaData()) != OK) {
457 return AMEDIA_ERROR_UNKNOWN;
458 }
459
460 Track *track = mFirstTrack;
461 while (index > 0) {
462 if (track == NULL) {
463 return AMEDIA_ERROR_UNKNOWN;
464 }
465
466 track = track->next;
467 --index;
468 }
469
470 if (track == NULL) {
471 return AMEDIA_ERROR_UNKNOWN;
472 }
473
474 [=] {
475 int64_t duration;
476 int32_t samplerate;
477 // Only for audio track.
478 if (track->has_elst && mHeaderTimescale != 0 &&
479 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
480 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
481
482 // Elst has to be processed only the first time this function is called.
483 track->has_elst = false;
484
485 if (track->elst_segment_duration > INT64_MAX) {
486 return;
487 }
488 int64_t segment_duration = track->elst_segment_duration;
489 int64_t media_time = track->elst_media_time;
490 int64_t halfscale = track->timescale / 2;
491
492 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
493 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
494 segment_duration, media_time,
495 halfscale, mHeaderTimescale, track->timescale);
496
497 if ((uint32_t)samplerate != track->timescale){
498 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
499 samplerate);
500 }
501 // Both delay and paddingsamples have to be set inorder for either to be
502 // effective in the lower layers.
503 int64_t delay = 0;
504 if (media_time > 0) { // Gapless playback
505 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
506 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
507 __builtin_add_overflow(delay, halfscale, &delay) ||
508 (delay /= track->timescale, false) ||
509 delay > INT32_MAX ||
510 delay < INT32_MIN) {
511 ALOGW("ignoring edit list with bogus values");
512 return;
513 }
514 }
515 ALOGV("delay = %" PRId64, delay);
516 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
517
518 int64_t paddingsamples = 0;
519 if (segment_duration > 0) {
520 int64_t scaled_duration;
521 // scaled_duration = duration * mHeaderTimescale;
522 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
523 return;
524 }
525 ALOGV("scaled_duration = %" PRId64, scaled_duration);
526
527 int64_t segment_end;
528 int64_t padding;
529 int64_t segment_duration_e6;
530 int64_t media_time_scaled_e6;
531 int64_t media_time_scaled;
532 // padding = scaled_duration - ((segment_duration * 1000000) +
533 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
534 // segment_duration is based on timescale in movie header box(mdhd)
535 // media_time is based on timescale track header/media timescale
536 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
537 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
538 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
539 return;
540 }
541 media_time_scaled_e6 /= track->timescale;
542 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
543 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
544 return;
545 }
546 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
547 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
548 // might be slightly shorter than the segment duration, which would make the
549 // padding negative. Clamp to zero.
550 if (padding > 0) {
551 int64_t halfscale_mht = mHeaderTimescale / 2;
552 int64_t halfscale_e6;
553 int64_t timescale_e6;
554 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
555 // / (mHeaderTimescale * 1000000);
556 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
557 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
558 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
559 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
560 (paddingsamples /= timescale_e6, false) ||
561 paddingsamples > INT32_MAX) {
562 return;
563 }
564 }
565 }
566 ALOGV("paddingsamples = %" PRId64, paddingsamples);
567 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
568 }
569 }();
570
571 if ((flags & kIncludeExtensiveMetaData)
572 && !track->includes_expensive_metadata) {
573 track->includes_expensive_metadata = true;
574
575 const char *mime;
576 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
577 if (!strncasecmp("video/", mime, 6)) {
578 // MPEG2 tracks do not provide CSD, so read the stream header
579 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
580 off64_t offset;
581 size_t size;
582 if (track->sampleTable->getMetaDataForSample(
583 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
584 if (size > kMaxTrackHeaderSize) {
585 size = kMaxTrackHeaderSize;
586 }
587 uint8_t header[kMaxTrackHeaderSize];
588 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
589 AMediaFormat_setBuffer(track->meta,
590 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
591 }
592 }
593 }
594
595 if (mMoofOffset > 0) {
596 int64_t duration;
597 if (AMediaFormat_getInt64(track->meta,
598 AMEDIAFORMAT_KEY_DURATION, &duration)) {
599 // nothing fancy, just pick a frame near 1/4th of the duration
600 AMediaFormat_setInt64(track->meta,
601 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
602 }
603 } else {
604 uint32_t sampleIndex;
605 uint64_t sampleTime;
606 if (track->timescale != 0 &&
607 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
608 && track->sampleTable->getMetaDataForSample(
609 sampleIndex, NULL /* offset */, NULL /* size */,
610 &sampleTime) == OK) {
611 AMediaFormat_setInt64(track->meta,
612 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
613 ((int64_t)sampleTime * 1000000) / track->timescale);
614 }
615 }
616 }
617 }
618
619 AMediaFormat_copy(meta, track->meta);
620 return AMEDIA_OK;
621 }
622
readMetaData()623 status_t MPEG4Extractor::readMetaData() {
624 if (mInitCheck != NO_INIT) {
625 return mInitCheck;
626 }
627
628 off64_t offset = 0;
629 status_t err;
630 bool sawMoovOrSidx = false;
631
632 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
633 (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
634 (mItemTable != NULL) && mItemTable->isValid()))) {
635 off64_t orig_offset = offset;
636 err = parseChunk(&offset, 0);
637
638 if (err != OK && err != UNKNOWN_ERROR) {
639 break;
640 } else if (offset <= orig_offset) {
641 // only continue parsing if the offset was advanced,
642 // otherwise we might end up in an infinite loop
643 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
644 err = ERROR_MALFORMED;
645 break;
646 } else if (err == UNKNOWN_ERROR) {
647 sawMoovOrSidx = true;
648 }
649 }
650
651 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
652 off64_t exifOffset;
653 size_t exifSize;
654 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
655 AMediaFormat_setInt64(mFileMetaData,
656 AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
657 AMediaFormat_setInt64(mFileMetaData,
658 AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
659 }
660 for (uint32_t imageIndex = 0;
661 imageIndex < mItemTable->countImages(); imageIndex++) {
662 AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
663 if (meta == NULL) {
664 ALOGE("heif image %u has no meta!", imageIndex);
665 continue;
666 }
667 // Some heif files advertise image sequence brands (eg. 'hevc') in
668 // ftyp box, but don't have any valid tracks in them. Instead of
669 // reporting the entire file as malformed, we override the error
670 // to allow still images to be extracted.
671 if (err != OK) {
672 ALOGW("Extracting still images only");
673 err = OK;
674 }
675 mInitCheck = OK;
676
677 ALOGV("adding HEIF image track %u", imageIndex);
678 Track *track = new Track;
679 if (mLastTrack != NULL) {
680 mLastTrack->next = track;
681 } else {
682 mFirstTrack = track;
683 }
684 mLastTrack = track;
685
686 track->meta = meta;
687 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
688 track->timescale = 1000000;
689 }
690 }
691
692 if (mInitCheck == OK) {
693 if (findTrackByMimePrefix("video/") != NULL) {
694 AMediaFormat_setString(mFileMetaData,
695 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
696 } else if (findTrackByMimePrefix("audio/") != NULL) {
697 AMediaFormat_setString(mFileMetaData,
698 AMEDIAFORMAT_KEY_MIME, "audio/mp4");
699 } else if (findTrackByMimePrefix(
700 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
701 AMediaFormat_setString(mFileMetaData,
702 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
703 } else {
704 AMediaFormat_setString(mFileMetaData,
705 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
706 }
707 } else {
708 mInitCheck = err;
709 }
710
711 CHECK_NE(err, (status_t)NO_INIT);
712
713 // copy pssh data into file metadata
714 uint64_t psshsize = 0;
715 for (size_t i = 0; i < mPssh.size(); i++) {
716 psshsize += 20 + mPssh[i].datalen;
717 }
718 if (psshsize > 0 && psshsize <= UINT32_MAX) {
719 char *buf = (char*)malloc(psshsize);
720 if (!buf) {
721 ALOGE("b/28471206");
722 return NO_MEMORY;
723 }
724 char *ptr = buf;
725 for (size_t i = 0; i < mPssh.size(); i++) {
726 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
727 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
728 ptr += (20 + mPssh[i].datalen);
729 }
730 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
731 free(buf);
732 }
733
734 return mInitCheck;
735 }
736
737 struct PathAdder {
PathAdderandroid::PathAdder738 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
739 : mPath(path) {
740 mPath->push(chunkType);
741 }
742
~PathAdderandroid::PathAdder743 ~PathAdder() {
744 mPath->pop();
745 }
746
747 private:
748 Vector<uint32_t> *mPath;
749
750 PathAdder(const PathAdder &);
751 PathAdder &operator=(const PathAdder &);
752 };
753
underMetaDataPath(const Vector<uint32_t> & path)754 static bool underMetaDataPath(const Vector<uint32_t> &path) {
755 return path.size() >= 5
756 && path[0] == FOURCC("moov")
757 && path[1] == FOURCC("udta")
758 && path[2] == FOURCC("meta")
759 && path[3] == FOURCC("ilst");
760 }
761
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)762 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
763 return path.size() >= 2
764 && path[0] == FOURCC("moov")
765 && path[1] == FOURCC("meta")
766 && (depth == 2
767 || (depth == 3
768 && (path[2] == FOURCC("hdlr")
769 || path[2] == FOURCC("ilst")
770 || path[2] == FOURCC("keys"))));
771 }
772
773 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)774 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
775 // delta between mpeg4 time and unix epoch time
776 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
777 if (time_1904 < INT64_MIN + delta) {
778 return false;
779 }
780 time_t time_1970 = time_1904 - delta;
781
782 char tmp[32];
783 struct tm* tm = gmtime(&time_1970);
784 if (tm != NULL &&
785 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
786 s->setTo(tmp);
787 return true;
788 }
789 return false;
790 }
791
parseChunk(off64_t * offset,int depth)792 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
793 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
794
795 if (*offset < 0) {
796 ALOGE("b/23540914");
797 return ERROR_MALFORMED;
798 }
799 if (depth > 100) {
800 ALOGE("b/27456299");
801 return ERROR_MALFORMED;
802 }
803 uint32_t hdr[2];
804 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
805 return ERROR_IO;
806 }
807 uint64_t chunk_size = ntohl(hdr[0]);
808 int32_t chunk_type = ntohl(hdr[1]);
809 off64_t data_offset = *offset + 8;
810
811 if (chunk_size == 1) {
812 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
813 return ERROR_IO;
814 }
815 chunk_size = ntoh64(chunk_size);
816 data_offset += 8;
817
818 if (chunk_size < 16) {
819 // The smallest valid chunk is 16 bytes long in this case.
820 return ERROR_MALFORMED;
821 }
822 } else if (chunk_size == 0) {
823 if (depth == 0) {
824 // atom extends to end of file
825 off64_t sourceSize;
826 if (mDataSource->getSize(&sourceSize) == OK) {
827 chunk_size = (sourceSize - *offset);
828 } else {
829 // XXX could we just pick a "sufficiently large" value here?
830 ALOGE("atom size is 0, and data source has no size");
831 return ERROR_MALFORMED;
832 }
833 } else {
834 // not allowed for non-toplevel atoms, skip it
835 *offset += 4;
836 return OK;
837 }
838 } else if (chunk_size < 8) {
839 // The smallest valid chunk is 8 bytes long.
840 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
841 return ERROR_MALFORMED;
842 }
843
844 char chunk[5];
845 MakeFourCCString(chunk_type, chunk);
846 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
847
848 if (kUseHexDump) {
849 static const char kWhitespace[] = " ";
850 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
851 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
852
853 char buffer[256];
854 size_t n = chunk_size;
855 if (n > sizeof(buffer)) {
856 n = sizeof(buffer);
857 }
858 if (mDataSource->readAt(*offset, buffer, n)
859 < (ssize_t)n) {
860 return ERROR_IO;
861 }
862
863 hexdump(buffer, n);
864 }
865
866 PathAdder autoAdder(&mPath, chunk_type);
867
868 // (data_offset - *offset) is either 8 or 16
869 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
870 if (chunk_data_size < 0) {
871 ALOGE("b/23540914");
872 return ERROR_MALFORMED;
873 }
874 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
875 char errMsg[100];
876 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
877 ALOGE("%s (b/28615448)", errMsg);
878 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
879 return ERROR_MALFORMED;
880 }
881
882 if (chunk_type != FOURCC("cprt")
883 && chunk_type != FOURCC("covr")
884 && mPath.size() == 5 && underMetaDataPath(mPath)) {
885 off64_t stop_offset = *offset + chunk_size;
886 *offset = data_offset;
887 while (*offset < stop_offset) {
888 status_t err = parseChunk(offset, depth + 1);
889 if (err != OK) {
890 return err;
891 }
892 }
893
894 if (*offset != stop_offset) {
895 return ERROR_MALFORMED;
896 }
897
898 return OK;
899 }
900
901 switch(chunk_type) {
902 case FOURCC("moov"):
903 case FOURCC("trak"):
904 case FOURCC("mdia"):
905 case FOURCC("minf"):
906 case FOURCC("dinf"):
907 case FOURCC("stbl"):
908 case FOURCC("mvex"):
909 case FOURCC("moof"):
910 case FOURCC("traf"):
911 case FOURCC("mfra"):
912 case FOURCC("udta"):
913 case FOURCC("ilst"):
914 case FOURCC("sinf"):
915 case FOURCC("schi"):
916 case FOURCC("edts"):
917 case FOURCC("wave"):
918 {
919 if (chunk_type == FOURCC("moov") && depth != 0) {
920 ALOGE("moov: depth %d", depth);
921 return ERROR_MALFORMED;
922 }
923
924 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
925 ALOGE("duplicate moov");
926 return ERROR_MALFORMED;
927 }
928
929 if (chunk_type == FOURCC("moof") && !mMoofFound) {
930 // store the offset of the first segment
931 mMoofFound = true;
932 mMoofOffset = *offset;
933 }
934
935 if (chunk_type == FOURCC("stbl")) {
936 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
937
938 if (mDataSource->flags()
939 & (DataSourceBase::kWantsPrefetching
940 | DataSourceBase::kIsCachingDataSource)) {
941 CachedRangedDataSource *cachedSource =
942 new CachedRangedDataSource(mDataSource);
943
944 if (cachedSource->setCachedRange(
945 *offset, chunk_size,
946 true /* assume ownership on success */) == OK) {
947 mDataSource = cachedSource;
948 } else {
949 delete cachedSource;
950 }
951 }
952
953 if (mLastTrack == NULL) {
954 return ERROR_MALFORMED;
955 }
956
957 mLastTrack->sampleTable = new SampleTable(mDataSource);
958 }
959
960 bool isTrack = false;
961 if (chunk_type == FOURCC("trak")) {
962 if (depth != 1) {
963 ALOGE("trak: depth %d", depth);
964 return ERROR_MALFORMED;
965 }
966 isTrack = true;
967
968 ALOGV("adding new track");
969 Track *track = new Track;
970 if (mLastTrack) {
971 mLastTrack->next = track;
972 } else {
973 mFirstTrack = track;
974 }
975 mLastTrack = track;
976
977 track->meta = AMediaFormat_new();
978 AMediaFormat_setString(track->meta,
979 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
980 }
981
982 off64_t stop_offset = *offset + chunk_size;
983 *offset = data_offset;
984 while (*offset < stop_offset) {
985
986 // pass udata terminate
987 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
988 // handle the case that udta terminates with terminate code x00000000
989 // note that 0 terminator is optional and we just handle this case.
990 uint32_t terminate_code = 1;
991 mDataSource->readAt(*offset, &terminate_code, 4);
992 if (0 == terminate_code) {
993 *offset += 4;
994 ALOGD("Terminal code for udta");
995 continue;
996 } else {
997 ALOGW("invalid udta Terminal code");
998 }
999 }
1000
1001 status_t err = parseChunk(offset, depth + 1);
1002 if (err != OK) {
1003 if (isTrack) {
1004 mLastTrack->skipTrack = true;
1005 break;
1006 }
1007 return err;
1008 }
1009 }
1010
1011 if (*offset != stop_offset) {
1012 return ERROR_MALFORMED;
1013 }
1014
1015 if (isTrack) {
1016 int32_t trackId;
1017 // There must be exactly one track header per track.
1018
1019 if (!AMediaFormat_getInt32(mLastTrack->meta,
1020 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1021 mLastTrack->skipTrack = true;
1022 }
1023
1024 status_t err = verifyTrack(mLastTrack);
1025 if (err != OK) {
1026 mLastTrack->skipTrack = true;
1027 }
1028
1029
1030 if (mLastTrack->skipTrack) {
1031 ALOGV("skipping this track...");
1032 Track *cur = mFirstTrack;
1033
1034 if (cur == mLastTrack) {
1035 delete cur;
1036 mFirstTrack = mLastTrack = NULL;
1037 } else {
1038 while (cur && cur->next != mLastTrack) {
1039 cur = cur->next;
1040 }
1041 if (cur) {
1042 cur->next = NULL;
1043 }
1044 delete mLastTrack;
1045 mLastTrack = cur;
1046 }
1047
1048 return OK;
1049 }
1050
1051 // place things we built elsewhere into their final locations
1052
1053 // put aggregated tx3g data into the metadata
1054 if (mLastTrack->mTx3gFilled > 0) {
1055 ALOGV("Putting %zu bytes of tx3g data into meta data",
1056 mLastTrack->mTx3gFilled);
1057 AMediaFormat_setBuffer(mLastTrack->meta,
1058 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1059 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1060 // drop it now to reduce our footprint
1061 free(mLastTrack->mTx3gBuffer);
1062 mLastTrack->mTx3gBuffer = NULL;
1063 }
1064
1065 } else if (chunk_type == FOURCC("moov")) {
1066 mInitCheck = OK;
1067
1068 return UNKNOWN_ERROR; // Return a dummy error.
1069 }
1070 break;
1071 }
1072
1073 case FOURCC("schm"):
1074 {
1075
1076 *offset += chunk_size;
1077 if (!mLastTrack) {
1078 return ERROR_MALFORMED;
1079 }
1080
1081 uint32_t scheme_type;
1082 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1083 return ERROR_IO;
1084 }
1085 scheme_type = ntohl(scheme_type);
1086 int32_t mode = kCryptoModeUnencrypted;
1087 switch(scheme_type) {
1088 case FOURCC("cbc1"):
1089 {
1090 mode = kCryptoModeAesCbc;
1091 break;
1092 }
1093 case FOURCC("cbcs"):
1094 {
1095 mode = kCryptoModeAesCbc;
1096 mLastTrack->subsample_encryption = true;
1097 break;
1098 }
1099 case FOURCC("cenc"):
1100 {
1101 mode = kCryptoModeAesCtr;
1102 break;
1103 }
1104 case FOURCC("cens"):
1105 {
1106 mode = kCryptoModeAesCtr;
1107 mLastTrack->subsample_encryption = true;
1108 break;
1109 }
1110 }
1111 if (mode != kCryptoModeUnencrypted) {
1112 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1113 }
1114 break;
1115 }
1116
1117
1118 case FOURCC("elst"):
1119 {
1120 *offset += chunk_size;
1121
1122 if (!mLastTrack) {
1123 return ERROR_MALFORMED;
1124 }
1125
1126 // See 14496-12 8.6.6
1127 uint8_t version;
1128 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1129 return ERROR_IO;
1130 }
1131
1132 uint32_t entry_count;
1133 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1134 return ERROR_IO;
1135 }
1136
1137 if (entry_count != 1) {
1138 // we only support a single entry at the moment, for gapless playback
1139 // or start offset
1140 ALOGW("ignoring edit list with %d entries", entry_count);
1141 } else {
1142 off64_t entriesoffset = data_offset + 8;
1143 uint64_t segment_duration;
1144 int64_t media_time;
1145
1146 if (version == 1) {
1147 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1148 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1149 return ERROR_IO;
1150 }
1151 } else if (version == 0) {
1152 uint32_t sd;
1153 int32_t mt;
1154 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1155 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1156 return ERROR_IO;
1157 }
1158 segment_duration = sd;
1159 media_time = mt;
1160 } else {
1161 return ERROR_IO;
1162 }
1163
1164 // save these for later, because the elst atom might precede
1165 // the atoms that actually give us the duration and sample rate
1166 // needed to calculate the padding and delay values
1167 mLastTrack->has_elst = true;
1168 mLastTrack->elst_media_time = media_time;
1169 mLastTrack->elst_segment_duration = segment_duration;
1170 }
1171 break;
1172 }
1173
1174 case FOURCC("frma"):
1175 {
1176 *offset += chunk_size;
1177
1178 uint32_t original_fourcc;
1179 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1180 return ERROR_IO;
1181 }
1182 original_fourcc = ntohl(original_fourcc);
1183 ALOGV("read original format: %d", original_fourcc);
1184
1185 if (mLastTrack == NULL) {
1186 return ERROR_MALFORMED;
1187 }
1188
1189 AMediaFormat_setString(mLastTrack->meta,
1190 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1191 uint32_t num_channels = 0;
1192 uint32_t sample_rate = 0;
1193 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1194 AMediaFormat_setInt32(mLastTrack->meta,
1195 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1196 AMediaFormat_setInt32(mLastTrack->meta,
1197 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1198 }
1199
1200 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1201 off64_t tmpOffset = *offset;
1202 status_t err = parseALACSampleEntry(&tmpOffset);
1203 if (err != OK) {
1204 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1205 return err;
1206 }
1207 *offset = tmpOffset + 8;
1208 }
1209
1210 break;
1211 }
1212
1213 case FOURCC("tenc"):
1214 {
1215 *offset += chunk_size;
1216
1217 if (chunk_size < 32) {
1218 return ERROR_MALFORMED;
1219 }
1220
1221 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1222 // default IV size, 16 bytes default KeyID
1223 // (ISO 23001-7)
1224
1225 uint8_t version;
1226 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1227 < (ssize_t)sizeof(version)) {
1228 return ERROR_IO;
1229 }
1230
1231 uint8_t buf[4];
1232 memset(buf, 0, 4);
1233 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1234 return ERROR_IO;
1235 }
1236
1237 if (mLastTrack == NULL) {
1238 return ERROR_MALFORMED;
1239 }
1240
1241 uint8_t defaultEncryptedByteBlock = 0;
1242 uint8_t defaultSkipByteBlock = 0;
1243 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1244 if (version == 1) {
1245 uint32_t pattern = buf[2];
1246 defaultEncryptedByteBlock = pattern >> 4;
1247 defaultSkipByteBlock = pattern & 0xf;
1248 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1249 // use (1,0) to mean "encrypt everything"
1250 defaultEncryptedByteBlock = 1;
1251 }
1252 } else if (mLastTrack->subsample_encryption) {
1253 ALOGW("subsample_encryption should be version 1");
1254 } else if (defaultAlgorithmId > 1) {
1255 // only 0 (clear) and 1 (AES-128) are valid
1256 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1257 defaultAlgorithmId = 1;
1258 }
1259
1260 memset(buf, 0, 4);
1261 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1262 return ERROR_IO;
1263 }
1264 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1265
1266 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1267 // unencrypted data must have an IV size of 0
1268 return ERROR_MALFORMED;
1269 } else if (defaultIVSize != 0 &&
1270 defaultIVSize != 8 &&
1271 defaultIVSize != 16) {
1272 return ERROR_MALFORMED;
1273 }
1274
1275 uint8_t defaultKeyId[16];
1276
1277 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1278 return ERROR_IO;
1279 }
1280
1281 sp<ABuffer> defaultConstantIv;
1282 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1283
1284 uint8_t ivlength;
1285 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1286 < (ssize_t)sizeof(ivlength)) {
1287 return ERROR_IO;
1288 }
1289
1290 if (ivlength != 8 && ivlength != 16) {
1291 ALOGW("unsupported IV length: %u", ivlength);
1292 return ERROR_MALFORMED;
1293 }
1294
1295 defaultConstantIv = new ABuffer(ivlength);
1296 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1297 < (ssize_t)ivlength) {
1298 return ERROR_IO;
1299 }
1300
1301 defaultConstantIv->setRange(0, ivlength);
1302 }
1303
1304 int32_t tmpAlgorithmId;
1305 if (!AMediaFormat_getInt32(mLastTrack->meta,
1306 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1307 AMediaFormat_setInt32(mLastTrack->meta,
1308 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1309 }
1310
1311 AMediaFormat_setInt32(mLastTrack->meta,
1312 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1313 AMediaFormat_setBuffer(mLastTrack->meta,
1314 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1315 AMediaFormat_setInt32(mLastTrack->meta,
1316 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1317 AMediaFormat_setInt32(mLastTrack->meta,
1318 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1319 if (defaultConstantIv != NULL) {
1320 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1321 defaultConstantIv->data(), defaultConstantIv->size());
1322 }
1323 break;
1324 }
1325
1326 case FOURCC("tkhd"):
1327 {
1328 *offset += chunk_size;
1329
1330 status_t err;
1331 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1332 return err;
1333 }
1334
1335 break;
1336 }
1337
1338 case FOURCC("tref"):
1339 {
1340 off64_t stop_offset = *offset + chunk_size;
1341 *offset = data_offset;
1342 while (*offset < stop_offset) {
1343 status_t err = parseChunk(offset, depth + 1);
1344 if (err != OK) {
1345 return err;
1346 }
1347 }
1348 if (*offset != stop_offset) {
1349 return ERROR_MALFORMED;
1350 }
1351 break;
1352 }
1353
1354 case FOURCC("thmb"):
1355 {
1356 *offset += chunk_size;
1357
1358 if (mLastTrack != NULL) {
1359 // Skip thumbnail track for now since we don't have an
1360 // API to retrieve it yet.
1361 // The thumbnail track can't be accessed by negative index or time,
1362 // because each timed sample has its own corresponding thumbnail
1363 // in the thumbnail track. We'll need a dedicated API to retrieve
1364 // thumbnail at time instead.
1365 mLastTrack->skipTrack = true;
1366 }
1367
1368 break;
1369 }
1370
1371 case FOURCC("pssh"):
1372 {
1373 *offset += chunk_size;
1374
1375 PsshInfo pssh;
1376
1377 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1378 return ERROR_IO;
1379 }
1380
1381 uint32_t psshdatalen = 0;
1382 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1383 return ERROR_IO;
1384 }
1385 pssh.datalen = ntohl(psshdatalen);
1386 ALOGV("pssh data size: %d", pssh.datalen);
1387 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1388 // pssh data length exceeds size of containing box
1389 return ERROR_MALFORMED;
1390 }
1391
1392 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1393 if (pssh.data == NULL) {
1394 return ERROR_MALFORMED;
1395 }
1396 ALOGV("allocated pssh @ %p", pssh.data);
1397 ssize_t requested = (ssize_t) pssh.datalen;
1398 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1399 delete[] pssh.data;
1400 return ERROR_IO;
1401 }
1402 mPssh.push_back(pssh);
1403
1404 break;
1405 }
1406
1407 case FOURCC("mdhd"):
1408 {
1409 *offset += chunk_size;
1410
1411 if (chunk_data_size < 4 || mLastTrack == NULL) {
1412 return ERROR_MALFORMED;
1413 }
1414
1415 uint8_t version;
1416 if (mDataSource->readAt(
1417 data_offset, &version, sizeof(version))
1418 < (ssize_t)sizeof(version)) {
1419 return ERROR_IO;
1420 }
1421
1422 off64_t timescale_offset;
1423
1424 if (version == 1) {
1425 timescale_offset = data_offset + 4 + 16;
1426 } else if (version == 0) {
1427 timescale_offset = data_offset + 4 + 8;
1428 } else {
1429 return ERROR_IO;
1430 }
1431
1432 uint32_t timescale;
1433 if (mDataSource->readAt(
1434 timescale_offset, &timescale, sizeof(timescale))
1435 < (ssize_t)sizeof(timescale)) {
1436 return ERROR_IO;
1437 }
1438
1439 if (!timescale) {
1440 ALOGE("timescale should not be ZERO.");
1441 return ERROR_MALFORMED;
1442 }
1443
1444 mLastTrack->timescale = ntohl(timescale);
1445
1446 // 14496-12 says all ones means indeterminate, but some files seem to use
1447 // 0 instead. We treat both the same.
1448 int64_t duration = 0;
1449 if (version == 1) {
1450 if (mDataSource->readAt(
1451 timescale_offset + 4, &duration, sizeof(duration))
1452 < (ssize_t)sizeof(duration)) {
1453 return ERROR_IO;
1454 }
1455 if (duration != -1) {
1456 duration = ntoh64(duration);
1457 }
1458 } else {
1459 uint32_t duration32;
1460 if (mDataSource->readAt(
1461 timescale_offset + 4, &duration32, sizeof(duration32))
1462 < (ssize_t)sizeof(duration32)) {
1463 return ERROR_IO;
1464 }
1465 if (duration32 != 0xffffffff) {
1466 duration = ntohl(duration32);
1467 }
1468 }
1469 if (duration != 0 && mLastTrack->timescale != 0) {
1470 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1471 if (durationUs < 0 || durationUs > INT64_MAX) {
1472 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1473 (long long) duration, (long long) mLastTrack->timescale);
1474 return ERROR_MALFORMED;
1475 }
1476 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1477 }
1478
1479 uint8_t lang[2];
1480 off64_t lang_offset;
1481 if (version == 1) {
1482 lang_offset = timescale_offset + 4 + 8;
1483 } else if (version == 0) {
1484 lang_offset = timescale_offset + 4 + 4;
1485 } else {
1486 return ERROR_IO;
1487 }
1488
1489 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1490 < (ssize_t)sizeof(lang)) {
1491 return ERROR_IO;
1492 }
1493
1494 // To get the ISO-639-2/T three character language code
1495 // 1 bit pad followed by 3 5-bits characters. Each character
1496 // is packed as the difference between its ASCII value and 0x60.
1497 char lang_code[4];
1498 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1499 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1500 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1501 lang_code[3] = '\0';
1502
1503 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1504
1505 break;
1506 }
1507
1508 case FOURCC("stsd"):
1509 {
1510 uint8_t buffer[8];
1511 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1512 return ERROR_MALFORMED;
1513 }
1514
1515 if (mDataSource->readAt(
1516 data_offset, buffer, 8) < 8) {
1517 return ERROR_IO;
1518 }
1519
1520 if (U32_AT(buffer) != 0) {
1521 // Should be version 0, flags 0.
1522 return ERROR_MALFORMED;
1523 }
1524
1525 uint32_t entry_count = U32_AT(&buffer[4]);
1526
1527 if (entry_count > 1) {
1528 // For 3GPP timed text, there could be multiple tx3g boxes containing
1529 // multiple text display formats. These formats will be used to
1530 // display the timed text.
1531 // For encrypted files, there may also be more than one entry.
1532 const char *mime;
1533
1534 if (mLastTrack == NULL)
1535 return ERROR_MALFORMED;
1536
1537 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1538 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1539 strcasecmp(mime, "application/octet-stream")) {
1540 // For now we only support a single type of media per track.
1541 mLastTrack->skipTrack = true;
1542 *offset += chunk_size;
1543 break;
1544 }
1545 }
1546 off64_t stop_offset = *offset + chunk_size;
1547 *offset = data_offset + 8;
1548 for (uint32_t i = 0; i < entry_count; ++i) {
1549 status_t err = parseChunk(offset, depth + 1);
1550 if (err != OK) {
1551 return err;
1552 }
1553 }
1554
1555 if (*offset != stop_offset) {
1556 return ERROR_MALFORMED;
1557 }
1558 break;
1559 }
1560 case FOURCC("mett"):
1561 {
1562 *offset += chunk_size;
1563
1564 if (mLastTrack == NULL)
1565 return ERROR_MALFORMED;
1566
1567 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1568 if (buffer.get() == NULL) {
1569 return NO_MEMORY;
1570 }
1571
1572 if (mDataSource->readAt(
1573 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1574 return ERROR_IO;
1575 }
1576
1577 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1578 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1579 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1580 // make it compliant with the standard. The workaround is to write the
1581 // null-terminated mime_format string twice. This allows compliant parsers to
1582 // read the missing reserved, data_reference_index, and content_encoding fields
1583 // from the first mime_type string. The actual mime_format field would then be
1584 // read correctly from the second string. The non-compliant Android frameworks
1585 // from API 28 and earlier would still be able to read the mime_format correctly
1586 // as it would only read the first null-terminated mime_format string. To enable
1587 // reading metadata tracks generated from both the non-compliant and compliant
1588 // formats, a check needs to be done to see which format is used.
1589 int null_pos = 0;
1590 const unsigned char *str = buffer.get();
1591 while (null_pos < chunk_data_size) {
1592 if (*(str + null_pos) == '\0') {
1593 break;
1594 }
1595 ++null_pos;
1596 }
1597
1598 if (null_pos == chunk_data_size - 1) {
1599 // This is not a standard compliant metadata track.
1600 String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1601 AMediaFormat_setString(mLastTrack->meta,
1602 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1603 } else {
1604 // This is a standard compliant metadata track.
1605 String8 contentEncoding((const char *)(buffer.get() + 8));
1606 String8 mimeFormat((const char *)(buffer.get() + 8 + contentEncoding.size() + 1),
1607 chunk_data_size - 8 - contentEncoding.size() - 1);
1608 AMediaFormat_setString(mLastTrack->meta,
1609 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1610 }
1611 break;
1612 }
1613
1614 case FOURCC("mp4a"):
1615 case FOURCC("enca"):
1616 case FOURCC("samr"):
1617 case FOURCC("sawb"):
1618 case FOURCC("Opus"):
1619 case FOURCC("twos"):
1620 case FOURCC("sowt"):
1621 case FOURCC("alac"):
1622 case FOURCC("fLaC"):
1623 case FOURCC(".mp3"):
1624 case 0x6D730055: // "ms U" mp3 audio
1625 {
1626 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1627
1628 if (chunk_type == FOURCC("alac")) {
1629 off64_t offsetTmp = *offset;
1630 status_t err = parseALACSampleEntry(&offsetTmp);
1631 if (err != OK) {
1632 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1633 return err;
1634 }
1635 }
1636
1637 // Ignore all atoms embedded in QT wave atom
1638 ALOGV("Ignore all atoms embedded in QT wave atom");
1639 *offset += chunk_size;
1640 break;
1641 }
1642
1643 uint8_t buffer[8 + 20];
1644 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1645 // Basic AudioSampleEntry size.
1646 return ERROR_MALFORMED;
1647 }
1648
1649 if (mDataSource->readAt(
1650 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1651 return ERROR_IO;
1652 }
1653
1654 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1655 uint16_t version = U16_AT(&buffer[8]);
1656 uint32_t num_channels = U16_AT(&buffer[16]);
1657
1658 uint16_t sample_size = U16_AT(&buffer[18]);
1659 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1660
1661 if (mLastTrack == NULL)
1662 return ERROR_MALFORMED;
1663
1664 off64_t stop_offset = *offset + chunk_size;
1665 *offset = data_offset + sizeof(buffer);
1666
1667 if (mIsQT) {
1668 if (version == 1) {
1669 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1670 return ERROR_IO;
1671 }
1672
1673 #if 0
1674 U32_AT(buffer); // samples per packet
1675 U32_AT(&buffer[4]); // bytes per packet
1676 U32_AT(&buffer[8]); // bytes per frame
1677 U32_AT(&buffer[12]); // bytes per sample
1678 #endif
1679 *offset += 16;
1680 } else if (version == 2) {
1681 uint8_t v2buffer[36];
1682 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1683 return ERROR_IO;
1684 }
1685
1686 #if 0
1687 U32_AT(v2buffer); // size of struct only
1688 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1689 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1690 U32_AT(&v2buffer[16]); // always 0x7f000000
1691 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1692 U32_AT(&v2buffer[24]); // format specific flags
1693 U32_AT(&v2buffer[28]); // const bytes per audio packet
1694 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1695 #endif
1696 *offset += 36;
1697 }
1698 }
1699
1700 if (chunk_type != FOURCC("enca")) {
1701 // if the chunk type is enca, we'll get the type from the frma box later
1702 AMediaFormat_setString(mLastTrack->meta,
1703 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1704 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1705
1706 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1707 AMediaFormat_setInt32(mLastTrack->meta,
1708 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1709 if (chunk_type == FOURCC("twos")) {
1710 AMediaFormat_setInt32(mLastTrack->meta,
1711 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1712 }
1713 }
1714 }
1715 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1716 chunk, num_channels, sample_size, sample_rate);
1717 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1718 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1719
1720 if (chunk_type == FOURCC("Opus")) {
1721 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1722 data_offset += sizeof(buffer);
1723 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1724
1725 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1726 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1727 return ERROR_MALFORMED;
1728 }
1729 // Read Opus Header
1730 if (mDataSource->readAt(
1731 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1732 return ERROR_IO;
1733 }
1734
1735 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1736 // http://wiki.xiph.org/OggOpus#ID_Header
1737 strncpy((char *)opusInfo, "OpusHead", 8);
1738
1739 // Version shall be 0 as per mp4 Opus Specific Box
1740 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1741 if (opusInfo[8]) {
1742 return ERROR_MALFORMED;
1743 }
1744 // Force version to 1 as per OpusHead definition
1745 // (http://wiki.xiph.org/OggOpus#ID_Header)
1746 opusInfo[8] = 1;
1747
1748 // Read Opus Specific Box values
1749 size_t opusOffset = 10;
1750 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1751 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1752 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1753
1754 // Convert Opus Specific Box values. ParseOpusHeader expects
1755 // the values in LE, however MP4 stores these values as BE
1756 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1757 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1758 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1759 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1760
1761 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1762 static const int32_t kOpusSampleRate = 48000;
1763 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1764
1765 AMediaFormat_setBuffer(mLastTrack->meta,
1766 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1767 AMediaFormat_setBuffer(mLastTrack->meta,
1768 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1769 AMediaFormat_setBuffer(mLastTrack->meta,
1770 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1771
1772 data_offset += opusInfoSize;
1773 *offset = data_offset;
1774 CHECK_EQ(*offset, stop_offset);
1775 }
1776
1777 if (!mIsQT && chunk_type == FOURCC("alac")) {
1778 data_offset += sizeof(buffer);
1779
1780 status_t err = parseALACSampleEntry(&data_offset);
1781 if (err != OK) {
1782 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1783 return err;
1784 }
1785 *offset = data_offset;
1786 CHECK_EQ(*offset, stop_offset);
1787 }
1788
1789 if (chunk_type == FOURCC("fLaC")) {
1790
1791 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1792 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1793 uint8_t flacInfo[4 + 4 + 34];
1794 // skipping dFla, version
1795 data_offset += sizeof(buffer) + 12;
1796 size_t flacOffset = 4;
1797 // Add flaC header mime type to CSD
1798 strncpy((char *)flacInfo, "fLaC", 4);
1799 if (mDataSource->readAt(
1800 data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1801 (ssize_t)sizeof(flacInfo) - flacOffset) {
1802 return ERROR_IO;
1803 }
1804 data_offset += sizeof(flacInfo) - flacOffset;
1805
1806 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1807 sizeof(flacInfo));
1808 *offset = data_offset;
1809 CHECK_EQ(*offset, stop_offset);
1810 }
1811
1812 while (*offset < stop_offset) {
1813 status_t err = parseChunk(offset, depth + 1);
1814 if (err != OK) {
1815 return err;
1816 }
1817 }
1818
1819 if (*offset != stop_offset) {
1820 return ERROR_MALFORMED;
1821 }
1822 break;
1823 }
1824
1825 case FOURCC("mp4v"):
1826 case FOURCC("encv"):
1827 case FOURCC("s263"):
1828 case FOURCC("H263"):
1829 case FOURCC("h263"):
1830 case FOURCC("avc1"):
1831 case FOURCC("hvc1"):
1832 case FOURCC("hev1"):
1833 case FOURCC("av01"):
1834 {
1835 uint8_t buffer[78];
1836 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1837 // Basic VideoSampleEntry size.
1838 return ERROR_MALFORMED;
1839 }
1840
1841 if (mDataSource->readAt(
1842 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1843 return ERROR_IO;
1844 }
1845
1846 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1847 uint16_t width = U16_AT(&buffer[6 + 18]);
1848 uint16_t height = U16_AT(&buffer[6 + 20]);
1849
1850 // The video sample is not standard-compliant if it has invalid dimension.
1851 // Use some default width and height value, and
1852 // let the decoder figure out the actual width and height (and thus
1853 // be prepared for INFO_FORMAT_CHANGED event).
1854 if (width == 0) width = 352;
1855 if (height == 0) height = 288;
1856
1857 // printf("*** coding='%s' width=%d height=%d\n",
1858 // chunk, width, height);
1859
1860 if (mLastTrack == NULL)
1861 return ERROR_MALFORMED;
1862
1863 if (chunk_type != FOURCC("encv")) {
1864 // if the chunk type is encv, we'll get the type from the frma box later
1865 AMediaFormat_setString(mLastTrack->meta,
1866 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1867 }
1868 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
1869 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
1870
1871 off64_t stop_offset = *offset + chunk_size;
1872 *offset = data_offset + sizeof(buffer);
1873 while (*offset < stop_offset) {
1874 status_t err = parseChunk(offset, depth + 1);
1875 if (err != OK) {
1876 return err;
1877 }
1878 }
1879
1880 if (*offset != stop_offset) {
1881 return ERROR_MALFORMED;
1882 }
1883 break;
1884 }
1885
1886 case FOURCC("stco"):
1887 case FOURCC("co64"):
1888 {
1889 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1890 return ERROR_MALFORMED;
1891 }
1892
1893 status_t err =
1894 mLastTrack->sampleTable->setChunkOffsetParams(
1895 chunk_type, data_offset, chunk_data_size);
1896
1897 *offset += chunk_size;
1898
1899 if (err != OK) {
1900 return err;
1901 }
1902
1903 break;
1904 }
1905
1906 case FOURCC("stsc"):
1907 {
1908 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1909 return ERROR_MALFORMED;
1910
1911 status_t err =
1912 mLastTrack->sampleTable->setSampleToChunkParams(
1913 data_offset, chunk_data_size);
1914
1915 *offset += chunk_size;
1916
1917 if (err != OK) {
1918 return err;
1919 }
1920
1921 break;
1922 }
1923
1924 case FOURCC("stsz"):
1925 case FOURCC("stz2"):
1926 {
1927 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1928 return ERROR_MALFORMED;
1929 }
1930
1931 status_t err =
1932 mLastTrack->sampleTable->setSampleSizeParams(
1933 chunk_type, data_offset, chunk_data_size);
1934
1935 *offset += chunk_size;
1936
1937 if (err != OK) {
1938 return err;
1939 }
1940
1941 adjustRawDefaultFrameSize();
1942
1943 size_t max_size;
1944 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1945
1946 if (err != OK) {
1947 return err;
1948 }
1949
1950 if (max_size != 0) {
1951 // Assume that a given buffer only contains at most 10 chunks,
1952 // each chunk originally prefixed with a 2 byte length will
1953 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1954 // and thus will grow by 2 bytes per chunk.
1955 if (max_size > SIZE_MAX - 10 * 2) {
1956 ALOGE("max sample size too big: %zu", max_size);
1957 return ERROR_MALFORMED;
1958 }
1959 AMediaFormat_setInt32(mLastTrack->meta,
1960 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
1961 } else {
1962 // No size was specified. Pick a conservatively large size.
1963 uint32_t width, height;
1964 if (!AMediaFormat_getInt32(mLastTrack->meta,
1965 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
1966 !AMediaFormat_getInt32(mLastTrack->meta,
1967 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
1968 ALOGE("No width or height, assuming worst case 1080p");
1969 width = 1920;
1970 height = 1080;
1971 } else {
1972 // A resolution was specified, check that it's not too big. The values below
1973 // were chosen so that the calculations below don't cause overflows, they're
1974 // not indicating that resolutions up to 32kx32k are actually supported.
1975 if (width > 32768 || height > 32768) {
1976 ALOGE("can't support %u x %u video", width, height);
1977 return ERROR_MALFORMED;
1978 }
1979 }
1980
1981 const char *mime;
1982 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1983 if (!strncmp(mime, "audio/", 6)) {
1984 // for audio, use 128KB
1985 max_size = 1024 * 128;
1986 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1987 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1988 // AVC & HEVC require a compression ratio of at least 2, and use
1989 // macroblocks
1990 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1991 } else {
1992 // For all other formats there is no minimum compression
1993 // ratio. Use compression ratio of 1.
1994 max_size = width * height * 3 / 2;
1995 }
1996 // HACK: allow 10% overhead
1997 // TODO: read sample size from traf atom for fragmented MPEG4.
1998 max_size += max_size / 10;
1999 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2000 }
2001
2002 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2003 // mimetype) previously obtained, so don't cache them.
2004 const char *mime;
2005 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2006 // Calculate average frame rate.
2007 if (!strncasecmp("video/", mime, 6)) {
2008 size_t nSamples = mLastTrack->sampleTable->countSamples();
2009 if (nSamples == 0) {
2010 int32_t trackId;
2011 if (AMediaFormat_getInt32(mLastTrack->meta,
2012 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2013 for (size_t i = 0; i < mTrex.size(); i++) {
2014 Trex *t = &mTrex.editItemAt(i);
2015 if (t->track_ID == (uint32_t) trackId) {
2016 if (t->default_sample_duration > 0) {
2017 int32_t frameRate =
2018 mLastTrack->timescale / t->default_sample_duration;
2019 AMediaFormat_setInt32(mLastTrack->meta,
2020 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2021 }
2022 break;
2023 }
2024 }
2025 }
2026 } else {
2027 int64_t durationUs;
2028 if (AMediaFormat_getInt64(mLastTrack->meta,
2029 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2030 if (durationUs > 0) {
2031 int32_t frameRate = (nSamples * 1000000LL +
2032 (durationUs >> 1)) / durationUs;
2033 AMediaFormat_setInt32(mLastTrack->meta,
2034 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2035 }
2036 }
2037 ALOGV("setting frame count %zu", nSamples);
2038 AMediaFormat_setInt32(mLastTrack->meta,
2039 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2040 }
2041 }
2042
2043 break;
2044 }
2045
2046 case FOURCC("stts"):
2047 {
2048 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2049 return ERROR_MALFORMED;
2050
2051 *offset += chunk_size;
2052
2053 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2054 char chunk[5];
2055 MakeFourCCString(mPath[depth - 1], chunk);
2056 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2057 break;
2058 }
2059
2060 status_t err =
2061 mLastTrack->sampleTable->setTimeToSampleParams(
2062 data_offset, chunk_data_size);
2063
2064 if (err != OK) {
2065 return err;
2066 }
2067
2068 break;
2069 }
2070
2071 case FOURCC("ctts"):
2072 {
2073 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2074 return ERROR_MALFORMED;
2075
2076 *offset += chunk_size;
2077
2078 status_t err =
2079 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2080 data_offset, chunk_data_size);
2081
2082 if (err != OK) {
2083 return err;
2084 }
2085
2086 break;
2087 }
2088
2089 case FOURCC("stss"):
2090 {
2091 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2092 return ERROR_MALFORMED;
2093
2094 *offset += chunk_size;
2095
2096 status_t err =
2097 mLastTrack->sampleTable->setSyncSampleParams(
2098 data_offset, chunk_data_size);
2099
2100 if (err != OK) {
2101 return err;
2102 }
2103
2104 break;
2105 }
2106
2107 // \xA9xyz
2108 case FOURCC("\251xyz"):
2109 {
2110 *offset += chunk_size;
2111
2112 // Best case the total data length inside "\xA9xyz" box would
2113 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2114 // where "\x00\x05" is the text string length with value = 5,
2115 // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2116 // location (string) value with longitude = 0 and latitude = 0.
2117 // Since some devices encountered in the wild omit the trailing
2118 // slash, we'll allow that.
2119 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2120 return ERROR_MALFORMED;
2121 }
2122
2123 uint16_t len;
2124 if (!mDataSource->getUInt16(data_offset, &len)) {
2125 return ERROR_IO;
2126 }
2127
2128 // allow "+0+0" without trailing slash
2129 if (len < 4 || len > chunk_data_size - 4) {
2130 return ERROR_MALFORMED;
2131 }
2132 // The location string following the language code is formatted
2133 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2134 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2135 // and to add a terminating 0.
2136 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2137 if (!buffer) {
2138 return NO_MEMORY;
2139 }
2140
2141 if (mDataSource->readAt(
2142 data_offset + 4, &buffer[0], len) < len) {
2143 return ERROR_IO;
2144 }
2145
2146 len = strlen(&buffer[0]);
2147 if (len < 4) {
2148 return ERROR_MALFORMED;
2149 }
2150 // Add a trailing slash if there wasn't one.
2151 if (buffer[len - 1] != '/') {
2152 buffer[len] = '/';
2153 }
2154 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2155 break;
2156 }
2157
2158 case FOURCC("esds"):
2159 {
2160 *offset += chunk_size;
2161
2162 if (chunk_data_size < 4) {
2163 return ERROR_MALFORMED;
2164 }
2165
2166 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2167 uint8_t *buffer = tmp.get();
2168 if (buffer == NULL) {
2169 return -ENOMEM;
2170 }
2171
2172 if (mDataSource->readAt(
2173 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2174 return ERROR_IO;
2175 }
2176
2177 if (U32_AT(buffer) != 0) {
2178 // Should be version 0, flags 0.
2179 return ERROR_MALFORMED;
2180 }
2181
2182 if (mLastTrack == NULL)
2183 return ERROR_MALFORMED;
2184
2185 AMediaFormat_setBuffer(mLastTrack->meta,
2186 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2187
2188 if (mPath.size() >= 2
2189 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2190 // Information from the ESDS must be relied on for proper
2191 // setup of sample rate and channel count for MPEG4 Audio.
2192 // The generic header appears to only contain generic
2193 // information...
2194
2195 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2196 &buffer[4], chunk_data_size - 4);
2197
2198 if (err != OK) {
2199 return err;
2200 }
2201 }
2202 if (mPath.size() >= 2
2203 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2204 // Check if the video is MPEG2
2205 ESDS esds(&buffer[4], chunk_data_size - 4);
2206
2207 uint8_t objectTypeIndication;
2208 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2209 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2210 AMediaFormat_setString(mLastTrack->meta,
2211 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2212 }
2213 }
2214 }
2215 break;
2216 }
2217
2218 case FOURCC("btrt"):
2219 {
2220 *offset += chunk_size;
2221 if (mLastTrack == NULL) {
2222 return ERROR_MALFORMED;
2223 }
2224
2225 uint8_t buffer[12];
2226 if (chunk_data_size != sizeof(buffer)) {
2227 return ERROR_MALFORMED;
2228 }
2229
2230 if (mDataSource->readAt(
2231 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2232 return ERROR_IO;
2233 }
2234
2235 uint32_t maxBitrate = U32_AT(&buffer[4]);
2236 uint32_t avgBitrate = U32_AT(&buffer[8]);
2237 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2238 AMediaFormat_setInt32(mLastTrack->meta,
2239 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2240 }
2241 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2242 AMediaFormat_setInt32(mLastTrack->meta,
2243 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2244 }
2245 break;
2246 }
2247
2248 case FOURCC("avcC"):
2249 {
2250 *offset += chunk_size;
2251
2252 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2253
2254 if (buffer.get() == NULL) {
2255 ALOGE("b/28471206");
2256 return NO_MEMORY;
2257 }
2258
2259 if (mDataSource->readAt(
2260 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2261 return ERROR_IO;
2262 }
2263
2264 if (mLastTrack == NULL)
2265 return ERROR_MALFORMED;
2266
2267 AMediaFormat_setBuffer(mLastTrack->meta,
2268 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2269
2270 break;
2271 }
2272 case FOURCC("hvcC"):
2273 {
2274 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2275
2276 if (buffer.get() == NULL) {
2277 ALOGE("b/28471206");
2278 return NO_MEMORY;
2279 }
2280
2281 if (mDataSource->readAt(
2282 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2283 return ERROR_IO;
2284 }
2285
2286 if (mLastTrack == NULL)
2287 return ERROR_MALFORMED;
2288
2289 AMediaFormat_setBuffer(mLastTrack->meta,
2290 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2291
2292 *offset += chunk_size;
2293 break;
2294 }
2295 case FOURCC("av1C"):
2296 {
2297 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2298
2299 if (buffer.get() == NULL) {
2300 ALOGE("b/28471206");
2301 return NO_MEMORY;
2302 }
2303
2304 if (mDataSource->readAt(
2305 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2306 return ERROR_IO;
2307 }
2308
2309 if (mLastTrack == NULL)
2310 return ERROR_MALFORMED;
2311
2312 AMediaFormat_setBuffer(mLastTrack->meta,
2313 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2314
2315 *offset += chunk_size;
2316 break;
2317 }
2318 case FOURCC("d263"):
2319 {
2320 *offset += chunk_size;
2321 /*
2322 * d263 contains a fixed 7 bytes part:
2323 * vendor - 4 bytes
2324 * version - 1 byte
2325 * level - 1 byte
2326 * profile - 1 byte
2327 * optionally, "d263" box itself may contain a 16-byte
2328 * bit rate box (bitr)
2329 * average bit rate - 4 bytes
2330 * max bit rate - 4 bytes
2331 */
2332 char buffer[23];
2333 if (chunk_data_size != 7 &&
2334 chunk_data_size != 23) {
2335 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2336 return ERROR_MALFORMED;
2337 }
2338
2339 if (mDataSource->readAt(
2340 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2341 return ERROR_IO;
2342 }
2343
2344 if (mLastTrack == NULL)
2345 return ERROR_MALFORMED;
2346
2347 AMediaFormat_setBuffer(mLastTrack->meta,
2348 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2349
2350 break;
2351 }
2352
2353 case FOURCC("meta"):
2354 {
2355 off64_t stop_offset = *offset + chunk_size;
2356 *offset = data_offset;
2357 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2358 if (!isParsingMetaKeys) {
2359 uint8_t buffer[4];
2360 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2361 *offset = stop_offset;
2362 return ERROR_MALFORMED;
2363 }
2364
2365 if (mDataSource->readAt(
2366 data_offset, buffer, 4) < 4) {
2367 *offset = stop_offset;
2368 return ERROR_IO;
2369 }
2370
2371 if (U32_AT(buffer) != 0) {
2372 // Should be version 0, flags 0.
2373
2374 // If it's not, let's assume this is one of those
2375 // apparently malformed chunks that don't have flags
2376 // and completely different semantics than what's
2377 // in the MPEG4 specs and skip it.
2378 *offset = stop_offset;
2379 return OK;
2380 }
2381 *offset += sizeof(buffer);
2382 }
2383
2384 while (*offset < stop_offset) {
2385 status_t err = parseChunk(offset, depth + 1);
2386 if (err != OK) {
2387 return err;
2388 }
2389 }
2390
2391 if (*offset != stop_offset) {
2392 return ERROR_MALFORMED;
2393 }
2394 break;
2395 }
2396
2397 case FOURCC("iloc"):
2398 case FOURCC("iinf"):
2399 case FOURCC("iprp"):
2400 case FOURCC("pitm"):
2401 case FOURCC("idat"):
2402 case FOURCC("iref"):
2403 case FOURCC("ipro"):
2404 {
2405 if (mIsHeif) {
2406 if (mItemTable == NULL) {
2407 mItemTable = new ItemTable(mDataSource);
2408 }
2409 status_t err = mItemTable->parse(
2410 chunk_type, data_offset, chunk_data_size);
2411 if (err != OK) {
2412 return err;
2413 }
2414 }
2415 *offset += chunk_size;
2416 break;
2417 }
2418
2419 case FOURCC("mean"):
2420 case FOURCC("name"):
2421 case FOURCC("data"):
2422 {
2423 *offset += chunk_size;
2424
2425 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2426 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2427
2428 if (err != OK) {
2429 return err;
2430 }
2431 }
2432
2433 break;
2434 }
2435
2436 case FOURCC("mvhd"):
2437 {
2438 *offset += chunk_size;
2439
2440 if (depth != 1) {
2441 ALOGE("mvhd: depth %d", depth);
2442 return ERROR_MALFORMED;
2443 }
2444 if (chunk_data_size < 32) {
2445 return ERROR_MALFORMED;
2446 }
2447
2448 uint8_t header[32];
2449 if (mDataSource->readAt(
2450 data_offset, header, sizeof(header))
2451 < (ssize_t)sizeof(header)) {
2452 return ERROR_IO;
2453 }
2454
2455 uint64_t creationTime;
2456 uint64_t duration = 0;
2457 if (header[0] == 1) {
2458 creationTime = U64_AT(&header[4]);
2459 mHeaderTimescale = U32_AT(&header[20]);
2460 duration = U64_AT(&header[24]);
2461 if (duration == 0xffffffffffffffff) {
2462 duration = 0;
2463 }
2464 } else if (header[0] != 0) {
2465 return ERROR_MALFORMED;
2466 } else {
2467 creationTime = U32_AT(&header[4]);
2468 mHeaderTimescale = U32_AT(&header[12]);
2469 uint32_t d32 = U32_AT(&header[16]);
2470 if (d32 == 0xffffffff) {
2471 d32 = 0;
2472 }
2473 duration = d32;
2474 }
2475 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2476 AMediaFormat_setInt64(mFileMetaData,
2477 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2478 }
2479
2480 String8 s;
2481 if (convertTimeToDate(creationTime, &s)) {
2482 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2483 }
2484
2485 break;
2486 }
2487
2488 case FOURCC("mehd"):
2489 {
2490 *offset += chunk_size;
2491
2492 if (chunk_data_size < 8) {
2493 return ERROR_MALFORMED;
2494 }
2495
2496 uint8_t flags[4];
2497 if (mDataSource->readAt(
2498 data_offset, flags, sizeof(flags))
2499 < (ssize_t)sizeof(flags)) {
2500 return ERROR_IO;
2501 }
2502
2503 uint64_t duration = 0;
2504 if (flags[0] == 1) {
2505 // 64 bit
2506 if (chunk_data_size < 12) {
2507 return ERROR_MALFORMED;
2508 }
2509 mDataSource->getUInt64(data_offset + 4, &duration);
2510 if (duration == 0xffffffffffffffff) {
2511 duration = 0;
2512 }
2513 } else if (flags[0] == 0) {
2514 // 32 bit
2515 uint32_t d32;
2516 mDataSource->getUInt32(data_offset + 4, &d32);
2517 if (d32 == 0xffffffff) {
2518 d32 = 0;
2519 }
2520 duration = d32;
2521 } else {
2522 return ERROR_MALFORMED;
2523 }
2524
2525 if (duration != 0 && mHeaderTimescale != 0) {
2526 AMediaFormat_setInt64(mFileMetaData,
2527 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2528 }
2529
2530 break;
2531 }
2532
2533 case FOURCC("mdat"):
2534 {
2535 mMdatFound = true;
2536
2537 *offset += chunk_size;
2538 break;
2539 }
2540
2541 case FOURCC("hdlr"):
2542 {
2543 *offset += chunk_size;
2544
2545 if (underQTMetaPath(mPath, 3)) {
2546 break;
2547 }
2548
2549 uint32_t buffer;
2550 if (mDataSource->readAt(
2551 data_offset + 8, &buffer, 4) < 4) {
2552 return ERROR_IO;
2553 }
2554
2555 uint32_t type = ntohl(buffer);
2556 // For the 3GPP file format, the handler-type within the 'hdlr' box
2557 // shall be 'text'. We also want to support 'sbtl' handler type
2558 // for a practical reason as various MPEG4 containers use it.
2559 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2560 if (mLastTrack != NULL) {
2561 AMediaFormat_setString(mLastTrack->meta,
2562 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2563 }
2564 }
2565
2566 break;
2567 }
2568
2569 case FOURCC("keys"):
2570 {
2571 *offset += chunk_size;
2572
2573 if (underQTMetaPath(mPath, 3)) {
2574 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2575 if (err != OK) {
2576 return err;
2577 }
2578 }
2579 break;
2580 }
2581
2582 case FOURCC("trex"):
2583 {
2584 *offset += chunk_size;
2585
2586 if (chunk_data_size < 24) {
2587 return ERROR_IO;
2588 }
2589 Trex trex;
2590 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2591 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2592 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2593 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2594 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2595 return ERROR_IO;
2596 }
2597 mTrex.add(trex);
2598 break;
2599 }
2600
2601 case FOURCC("tx3g"):
2602 {
2603 if (mLastTrack == NULL)
2604 return ERROR_MALFORMED;
2605
2606 // complain about ridiculous chunks
2607 if (chunk_size > kMaxAtomSize) {
2608 return ERROR_MALFORMED;
2609 }
2610
2611 // complain about empty atoms
2612 if (chunk_data_size <= 0) {
2613 ALOGE("b/124330204");
2614 android_errorWriteLog(0x534e4554, "124330204");
2615 return ERROR_MALFORMED;
2616 }
2617
2618 // should fill buffer based on "data_offset" and "chunk_data_size"
2619 // instead of *offset and chunk_size;
2620 // but we've been feeding the extra data to consumers for multiple releases and
2621 // if those apps are compensating for it, we'd break them with such a change
2622 //
2623
2624 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2625 size_t growth = kTx3gGrowth;
2626 if (growth < chunk_size) {
2627 growth = chunk_size;
2628 }
2629 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2630 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2631 ALOGE("b/124330204 - too much space");
2632 android_errorWriteLog(0x534e4554, "124330204");
2633 return ERROR_MALFORMED;
2634 }
2635 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2636 mLastTrack->mTx3gSize + growth);
2637 if (updated == NULL) {
2638 return ERROR_MALFORMED;
2639 }
2640 mLastTrack->mTx3gBuffer = updated;
2641 mLastTrack->mTx3gSize += growth;
2642 }
2643
2644 if ((size_t)(mDataSource->readAt(*offset,
2645 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2646 chunk_size))
2647 < chunk_size) {
2648
2649 // advance read pointer so we don't end up reading this again
2650 *offset += chunk_size;
2651 return ERROR_IO;
2652 }
2653
2654 mLastTrack->mTx3gFilled += chunk_size;
2655 *offset += chunk_size;
2656 break;
2657 }
2658
2659 case FOURCC("covr"):
2660 {
2661 *offset += chunk_size;
2662
2663 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2664 chunk_data_size, data_offset);
2665
2666 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2667 return ERROR_MALFORMED;
2668 }
2669 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2670 if (buffer.get() == NULL) {
2671 ALOGE("b/28471206");
2672 return NO_MEMORY;
2673 }
2674 if (mDataSource->readAt(
2675 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2676 return ERROR_IO;
2677 }
2678 const int kSkipBytesOfDataBox = 16;
2679 if (chunk_data_size <= kSkipBytesOfDataBox) {
2680 return ERROR_MALFORMED;
2681 }
2682
2683 AMediaFormat_setBuffer(mFileMetaData,
2684 AMEDIAFORMAT_KEY_ALBUMART,
2685 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2686
2687 break;
2688 }
2689
2690 case FOURCC("colr"):
2691 {
2692 *offset += chunk_size;
2693 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2694 // ignore otherwise
2695 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
2696 status_t err = parseColorInfo(data_offset, chunk_data_size);
2697 if (err != OK) {
2698 return err;
2699 }
2700 }
2701
2702 break;
2703 }
2704
2705 case FOURCC("titl"):
2706 case FOURCC("perf"):
2707 case FOURCC("auth"):
2708 case FOURCC("gnre"):
2709 case FOURCC("albm"):
2710 case FOURCC("yrrc"):
2711 {
2712 *offset += chunk_size;
2713
2714 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2715
2716 if (err != OK) {
2717 return err;
2718 }
2719
2720 break;
2721 }
2722
2723 case FOURCC("ID32"):
2724 {
2725 *offset += chunk_size;
2726
2727 if (chunk_data_size < 6) {
2728 return ERROR_MALFORMED;
2729 }
2730
2731 parseID3v2MetaData(data_offset + 6);
2732
2733 break;
2734 }
2735
2736 case FOURCC("----"):
2737 {
2738 mLastCommentMean.clear();
2739 mLastCommentName.clear();
2740 mLastCommentData.clear();
2741 *offset += chunk_size;
2742 break;
2743 }
2744
2745 case FOURCC("sidx"):
2746 {
2747 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2748 if (err != OK) {
2749 return err;
2750 }
2751 *offset += chunk_size;
2752 return UNKNOWN_ERROR; // stop parsing after sidx
2753 }
2754
2755 case FOURCC("ac-3"):
2756 {
2757 *offset += chunk_size;
2758 // bypass ac-3 if parse fail
2759 if (parseAC3SpecificBox(data_offset) != OK) {
2760 if (mLastTrack != NULL) {
2761 ALOGW("Fail to parse ac-3");
2762 mLastTrack->skipTrack = true;
2763 }
2764 }
2765 return OK;
2766 }
2767
2768 case FOURCC("ec-3"):
2769 {
2770 *offset += chunk_size;
2771 // bypass ec-3 if parse fail
2772 if (parseEAC3SpecificBox(data_offset) != OK) {
2773 if (mLastTrack != NULL) {
2774 ALOGW("Fail to parse ec-3");
2775 mLastTrack->skipTrack = true;
2776 }
2777 }
2778 return OK;
2779 }
2780
2781 case FOURCC("ac-4"):
2782 {
2783 *offset += chunk_size;
2784 // bypass ac-4 if parse fail
2785 if (parseAC4SpecificBox(data_offset) != OK) {
2786 if (mLastTrack != NULL) {
2787 ALOGW("Fail to parse ac-4");
2788 mLastTrack->skipTrack = true;
2789 }
2790 }
2791 return OK;
2792 }
2793
2794 case FOURCC("ftyp"):
2795 {
2796 if (chunk_data_size < 8 || depth != 0) {
2797 return ERROR_MALFORMED;
2798 }
2799
2800 off64_t stop_offset = *offset + chunk_size;
2801 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2802 std::set<uint32_t> brandSet;
2803 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2804 if (i == 1) {
2805 // Skip this index, it refers to the minorVersion,
2806 // not a brand.
2807 continue;
2808 }
2809
2810 uint32_t brand;
2811 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2812 return ERROR_MALFORMED;
2813 }
2814
2815 brand = ntohl(brand);
2816 brandSet.insert(brand);
2817 }
2818
2819 if (brandSet.count(FOURCC("qt ")) > 0) {
2820 mIsQT = true;
2821 } else {
2822 if (brandSet.count(FOURCC("mif1")) > 0
2823 && brandSet.count(FOURCC("heic")) > 0) {
2824 ALOGV("identified HEIF image");
2825
2826 mIsHeif = true;
2827 brandSet.erase(FOURCC("mif1"));
2828 brandSet.erase(FOURCC("heic"));
2829 }
2830
2831 if (!brandSet.empty()) {
2832 // This means that the file should have moov box.
2833 // It could be any iso files (mp4, heifs, etc.)
2834 mHasMoovBox = true;
2835 if (mIsHeif) {
2836 ALOGV("identified HEIF image with other tracks");
2837 }
2838 }
2839 }
2840
2841 *offset = stop_offset;
2842
2843 break;
2844 }
2845
2846 default:
2847 {
2848 // check if we're parsing 'ilst' for meta keys
2849 // if so, treat type as a number (key-id).
2850 if (underQTMetaPath(mPath, 3)) {
2851 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2852 if (err != OK) {
2853 return err;
2854 }
2855 }
2856
2857 *offset += chunk_size;
2858 break;
2859 }
2860 }
2861
2862 return OK;
2863 }
2864
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)2865 status_t MPEG4Extractor::parseChannelCountSampleRate(
2866 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
2867 // skip 16 bytes:
2868 // + 6-byte reserved,
2869 // + 2-byte data reference index,
2870 // + 8-byte reserved
2871 *offset += 16;
2872 if (!mDataSource->getUInt16(*offset, channelCount)) {
2873 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
2874 return ERROR_MALFORMED;
2875 }
2876 // skip 8 bytes:
2877 // + 2-byte channelCount,
2878 // + 2-byte sample size,
2879 // + 4-byte reserved
2880 *offset += 8;
2881 if (!mDataSource->getUInt16(*offset, sampleRate)) {
2882 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
2883 return ERROR_MALFORMED;
2884 }
2885 // skip 4 bytes:
2886 // + 2-byte sampleRate,
2887 // + 2-byte reserved
2888 *offset += 4;
2889 return OK;
2890 }
2891
parseAC4SpecificBox(off64_t offset)2892 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
2893 if (mLastTrack == NULL) {
2894 return ERROR_MALFORMED;
2895 }
2896
2897 uint16_t sampleRate, channelCount;
2898 status_t status;
2899 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
2900 return status;
2901 }
2902 uint32_t size;
2903 // + 4-byte size
2904 // + 4-byte type
2905 // + 3-byte payload
2906 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
2907 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
2908 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
2909 return ERROR_MALFORMED;
2910 }
2911
2912 // + 4-byte size
2913 offset += 4;
2914 uint32_t type;
2915 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
2916 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
2917 return ERROR_MALFORMED;
2918 }
2919
2920 // + 4-byte type
2921 offset += 4;
2922 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
2923 uint8_t chunk[kAC4SpecificBoxPayloadSize];
2924 ssize_t dsiSize = size - 8; // size of box - size and type fields
2925 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
2926 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
2927 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
2928 return ERROR_MALFORMED;
2929 }
2930 // + size-byte payload
2931 offset += dsiSize;
2932 ABitReader br(chunk, dsiSize);
2933 AC4DSIParser parser(br);
2934 if (!parser.parse()){
2935 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
2936 return ERROR_MALFORMED;
2937 }
2938
2939 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
2940 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
2941 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
2942
2943 AudioPresentationCollection presentations;
2944 // translate the AC4 presentation information to audio presentations for this track
2945 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
2946 if (!ac4Presentations.empty()) {
2947 for (const auto& ac4Presentation : ac4Presentations) {
2948 auto& presentation = ac4Presentation.second;
2949 if (!presentation.mEnabled) {
2950 continue;
2951 }
2952 AudioPresentationV1 ap;
2953 ap.mPresentationId = presentation.mGroupIndex;
2954 ap.mProgramId = presentation.mProgramID;
2955 ap.mLanguage = presentation.mLanguage;
2956 if (presentation.mPreVirtualized) {
2957 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
2958 } else {
2959 switch (presentation.mChannelMode) {
2960 case AC4Parser::AC4Presentation::kChannelMode_Mono:
2961 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
2962 ap.mMasteringIndication = MASTERED_FOR_STEREO;
2963 break;
2964 case AC4Parser::AC4Presentation::kChannelMode_3_0:
2965 case AC4Parser::AC4Presentation::kChannelMode_5_0:
2966 case AC4Parser::AC4Presentation::kChannelMode_5_1:
2967 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
2968 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
2969 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
2970 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
2971 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
2972 break;
2973 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
2974 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
2975 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
2976 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
2977 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
2978 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
2979 case AC4Parser::AC4Presentation::kChannelMode_22_2:
2980 ap.mMasteringIndication = MASTERED_FOR_3D;
2981 break;
2982 default:
2983 ALOGE("Invalid channel mode in AC4 presentation");
2984 return ERROR_MALFORMED;
2985 }
2986 }
2987
2988 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
2989 AC4Parser::AC4Presentation::kVisuallyImpaired);
2990 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
2991 AC4Parser::AC4Presentation::kVoiceOver);
2992 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
2993 if (!ap.mLanguage.empty()) {
2994 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
2995 }
2996 presentations.push_back(std::move(ap));
2997 }
2998 }
2999
3000 if (presentations.empty()) {
3001 // Clear audio presentation info in metadata.
3002 AMediaFormat_setBuffer(
3003 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3004 } else {
3005 std::ostringstream outStream(std::ios::out);
3006 serializeAudioPresentations(presentations, &outStream);
3007 AMediaFormat_setBuffer(
3008 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3009 outStream.str().data(), outStream.str().size());
3010 }
3011 return OK;
3012 }
3013
parseEAC3SpecificBox(off64_t offset)3014 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3015 if (mLastTrack == NULL) {
3016 return ERROR_MALFORMED;
3017 }
3018
3019 uint16_t sampleRate, channels;
3020 status_t status;
3021 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3022 return status;
3023 }
3024 uint32_t size;
3025 // + 4-byte size
3026 // + 4-byte type
3027 // + 3-byte payload
3028 const uint32_t kEAC3SpecificBoxMinSize = 11;
3029 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3030 // calculated from the required bits read below as well as the maximum number of independent
3031 // and dependant sub streams you can have
3032 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3033 if (!mDataSource->getUInt32(offset, &size) ||
3034 size < kEAC3SpecificBoxMinSize ||
3035 size > kEAC3SpecificBoxMaxSize) {
3036 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3037 return ERROR_MALFORMED;
3038 }
3039
3040 offset += 4;
3041 uint32_t type;
3042 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3043 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3044 return ERROR_MALFORMED;
3045 }
3046
3047 offset += 4;
3048 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3049 if (chunk == NULL) {
3050 return ERROR_MALFORMED;
3051 }
3052
3053 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3054 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3055 delete[] chunk;
3056 return ERROR_MALFORMED;
3057 }
3058
3059 ABitReader br(chunk, size);
3060 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3061 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3062
3063 if (br.numBitsLeft() < 16) {
3064 delete[] chunk;
3065 return ERROR_MALFORMED;
3066 }
3067 unsigned data_rate = br.getBits(13);
3068 ALOGV("EAC3 data rate = %d", data_rate);
3069
3070 unsigned num_ind_sub = br.getBits(3) + 1;
3071 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3072 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3073 delete[] chunk;
3074 return ERROR_MALFORMED;
3075 }
3076
3077 unsigned channelCount = 0;
3078 for (unsigned i = 0; i < num_ind_sub; i++) {
3079 unsigned fscod = br.getBits(2);
3080 if (fscod == 3) {
3081 ALOGE("Incorrect fscod (3) in EAC3 header");
3082 delete[] chunk;
3083 return ERROR_MALFORMED;
3084 }
3085 unsigned boxSampleRate = sampleRateTable[fscod];
3086 if (boxSampleRate != sampleRate) {
3087 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3088 boxSampleRate, sampleRate);
3089 delete[] chunk;
3090 return ERROR_MALFORMED;
3091 }
3092
3093 unsigned bsid = br.getBits(5);
3094 if (bsid == 9 || bsid == 10) {
3095 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3096 } else if (bsid > 16) {
3097 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3098 delete[] chunk;
3099 return ERROR_MALFORMED;
3100 }
3101
3102 // skip
3103 br.skipBits(2);
3104 unsigned bsmod = br.getBits(3);
3105 unsigned acmod = br.getBits(3);
3106 unsigned lfeon = br.getBits(1);
3107 // we currently only support the first stream
3108 if (i == 0)
3109 channelCount = channelCountTable[acmod] + lfeon;
3110 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3111
3112 br.skipBits(3);
3113 unsigned num_dep_sub = br.getBits(4);
3114 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3115 if (num_dep_sub != 0) {
3116 if (br.numBitsLeft() < 9) {
3117 delete[] chunk;
3118 return ERROR_MALFORMED;
3119 }
3120 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3121 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3122 unsigned chan_loc = br.getBits(9);
3123 unsigned mask = 1;
3124 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3125 if ((chan_loc & mask) != 0) {
3126 // we currently only support the first stream
3127 if (i == 0) {
3128 channelCount++;
3129 // these are 2 channels in the mask
3130 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3131 channelCount++;
3132 }
3133 }
3134 ALOGV(" %s", chan_loc_tbl[j]);
3135 }
3136 }
3137 } else {
3138 if (br.numBitsLeft() == 0) {
3139 delete[] chunk;
3140 return ERROR_MALFORMED;
3141 }
3142 br.skipBits(1);
3143 }
3144 }
3145
3146 if (br.numBitsLeft() != 0) {
3147 if (br.numBitsLeft() < 8) {
3148 delete[] chunk;
3149 return ERROR_MALFORMED;
3150 }
3151 unsigned mask = br.getBits(8);
3152 for (unsigned i = 0; i < 8; i++) {
3153 if (((0x1 << i) && mask) == 0)
3154 continue;
3155
3156 if (br.numBitsLeft() < 8) {
3157 delete[] chunk;
3158 return ERROR_MALFORMED;
3159 }
3160 switch (i) {
3161 case 0: {
3162 unsigned complexity = br.getBits(8);
3163 ALOGV("Found a JOC stream with complexity = %d", complexity);
3164 }break;
3165 default: {
3166 br.skipBits(8);
3167 }break;
3168 }
3169 }
3170 }
3171 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3172 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3173 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3174
3175 delete[] chunk;
3176 return OK;
3177 }
3178
parseAC3SpecificBox(off64_t offset)3179 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3180 if (mLastTrack == NULL) {
3181 return ERROR_MALFORMED;
3182 }
3183
3184 uint16_t sampleRate, channels;
3185 status_t status;
3186 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3187 return status;
3188 }
3189 uint32_t size;
3190 // + 4-byte size
3191 // + 4-byte type
3192 // + 3-byte payload
3193 const uint32_t kAC3SpecificBoxSize = 11;
3194 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3195 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3196 return ERROR_MALFORMED;
3197 }
3198
3199 offset += 4;
3200 uint32_t type;
3201 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3202 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3203 return ERROR_MALFORMED;
3204 }
3205
3206 offset += 4;
3207 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3208 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3209 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3210 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3211 return ERROR_MALFORMED;
3212 }
3213
3214 ABitReader br(chunk, sizeof(chunk));
3215 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3216 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3217
3218 unsigned fscod = br.getBits(2);
3219 if (fscod == 3) {
3220 ALOGE("Incorrect fscod (3) in AC3 header");
3221 return ERROR_MALFORMED;
3222 }
3223 unsigned boxSampleRate = sampleRateTable[fscod];
3224 if (boxSampleRate != sampleRate) {
3225 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3226 boxSampleRate, sampleRate);
3227 return ERROR_MALFORMED;
3228 }
3229
3230 unsigned bsid = br.getBits(5);
3231 if (bsid > 8) {
3232 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3233 return ERROR_MALFORMED;
3234 }
3235
3236 // skip
3237 unsigned bsmod __unused = br.getBits(3);
3238
3239 unsigned acmod = br.getBits(3);
3240 unsigned lfeon = br.getBits(1);
3241 unsigned channelCount = channelCountTable[acmod] + lfeon;
3242
3243 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3244 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3245 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3246 return OK;
3247 }
3248
parseALACSampleEntry(off64_t * offset)3249 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3250 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3251 // Store ALAC magic cookie (decoder needs it).
3252 uint8_t alacInfo[12];
3253 off64_t data_offset = *offset;
3254
3255 if (mDataSource->readAt(
3256 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3257 return ERROR_IO;
3258 }
3259 uint32_t size = U32_AT(&alacInfo[0]);
3260 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3261 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3262 (U32_AT(&alacInfo[8]) != 0)) {
3263 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3264 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3265 return ERROR_MALFORMED;
3266 }
3267 data_offset += sizeof(alacInfo);
3268 uint8_t cookie[size - sizeof(alacInfo)];
3269 if (mDataSource->readAt(
3270 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3271 return ERROR_IO;
3272 }
3273
3274 uint8_t bitsPerSample = cookie[5];
3275 AMediaFormat_setInt32(mLastTrack->meta,
3276 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3277 AMediaFormat_setInt32(mLastTrack->meta,
3278 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3279 AMediaFormat_setInt32(mLastTrack->meta,
3280 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3281 AMediaFormat_setBuffer(mLastTrack->meta,
3282 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3283 data_offset += sizeof(cookie);
3284 *offset = data_offset;
3285 return OK;
3286 }
3287
parseSegmentIndex(off64_t offset,size_t size)3288 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3289 ALOGV("MPEG4Extractor::parseSegmentIndex");
3290
3291 if (size < 12) {
3292 return -EINVAL;
3293 }
3294
3295 uint32_t flags;
3296 if (!mDataSource->getUInt32(offset, &flags)) {
3297 return ERROR_MALFORMED;
3298 }
3299
3300 uint32_t version = flags >> 24;
3301 flags &= 0xffffff;
3302
3303 ALOGV("sidx version %d", version);
3304
3305 uint32_t referenceId;
3306 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3307 return ERROR_MALFORMED;
3308 }
3309
3310 uint32_t timeScale;
3311 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3312 return ERROR_MALFORMED;
3313 }
3314 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3315 if (timeScale == 0)
3316 return ERROR_MALFORMED;
3317
3318 uint64_t earliestPresentationTime;
3319 uint64_t firstOffset;
3320
3321 offset += 12;
3322 size -= 12;
3323
3324 if (version == 0) {
3325 if (size < 8) {
3326 return -EINVAL;
3327 }
3328 uint32_t tmp;
3329 if (!mDataSource->getUInt32(offset, &tmp)) {
3330 return ERROR_MALFORMED;
3331 }
3332 earliestPresentationTime = tmp;
3333 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3334 return ERROR_MALFORMED;
3335 }
3336 firstOffset = tmp;
3337 offset += 8;
3338 size -= 8;
3339 } else {
3340 if (size < 16) {
3341 return -EINVAL;
3342 }
3343 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3344 return ERROR_MALFORMED;
3345 }
3346 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3347 return ERROR_MALFORMED;
3348 }
3349 offset += 16;
3350 size -= 16;
3351 }
3352 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3353
3354 if (size < 4) {
3355 return -EINVAL;
3356 }
3357
3358 uint16_t referenceCount;
3359 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3360 return ERROR_MALFORMED;
3361 }
3362 offset += 4;
3363 size -= 4;
3364 ALOGV("refcount: %d", referenceCount);
3365
3366 if (size < referenceCount * 12) {
3367 return -EINVAL;
3368 }
3369
3370 uint64_t total_duration = 0;
3371 for (unsigned int i = 0; i < referenceCount; i++) {
3372 uint32_t d1, d2, d3;
3373
3374 if (!mDataSource->getUInt32(offset, &d1) || // size
3375 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3376 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3377 return ERROR_MALFORMED;
3378 }
3379
3380 if (d1 & 0x80000000) {
3381 ALOGW("sub-sidx boxes not supported yet");
3382 }
3383 bool sap = d3 & 0x80000000;
3384 uint32_t saptype = (d3 >> 28) & 7;
3385 if (!sap || (saptype != 1 && saptype != 2)) {
3386 // type 1 and 2 are sync samples
3387 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3388 }
3389 total_duration += d2;
3390 offset += 12;
3391 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3392 SidxEntry se;
3393 se.mSize = d1 & 0x7fffffff;
3394 se.mDurationUs = 1000000LL * d2 / timeScale;
3395 mSidxEntries.add(se);
3396 }
3397
3398 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3399
3400 if (mLastTrack == NULL)
3401 return ERROR_MALFORMED;
3402
3403 int64_t metaDuration;
3404 if (!AMediaFormat_getInt64(mLastTrack->meta,
3405 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3406 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3407 }
3408 return OK;
3409 }
3410
parseQTMetaKey(off64_t offset,size_t size)3411 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3412 if (size < 8) {
3413 return ERROR_MALFORMED;
3414 }
3415
3416 uint32_t count;
3417 if (!mDataSource->getUInt32(offset + 4, &count)) {
3418 return ERROR_MALFORMED;
3419 }
3420
3421 if (mMetaKeyMap.size() > 0) {
3422 ALOGW("'keys' atom seen again, discarding existing entries");
3423 mMetaKeyMap.clear();
3424 }
3425
3426 off64_t keyOffset = offset + 8;
3427 off64_t stopOffset = offset + size;
3428 for (size_t i = 1; i <= count; i++) {
3429 if (keyOffset + 8 > stopOffset) {
3430 return ERROR_MALFORMED;
3431 }
3432
3433 uint32_t keySize;
3434 if (!mDataSource->getUInt32(keyOffset, &keySize)
3435 || keySize < 8
3436 || keyOffset + keySize > stopOffset) {
3437 return ERROR_MALFORMED;
3438 }
3439
3440 uint32_t type;
3441 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3442 || type != FOURCC("mdta")) {
3443 return ERROR_MALFORMED;
3444 }
3445
3446 keySize -= 8;
3447 keyOffset += 8;
3448
3449 auto keyData = heapbuffer<uint8_t>(keySize);
3450 if (keyData.get() == NULL) {
3451 return ERROR_MALFORMED;
3452 }
3453 if (mDataSource->readAt(
3454 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3455 return ERROR_MALFORMED;
3456 }
3457
3458 AString key((const char *)keyData.get(), keySize);
3459 mMetaKeyMap.add(i, key);
3460
3461 keyOffset += keySize;
3462 }
3463 return OK;
3464 }
3465
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3466 status_t MPEG4Extractor::parseQTMetaVal(
3467 int32_t keyId, off64_t offset, size_t size) {
3468 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3469 if (index < 0) {
3470 // corresponding key is not present, ignore
3471 return ERROR_MALFORMED;
3472 }
3473
3474 if (size <= 16) {
3475 return ERROR_MALFORMED;
3476 }
3477 uint32_t dataSize;
3478 if (!mDataSource->getUInt32(offset, &dataSize)
3479 || dataSize > size || dataSize <= 16) {
3480 return ERROR_MALFORMED;
3481 }
3482 uint32_t atomFourCC;
3483 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3484 || atomFourCC != FOURCC("data")) {
3485 return ERROR_MALFORMED;
3486 }
3487 uint32_t dataType;
3488 if (!mDataSource->getUInt32(offset + 8, &dataType)
3489 || ((dataType & 0xff000000) != 0)) {
3490 // not well-known type
3491 return ERROR_MALFORMED;
3492 }
3493
3494 dataSize -= 16;
3495 offset += 16;
3496
3497 if (dataType == 23 && dataSize >= 4) {
3498 // BE Float32
3499 uint32_t val;
3500 if (!mDataSource->getUInt32(offset, &val)) {
3501 return ERROR_MALFORMED;
3502 }
3503 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3504 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3505 }
3506 } else if (dataType == 67 && dataSize >= 4) {
3507 // BE signed int32
3508 uint32_t val;
3509 if (!mDataSource->getUInt32(offset, &val)) {
3510 return ERROR_MALFORMED;
3511 }
3512 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3513 AMediaFormat_setInt32(mFileMetaData,
3514 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3515 }
3516 } else {
3517 // add more keys if needed
3518 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3519 }
3520
3521 return OK;
3522 }
3523
parseTrackHeader(off64_t data_offset,off64_t data_size)3524 status_t MPEG4Extractor::parseTrackHeader(
3525 off64_t data_offset, off64_t data_size) {
3526 if (data_size < 4) {
3527 return ERROR_MALFORMED;
3528 }
3529
3530 uint8_t version;
3531 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3532 return ERROR_IO;
3533 }
3534
3535 size_t dynSize = (version == 1) ? 36 : 24;
3536
3537 uint8_t buffer[36 + 60];
3538
3539 if (data_size != (off64_t)dynSize + 60) {
3540 return ERROR_MALFORMED;
3541 }
3542
3543 if (mDataSource->readAt(
3544 data_offset, buffer, data_size) < (ssize_t)data_size) {
3545 return ERROR_IO;
3546 }
3547
3548 uint64_t ctime __unused, mtime __unused, duration __unused;
3549 int32_t id;
3550
3551 if (version == 1) {
3552 ctime = U64_AT(&buffer[4]);
3553 mtime = U64_AT(&buffer[12]);
3554 id = U32_AT(&buffer[20]);
3555 duration = U64_AT(&buffer[28]);
3556 } else if (version == 0) {
3557 ctime = U32_AT(&buffer[4]);
3558 mtime = U32_AT(&buffer[8]);
3559 id = U32_AT(&buffer[12]);
3560 duration = U32_AT(&buffer[20]);
3561 } else {
3562 return ERROR_UNSUPPORTED;
3563 }
3564
3565 if (mLastTrack == NULL)
3566 return ERROR_MALFORMED;
3567
3568 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3569
3570 size_t matrixOffset = dynSize + 16;
3571 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3572 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3573 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3574 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3575
3576 #if 0
3577 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3578 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3579
3580 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3581 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3582 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3583 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3584 #endif
3585
3586 uint32_t rotationDegrees;
3587
3588 static const int32_t kFixedOne = 0x10000;
3589 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3590 // Identity, no rotation
3591 rotationDegrees = 0;
3592 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3593 rotationDegrees = 90;
3594 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3595 rotationDegrees = 270;
3596 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3597 rotationDegrees = 180;
3598 } else {
3599 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3600 rotationDegrees = 0;
3601 }
3602
3603 if (rotationDegrees != 0) {
3604 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3605 }
3606
3607 // Handle presentation display size, which could be different
3608 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3609 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3610 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3611 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3612 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3613
3614 return OK;
3615 }
3616
parseITunesMetaData(off64_t offset,size_t size)3617 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3618 if (size == 0) {
3619 return OK;
3620 }
3621
3622 if (size < 4 || size == SIZE_MAX) {
3623 return ERROR_MALFORMED;
3624 }
3625
3626 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3627 if (buffer == NULL) {
3628 return ERROR_MALFORMED;
3629 }
3630 if (mDataSource->readAt(
3631 offset, buffer, size) != (ssize_t)size) {
3632 delete[] buffer;
3633 buffer = NULL;
3634
3635 return ERROR_IO;
3636 }
3637
3638 uint32_t flags = U32_AT(buffer);
3639
3640 const char *metadataKey = nullptr;
3641 char chunk[5];
3642 MakeFourCCString(mPath[4], chunk);
3643 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3644 switch ((int32_t)mPath[4]) {
3645 case FOURCC("\251alb"):
3646 {
3647 metadataKey = "album";
3648 break;
3649 }
3650 case FOURCC("\251ART"):
3651 {
3652 metadataKey = "artist";
3653 break;
3654 }
3655 case FOURCC("aART"):
3656 {
3657 metadataKey = "albumartist";
3658 break;
3659 }
3660 case FOURCC("\251day"):
3661 {
3662 metadataKey = "year";
3663 break;
3664 }
3665 case FOURCC("\251nam"):
3666 {
3667 metadataKey = "title";
3668 break;
3669 }
3670 case FOURCC("\251wrt"):
3671 {
3672 metadataKey = "writer";
3673 break;
3674 }
3675 case FOURCC("covr"):
3676 {
3677 metadataKey = "albumart";
3678 break;
3679 }
3680 case FOURCC("gnre"):
3681 case FOURCC("\251gen"):
3682 {
3683 metadataKey = "genre";
3684 break;
3685 }
3686 case FOURCC("cpil"):
3687 {
3688 if (size == 9 && flags == 21) {
3689 char tmp[16];
3690 sprintf(tmp, "%d",
3691 (int)buffer[size - 1]);
3692
3693 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
3694 }
3695 break;
3696 }
3697 case FOURCC("trkn"):
3698 {
3699 if (size == 16 && flags == 0) {
3700 char tmp[16];
3701 uint16_t* pTrack = (uint16_t*)&buffer[10];
3702 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3703 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3704
3705 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3706 }
3707 break;
3708 }
3709 case FOURCC("disk"):
3710 {
3711 if ((size == 14 || size == 16) && flags == 0) {
3712 char tmp[16];
3713 uint16_t* pDisc = (uint16_t*)&buffer[10];
3714 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3715 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3716
3717 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
3718 }
3719 break;
3720 }
3721 case FOURCC("----"):
3722 {
3723 buffer[size] = '\0';
3724 switch (mPath[5]) {
3725 case FOURCC("mean"):
3726 mLastCommentMean.setTo((const char *)buffer + 4);
3727 break;
3728 case FOURCC("name"):
3729 mLastCommentName.setTo((const char *)buffer + 4);
3730 break;
3731 case FOURCC("data"):
3732 if (size < 8) {
3733 delete[] buffer;
3734 buffer = NULL;
3735 ALOGE("b/24346430");
3736 return ERROR_MALFORMED;
3737 }
3738 mLastCommentData.setTo((const char *)buffer + 8);
3739 break;
3740 }
3741
3742 // Once we have a set of mean/name/data info, go ahead and process
3743 // it to see if its something we are interested in. Whether or not
3744 // were are interested in the specific tag, make sure to clear out
3745 // the set so we can be ready to process another tuple should one
3746 // show up later in the file.
3747 if ((mLastCommentMean.length() != 0) &&
3748 (mLastCommentName.length() != 0) &&
3749 (mLastCommentData.length() != 0)) {
3750
3751 if (mLastCommentMean == "com.apple.iTunes"
3752 && mLastCommentName == "iTunSMPB") {
3753 int32_t delay, padding;
3754 if (sscanf(mLastCommentData,
3755 " %*x %x %x %*x", &delay, &padding) == 2) {
3756 if (mLastTrack == NULL) {
3757 delete[] buffer;
3758 return ERROR_MALFORMED;
3759 }
3760
3761 AMediaFormat_setInt32(mLastTrack->meta,
3762 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
3763 AMediaFormat_setInt32(mLastTrack->meta,
3764 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
3765 }
3766 }
3767
3768 mLastCommentMean.clear();
3769 mLastCommentName.clear();
3770 mLastCommentData.clear();
3771 }
3772 break;
3773 }
3774
3775 default:
3776 break;
3777 }
3778
3779 void *tmpData;
3780 size_t tmpDataSize;
3781 const char *s;
3782 if (size >= 8 && metadataKey &&
3783 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
3784 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
3785 if (!strcmp(metadataKey, "albumart")) {
3786 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
3787 buffer + 8, size - 8);
3788 } else if (!strcmp(metadataKey, "genre")) {
3789 if (flags == 0) {
3790 // uint8_t genre code, iTunes genre codes are
3791 // the standard id3 codes, except they start
3792 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3793 // We use standard id3 numbering, so subtract 1.
3794 int genrecode = (int)buffer[size - 1];
3795 genrecode--;
3796 if (genrecode < 0) {
3797 genrecode = 255; // reserved for 'unknown genre'
3798 }
3799 char genre[10];
3800 sprintf(genre, "%d", genrecode);
3801
3802 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
3803 } else if (flags == 1) {
3804 // custom genre string
3805 buffer[size] = '\0';
3806
3807 AMediaFormat_setString(mFileMetaData,
3808 metadataKey, (const char *)buffer + 8);
3809 }
3810 } else {
3811 buffer[size] = '\0';
3812
3813 AMediaFormat_setString(mFileMetaData,
3814 metadataKey, (const char *)buffer + 8);
3815 }
3816 }
3817
3818 delete[] buffer;
3819 buffer = NULL;
3820
3821 return OK;
3822 }
3823
parseColorInfo(off64_t offset,size_t size)3824 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3825 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3826 return ERROR_MALFORMED;
3827 }
3828
3829 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3830 if (buffer == NULL) {
3831 return ERROR_MALFORMED;
3832 }
3833 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3834 delete[] buffer;
3835 buffer = NULL;
3836
3837 return ERROR_IO;
3838 }
3839
3840 int32_t type = U32_AT(&buffer[0]);
3841 if ((type == FOURCC("nclx") && size >= 11)
3842 || (type == FOURCC("nclc") && size >= 10)) {
3843 // only store the first color specification
3844 int32_t existingColor;
3845 if (!AMediaFormat_getInt32(mLastTrack->meta,
3846 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
3847 int32_t primaries = U16_AT(&buffer[4]);
3848 int32_t isotransfer = U16_AT(&buffer[6]);
3849 int32_t coeffs = U16_AT(&buffer[8]);
3850 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
3851
3852 int32_t range = 0;
3853 int32_t standard = 0;
3854 int32_t transfer = 0;
3855 ColorUtils::convertIsoColorAspectsToPlatformAspects(
3856 primaries, isotransfer, coeffs, fullRange,
3857 &range, &standard, &transfer);
3858
3859 if (range != 0) {
3860 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
3861 }
3862 if (standard != 0) {
3863 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
3864 }
3865 if (transfer != 0) {
3866 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
3867 }
3868 }
3869 }
3870
3871 delete[] buffer;
3872 buffer = NULL;
3873
3874 return OK;
3875 }
3876
parse3GPPMetaData(off64_t offset,size_t size,int depth)3877 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3878 if (size < 4 || size == SIZE_MAX) {
3879 return ERROR_MALFORMED;
3880 }
3881
3882 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3883 if (buffer == NULL) {
3884 return ERROR_MALFORMED;
3885 }
3886 if (mDataSource->readAt(
3887 offset, buffer, size) != (ssize_t)size) {
3888 delete[] buffer;
3889 buffer = NULL;
3890
3891 return ERROR_IO;
3892 }
3893
3894 const char *metadataKey = nullptr;
3895 switch (mPath[depth]) {
3896 case FOURCC("titl"):
3897 {
3898 metadataKey = "title";
3899 break;
3900 }
3901 case FOURCC("perf"):
3902 {
3903 metadataKey = "artist";
3904 break;
3905 }
3906 case FOURCC("auth"):
3907 {
3908 metadataKey = "writer";
3909 break;
3910 }
3911 case FOURCC("gnre"):
3912 {
3913 metadataKey = "genre";
3914 break;
3915 }
3916 case FOURCC("albm"):
3917 {
3918 if (buffer[size - 1] != '\0') {
3919 char tmp[4];
3920 sprintf(tmp, "%u", buffer[size - 1]);
3921
3922 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
3923 }
3924
3925 metadataKey = "album";
3926 break;
3927 }
3928 case FOURCC("yrrc"):
3929 {
3930 if (size < 6) {
3931 delete[] buffer;
3932 buffer = NULL;
3933 ALOGE("b/62133227");
3934 android_errorWriteLog(0x534e4554, "62133227");
3935 return ERROR_MALFORMED;
3936 }
3937 char tmp[5];
3938 uint16_t year = U16_AT(&buffer[4]);
3939
3940 if (year < 10000) {
3941 sprintf(tmp, "%u", year);
3942
3943 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
3944 }
3945 break;
3946 }
3947
3948 default:
3949 break;
3950 }
3951
3952 if (metadataKey) {
3953 bool isUTF8 = true; // Common case
3954 char16_t *framedata = NULL;
3955 int len16 = 0; // Number of UTF-16 characters
3956
3957 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3958 if (size < 6) {
3959 delete[] buffer;
3960 buffer = NULL;
3961 return ERROR_MALFORMED;
3962 }
3963
3964 if (size - 6 >= 4) {
3965 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3966 framedata = (char16_t *)(buffer + 6);
3967 if (0xfffe == *framedata) {
3968 // endianness marker (BOM) doesn't match host endianness
3969 for (int i = 0; i < len16; i++) {
3970 framedata[i] = bswap_16(framedata[i]);
3971 }
3972 // BOM is now swapped to 0xfeff, we will execute next block too
3973 }
3974
3975 if (0xfeff == *framedata) {
3976 // Remove the BOM
3977 framedata++;
3978 len16--;
3979 isUTF8 = false;
3980 }
3981 // else normal non-zero-length UTF-8 string
3982 // we can't handle UTF-16 without BOM as there is no other
3983 // indication of encoding.
3984 }
3985
3986 if (isUTF8) {
3987 buffer[size] = 0;
3988 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
3989 } else {
3990 // Convert from UTF-16 string to UTF-8 string.
3991 String8 tmpUTF8str(framedata, len16);
3992 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
3993 }
3994 }
3995
3996 delete[] buffer;
3997 buffer = NULL;
3998
3999 return OK;
4000 }
4001
parseID3v2MetaData(off64_t offset)4002 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
4003 ID3 id3(mDataSource, true /* ignorev1 */, offset);
4004
4005 if (id3.isValid()) {
4006 struct Map {
4007 const char *key;
4008 const char *tag1;
4009 const char *tag2;
4010 };
4011 static const Map kMap[] = {
4012 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4013 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4014 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4015 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4016 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4017 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4018 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4019 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4020 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4021 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4022 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4023 };
4024 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4025
4026 for (size_t i = 0; i < kNumMapEntries; ++i) {
4027 const char *ss;
4028 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4029 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4030 if (it->done()) {
4031 delete it;
4032 it = new ID3::Iterator(id3, kMap[i].tag2);
4033 }
4034
4035 if (it->done()) {
4036 delete it;
4037 continue;
4038 }
4039
4040 String8 s;
4041 it->getString(&s);
4042 delete it;
4043
4044 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4045 }
4046 }
4047
4048 size_t dataSize;
4049 String8 mime;
4050 const void *data = id3.getAlbumArt(&dataSize, &mime);
4051
4052 if (data) {
4053 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4054 }
4055 }
4056 }
4057
getTrack(size_t index)4058 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4059 status_t err;
4060 if ((err = readMetaData()) != OK) {
4061 return NULL;
4062 }
4063
4064 Track *track = mFirstTrack;
4065 while (index > 0) {
4066 if (track == NULL) {
4067 return NULL;
4068 }
4069
4070 track = track->next;
4071 --index;
4072 }
4073
4074 if (track == NULL) {
4075 return NULL;
4076 }
4077
4078
4079 Trex *trex = NULL;
4080 int32_t trackId;
4081 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4082 for (size_t i = 0; i < mTrex.size(); i++) {
4083 Trex *t = &mTrex.editItemAt(i);
4084 if (t->track_ID == (uint32_t) trackId) {
4085 trex = t;
4086 break;
4087 }
4088 }
4089 } else {
4090 ALOGE("b/21657957");
4091 return NULL;
4092 }
4093
4094 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4095
4096 const char *mime;
4097 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4098 return NULL;
4099 }
4100
4101 sp<ItemTable> itemTable;
4102 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4103 void *data;
4104 size_t size;
4105 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4106 return NULL;
4107 }
4108
4109 const uint8_t *ptr = (const uint8_t *)data;
4110
4111 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4112 return NULL;
4113 }
4114 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4115 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4116 void *data;
4117 size_t size;
4118 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4119 return NULL;
4120 }
4121
4122 const uint8_t *ptr = (const uint8_t *)data;
4123
4124 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4125 return NULL;
4126 }
4127 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4128 itemTable = mItemTable;
4129 }
4130 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4131 void *data;
4132 size_t size;
4133 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4134 return NULL;
4135 }
4136
4137 const uint8_t *ptr = (const uint8_t *)data;
4138
4139 if (size < 5 || ptr[0] != 0x81) { // configurationVersion == 1
4140 return NULL;
4141 }
4142 }
4143
4144 if (track->has_elst and !strncasecmp("video/", mime, 6) and track->elst_media_time > 0) {
4145 track->elstShiftStartTicks = track->elst_media_time;
4146 ALOGV("video track->elstShiftStartTicks :%" PRIu64, track->elstShiftStartTicks);
4147 }
4148
4149 MPEG4Source *source = new MPEG4Source(
4150 track->meta, mDataSource, track->timescale, track->sampleTable,
4151 mSidxEntries, trex, mMoofOffset, itemTable,
4152 track->elstShiftStartTicks);
4153 if (source->init() != OK) {
4154 delete source;
4155 return NULL;
4156 }
4157 return source;
4158 }
4159
4160 // static
verifyTrack(Track * track)4161 status_t MPEG4Extractor::verifyTrack(Track *track) {
4162 const char *mime;
4163 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4164
4165 void *data;
4166 size_t size;
4167 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4168 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4169 return ERROR_MALFORMED;
4170 }
4171 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4172 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4173 return ERROR_MALFORMED;
4174 }
4175 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4176 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4177 return ERROR_MALFORMED;
4178 }
4179 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4180 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4181 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4182 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4183 return ERROR_MALFORMED;
4184 }
4185 }
4186
4187 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4188 // Make sure we have all the metadata we need.
4189 ALOGE("stbl atom missing/invalid.");
4190 return ERROR_MALFORMED;
4191 }
4192
4193 if (track->timescale == 0) {
4194 ALOGE("timescale invalid.");
4195 return ERROR_MALFORMED;
4196 }
4197
4198 return OK;
4199 }
4200
// Audio object type codes. Only the enabled entries are defined; the commented-out
// entries are kept as a reference for the remaining numeric values.
enum AUDIO_OBJECT_TYPE {
    //AOT_NONE             = -1,
    //AOT_NULL_OBJECT      = 0,
    //AOT_AAC_MAIN         = 1,  /**< Main profile */
    AOT_AAC_LC             = 2,  /**< Low Complexity object */
    //AOT_AAC_SSR          = 3,
    //AOT_AAC_LTP          = 4,
    AOT_SBR                = 5,
    //AOT_AAC_SCAL         = 6,
    //AOT_TWIN_VQ          = 7,
    //AOT_CELP             = 8,
    //AOT_HVXC             = 9,
    //AOT_RSVD_10          = 10, /**< (reserved) */
    //AOT_RSVD_11          = 11, /**< (reserved) */
    //AOT_TTSI             = 12, /**< TTSI Object */
    //AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object */
    //AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object */
    //AOT_GEN_MIDI         = 15, /**< General MIDI object */
    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC          = 17, /**< Error Resilient(ER) AAC Low Complexity */
    //AOT_RSVD_18          = 18, /**< (reserved) */
    //AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object */
    AOT_ER_AAC_SCAL        = 20, /**< Error Resilient(ER) AAC Scalable object */
    //AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object */
    AOT_ER_BSAC            = 22, /**< Error Resilient(ER) BSAC object */
    AOT_ER_AAC_LD          = 23, /**< Error Resilient(ER) AAC LowDelay object */
    //AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object */
    //AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object */
    //AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object */
    //AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object */
    //AOT_RSVD_28          = 28, /**< might become SSC */
    AOT_PS                 = 29, /**< PS, Parametric Stereo (includes SBR) */
    //AOT_MPEGS            = 30, /**< MPEG Surround */

    AOT_ESCAPE             = 31, /**< Signal AOT uses more than 5 bits */

    //AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4 */
    //AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4 */
    //AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4 */
    //AOT_RSVD_35          = 35, /**< might become DST */
    //AOT_RSVD_36          = 36, /**< might become ALS */
    //AOT_AAC_SLS          = 37, /**< AAC + SLS */
    //AOT_SLS              = 38, /**< SLS */
    //AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay */

    //AOT_USAC             = 42, /**< USAC */
    //AOT_SAOC             = 43, /**< SAOC */
    //AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround */

    //AOT_RSVD50           = 50, /**< Interim AOT for Rsvd50 */
};
4252
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4253 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4254 const void *esds_data, size_t esds_size) {
4255 ESDS esds(esds_data, esds_size);
4256
4257 uint8_t objectTypeIndication;
4258 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4259 return ERROR_MALFORMED;
4260 }
4261
4262 if (objectTypeIndication == 0xe1) {
4263 // This isn't MPEG4 audio at all, it's QCELP 14k...
4264 if (mLastTrack == NULL)
4265 return ERROR_MALFORMED;
4266
4267 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4268 return OK;
4269 }
4270
4271 if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4272 // mp3 audio
4273 AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4274 return OK;
4275 }
4276
4277 if (mLastTrack != NULL) {
4278 uint32_t maxBitrate = 0;
4279 uint32_t avgBitrate = 0;
4280 esds.getBitRate(&maxBitrate, &avgBitrate);
4281 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4282 AMediaFormat_setInt32(mLastTrack->meta,
4283 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4284 }
4285 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4286 AMediaFormat_setInt32(mLastTrack->meta,
4287 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4288 }
4289 }
4290
4291 const uint8_t *csd;
4292 size_t csd_size;
4293 if (esds.getCodecSpecificInfo(
4294 (const void **)&csd, &csd_size) != OK) {
4295 return ERROR_MALFORMED;
4296 }
4297
4298 if (kUseHexDump) {
4299 printf("ESD of size %zu\n", csd_size);
4300 hexdump(csd, csd_size);
4301 }
4302
4303 if (csd_size == 0) {
4304 // There's no further information, i.e. no codec specific data
4305 // Let's assume that the information provided in the mpeg4 headers
4306 // is accurate and hope for the best.
4307
4308 return OK;
4309 }
4310
4311 if (csd_size < 2) {
4312 return ERROR_MALFORMED;
4313 }
4314
4315 if (objectTypeIndication == 0xdd) {
4316 // vorbis audio
4317 if (csd[0] != 0x02) {
4318 return ERROR_MALFORMED;
4319 }
4320
4321 // codecInfo starts with two lengths, len1 and len2, that are
4322 // "Xiph-style-lacing encoded"..
4323
4324 size_t offset = 1;
4325 size_t len1 = 0;
4326 while (offset < csd_size && csd[offset] == 0xff) {
4327 if (__builtin_add_overflow(len1, 0xff, &len1)) {
4328 return ERROR_MALFORMED;
4329 }
4330 ++offset;
4331 }
4332 if (offset >= csd_size) {
4333 return ERROR_MALFORMED;
4334 }
4335 if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4336 return ERROR_MALFORMED;
4337 }
4338 ++offset;
4339 if (len1 == 0) {
4340 return ERROR_MALFORMED;
4341 }
4342
4343 size_t len2 = 0;
4344 while (offset < csd_size && csd[offset] == 0xff) {
4345 if (__builtin_add_overflow(len2, 0xff, &len2)) {
4346 return ERROR_MALFORMED;
4347 }
4348 ++offset;
4349 }
4350 if (offset >= csd_size) {
4351 return ERROR_MALFORMED;
4352 }
4353 if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4354 return ERROR_MALFORMED;
4355 }
4356 ++offset;
4357 if (len2 == 0) {
4358 return ERROR_MALFORMED;
4359 }
4360 if (offset >= csd_size || csd[offset] != 0x01) {
4361 return ERROR_MALFORMED;
4362 }
4363 // formerly kKeyVorbisInfo
4364 AMediaFormat_setBuffer(mLastTrack->meta,
4365 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4366
4367 if (__builtin_add_overflow(offset, len1, &offset) ||
4368 offset >= csd_size || csd[offset] != 0x03) {
4369 return ERROR_MALFORMED;
4370 }
4371
4372 if (__builtin_add_overflow(offset, len2, &offset) ||
4373 offset >= csd_size || csd[offset] != 0x05) {
4374 return ERROR_MALFORMED;
4375 }
4376
4377 // formerly kKeyVorbisBooks
4378 AMediaFormat_setBuffer(mLastTrack->meta,
4379 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4380 AMediaFormat_setString(mLastTrack->meta,
4381 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4382
4383 return OK;
4384 }
4385
4386 static uint32_t kSamplingRate[] = {
4387 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4388 16000, 12000, 11025, 8000, 7350
4389 };
4390
4391 ABitReader br(csd, csd_size);
4392 uint32_t objectType = br.getBits(5);
4393
4394 if (objectType == 31) { // AAC-ELD => additional 6 bits
4395 objectType = 32 + br.getBits(6);
4396 }
4397
4398 if (mLastTrack == NULL)
4399 return ERROR_MALFORMED;
4400
4401 //keep AOT type
4402 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4403
4404 uint32_t freqIndex = br.getBits(4);
4405
4406 int32_t sampleRate = 0;
4407 int32_t numChannels = 0;
4408 if (freqIndex == 15) {
4409 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4410 sampleRate = br.getBits(24);
4411 numChannels = br.getBits(4);
4412 } else {
4413 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4414 numChannels = br.getBits(4);
4415
4416 if (freqIndex == 13 || freqIndex == 14) {
4417 return ERROR_MALFORMED;
4418 }
4419
4420 sampleRate = kSamplingRate[freqIndex];
4421 }
4422
4423 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4424 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4425 uint32_t extFreqIndex = br.getBits(4);
4426 int32_t extSampleRate __unused;
4427 if (extFreqIndex == 15) {
4428 if (csd_size < 8) {
4429 return ERROR_MALFORMED;
4430 }
4431 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4432 extSampleRate = br.getBits(24);
4433 } else {
4434 if (extFreqIndex == 13 || extFreqIndex == 14) {
4435 return ERROR_MALFORMED;
4436 }
4437 extSampleRate = kSamplingRate[extFreqIndex];
4438 }
4439 //TODO: save the extension sampling rate value in meta data =>
4440 // AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4441 }
4442
4443 switch (numChannels) {
4444 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4445 case 0:
4446 case 1:// FC
4447 case 2:// FL FR
4448 case 3:// FC, FL FR
4449 case 4:// FC, FL FR, RC
4450 case 5:// FC, FL FR, SL SR
4451 case 6:// FC, FL FR, SL SR, LFE
4452 //numChannels already contains the right value
4453 break;
4454 case 11:// FC, FL FR, SL SR, RC, LFE
4455 numChannels = 7;
4456 break;
4457 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4458 case 12:// FC, FL FR, SL SR, RL RR, LFE
4459 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
4460 numChannels = 8;
4461 break;
4462 default:
4463 return ERROR_UNSUPPORTED;
4464 }
4465
4466 {
4467 if (objectType == AOT_SBR || objectType == AOT_PS) {
4468 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4469 objectType = br.getBits(5);
4470
4471 if (objectType == AOT_ESCAPE) {
4472 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4473 objectType = 32 + br.getBits(6);
4474 }
4475 }
4476 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4477 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4478 objectType == AOT_ER_BSAC) {
4479 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4480 const int32_t frameLengthFlag __unused = br.getBits(1);
4481
4482 const int32_t dependsOnCoreCoder = br.getBits(1);
4483
4484 if (dependsOnCoreCoder ) {
4485 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4486 const int32_t coreCoderDelay __unused = br.getBits(14);
4487 }
4488
4489 int32_t extensionFlag = -1;
4490 if (br.numBitsLeft() > 0) {
4491 extensionFlag = br.getBits(1);
4492 } else {
4493 switch (objectType) {
4494 // 14496-3 4.5.1.1 extensionFlag
4495 case AOT_AAC_LC:
4496 extensionFlag = 0;
4497 break;
4498 case AOT_ER_AAC_LC:
4499 case AOT_ER_AAC_SCAL:
4500 case AOT_ER_BSAC:
4501 case AOT_ER_AAC_LD:
4502 extensionFlag = 1;
4503 break;
4504 default:
4505 return ERROR_MALFORMED;
4506 break;
4507 }
4508 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4509 extensionFlag, objectType);
4510 }
4511
4512 if (numChannels == 0) {
4513 int32_t channelsEffectiveNum = 0;
4514 int32_t channelsNum = 0;
4515 if (br.numBitsLeft() < 32) {
4516 return ERROR_MALFORMED;
4517 }
4518 const int32_t ElementInstanceTag __unused = br.getBits(4);
4519 const int32_t Profile __unused = br.getBits(2);
4520 const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
4521 const int32_t NumFrontChannelElements = br.getBits(4);
4522 const int32_t NumSideChannelElements = br.getBits(4);
4523 const int32_t NumBackChannelElements = br.getBits(4);
4524 const int32_t NumLfeChannelElements = br.getBits(2);
4525 const int32_t NumAssocDataElements __unused = br.getBits(3);
4526 const int32_t NumValidCcElements __unused = br.getBits(4);
4527
4528 const int32_t MonoMixdownPresent = br.getBits(1);
4529
4530 if (MonoMixdownPresent != 0) {
4531 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4532 const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
4533 }
4534
4535 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4536 const int32_t StereoMixdownPresent = br.getBits(1);
4537 if (StereoMixdownPresent != 0) {
4538 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4539 const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
4540 }
4541
4542 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4543 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4544 if (MatrixMixdownIndexPresent != 0) {
4545 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4546 const int32_t MatrixMixdownIndex __unused = br.getBits(2);
4547 const int32_t PseudoSurroundEnable __unused = br.getBits(1);
4548 }
4549
4550 int i;
4551 for (i=0; i < NumFrontChannelElements; i++) {
4552 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4553 const int32_t FrontElementIsCpe = br.getBits(1);
4554 const int32_t FrontElementTagSelect __unused = br.getBits(4);
4555 channelsNum += FrontElementIsCpe ? 2 : 1;
4556 }
4557
4558 for (i=0; i < NumSideChannelElements; i++) {
4559 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4560 const int32_t SideElementIsCpe = br.getBits(1);
4561 const int32_t SideElementTagSelect __unused = br.getBits(4);
4562 channelsNum += SideElementIsCpe ? 2 : 1;
4563 }
4564
4565 for (i=0; i < NumBackChannelElements; i++) {
4566 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4567 const int32_t BackElementIsCpe = br.getBits(1);
4568 const int32_t BackElementTagSelect __unused = br.getBits(4);
4569 channelsNum += BackElementIsCpe ? 2 : 1;
4570 }
4571 channelsEffectiveNum = channelsNum;
4572
4573 for (i=0; i < NumLfeChannelElements; i++) {
4574 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4575 const int32_t LfeElementTagSelect __unused = br.getBits(4);
4576 channelsNum += 1;
4577 }
4578 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4579 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4580 numChannels = channelsNum;
4581 }
4582 }
4583 }
4584
4585 if (numChannels == 0) {
4586 return ERROR_UNSUPPORTED;
4587 }
4588
4589 if (mLastTrack == NULL)
4590 return ERROR_MALFORMED;
4591
4592 int32_t prevSampleRate;
4593 CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
4594
4595 if (prevSampleRate != sampleRate) {
4596 ALOGV("mpeg4 audio sample rate different from previous setting. "
4597 "was: %d, now: %d", prevSampleRate, sampleRate);
4598 }
4599
4600 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
4601
4602 int32_t prevChannelCount;
4603 CHECK(AMediaFormat_getInt32(mLastTrack->meta,
4604 AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
4605
4606 if (prevChannelCount != numChannels) {
4607 ALOGV("mpeg4 audio channel count different from previous setting. "
4608 "was: %d, now: %d", prevChannelCount, numChannels);
4609 }
4610
4611 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
4612
4613 return OK;
4614 }
4615
adjustRawDefaultFrameSize()4616 void MPEG4Extractor::adjustRawDefaultFrameSize() {
4617 int32_t chanCount = 0;
4618 int32_t bitWidth = 0;
4619 const char *mimeStr = NULL;
4620
4621 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
4622 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
4623 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
4624 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
4625 // samplesize in stsz may not right , so updade default samplesize
4626 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
4627 }
4628 }
4629
4630 ////////////////////////////////////////////////////////////////////////////////
4631
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks)4632 MPEG4Source::MPEG4Source(
4633 AMediaFormat *format,
4634 DataSourceHelper *dataSource,
4635 int32_t timeScale,
4636 const sp<SampleTable> &sampleTable,
4637 Vector<SidxEntry> &sidx,
4638 const Trex *trex,
4639 off64_t firstMoofOffset,
4640 const sp<ItemTable> &itemTable,
4641 uint64_t elstShiftStartTicks)
4642 : mFormat(format),
4643 mDataSource(dataSource),
4644 mTimescale(timeScale),
4645 mSampleTable(sampleTable),
4646 mCurrentSampleIndex(0),
4647 mCurrentFragmentIndex(0),
4648 mSegments(sidx),
4649 mTrex(trex),
4650 mFirstMoofOffset(firstMoofOffset),
4651 mCurrentMoofOffset(firstMoofOffset),
4652 mNextMoofOffset(-1),
4653 mCurrentTime(0),
4654 mDefaultEncryptedByteBlock(0),
4655 mDefaultSkipByteBlock(0),
4656 mCurrentSampleInfoAllocSize(0),
4657 mCurrentSampleInfoSizes(NULL),
4658 mCurrentSampleInfoOffsetsAllocSize(0),
4659 mCurrentSampleInfoOffsets(NULL),
4660 mIsAVC(false),
4661 mIsHEVC(false),
4662 mIsAC4(false),
4663 mIsPcm(false),
4664 mNALLengthSize(0),
4665 mStarted(false),
4666 mBuffer(NULL),
4667 mSrcBuffer(NULL),
4668 mIsHeif(itemTable != NULL),
4669 mItemTable(itemTable),
4670 mElstShiftStartTicks(elstShiftStartTicks) {
4671
4672 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
4673
4674 AMediaFormat_getInt32(mFormat,
4675 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
4676 mDefaultIVSize = 0;
4677 AMediaFormat_getInt32(mFormat,
4678 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
4679 void *key;
4680 size_t keysize;
4681 if (AMediaFormat_getBuffer(mFormat,
4682 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
4683 CHECK(keysize <= 16);
4684 memset(mCryptoKey, 0, 16);
4685 memcpy(mCryptoKey, key, keysize);
4686 }
4687
4688 AMediaFormat_getInt32(mFormat,
4689 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
4690 AMediaFormat_getInt32(mFormat,
4691 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
4692
4693 const char *mime;
4694 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
4695 CHECK(success);
4696
4697 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
4698 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
4699 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
4700 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
4701
4702 if (mIsAVC) {
4703 void *data;
4704 size_t size;
4705 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
4706
4707 const uint8_t *ptr = (const uint8_t *)data;
4708
4709 CHECK(size >= 7);
4710 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4711
4712 // The number of bytes used to encode the length of a NAL unit.
4713 mNALLengthSize = 1 + (ptr[4] & 3);
4714 } else if (mIsHEVC) {
4715 void *data;
4716 size_t size;
4717 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
4718
4719 const uint8_t *ptr = (const uint8_t *)data;
4720
4721 CHECK(size >= 22);
4722 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
4723
4724 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
4725 }
4726
4727 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
4728 mIsAudio = !strncasecmp(mime, "audio/", 6);
4729
4730 if (mIsPcm) {
4731 int32_t numChannels = 0;
4732 int32_t bitsPerSample = 0;
4733 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
4734 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
4735
4736 int32_t bytesPerSample = bitsPerSample >> 3;
4737 int32_t pcmSampleSize = bytesPerSample * numChannels;
4738
4739 size_t maxSampleSize;
4740 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
4741 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
4742 || bitsPerSample != 16) {
4743 // Not supported
4744 mIsPcm = false;
4745 } else {
4746 AMediaFormat_setInt32(mFormat,
4747 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
4748 }
4749 }
4750
4751 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
4752
4753 }
4754
init()4755 status_t MPEG4Source::init() {
4756 status_t err = OK;
4757 const char *mime;
4758 CHECK(AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime));
4759 if (mFirstMoofOffset != 0) {
4760 off64_t offset = mFirstMoofOffset;
4761 err = parseChunk(&offset);
4762 if(err == OK && !strncasecmp("video/", mime, 6)
4763 && !mCurrentSamples.isEmpty()) {
4764 // Start offset should be less or equal to composition time of first sample.
4765 // ISO : sample_composition_time_offset, version 0 (unsigned) for major brands.
4766 mElstShiftStartTicks = std::min(mElstShiftStartTicks,
4767 (uint64_t)(*mCurrentSamples.begin()).compositionOffset);
4768 }
4769 return err;
4770 }
4771
4772 if (!strncasecmp("video/", mime, 6)) {
4773 uint64_t firstSampleCTS = 0;
4774 err = mSampleTable->getMetaDataForSample(0, NULL, NULL, &firstSampleCTS);
4775 // Start offset should be less or equal to composition time of first sample.
4776 // Composition time stamp of first sample cannot be negative.
4777 mElstShiftStartTicks = std::min(mElstShiftStartTicks, firstSampleCTS);
4778 }
4779
4780 return err;
4781 }
4782
~MPEG4Source()4783 MPEG4Source::~MPEG4Source() {
4784 if (mStarted) {
4785 stop();
4786 }
4787 free(mCurrentSampleInfoSizes);
4788 free(mCurrentSampleInfoOffsets);
4789 }
4790
start()4791 media_status_t MPEG4Source::start() {
4792 Mutex::Autolock autoLock(mLock);
4793
4794 CHECK(!mStarted);
4795
4796 int32_t tmp;
4797 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
4798 size_t max_size = tmp;
4799
4800 // A somewhat arbitrary limit that should be sufficient for 8k video frames
4801 // If you see the message below for a valid input stream: increase the limit
4802 const size_t kMaxBufferSize = 64 * 1024 * 1024;
4803 if (max_size > kMaxBufferSize) {
4804 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
4805 return AMEDIA_ERROR_MALFORMED;
4806 }
4807 if (max_size == 0) {
4808 ALOGE("zero max input size");
4809 return AMEDIA_ERROR_MALFORMED;
4810 }
4811
4812 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
4813 const size_t kInitialBuffers = 2;
4814 const size_t kMaxBuffers = 8;
4815 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
4816 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
4817 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
4818 if (mSrcBuffer == NULL) {
4819 // file probably specified a bad max size
4820 return AMEDIA_ERROR_MALFORMED;
4821 }
4822
4823 mStarted = true;
4824
4825 return AMEDIA_OK;
4826 }
4827
stop()4828 media_status_t MPEG4Source::stop() {
4829 Mutex::Autolock autoLock(mLock);
4830
4831 CHECK(mStarted);
4832
4833 if (mBuffer != NULL) {
4834 mBuffer->release();
4835 mBuffer = NULL;
4836 }
4837
4838 delete[] mSrcBuffer;
4839 mSrcBuffer = NULL;
4840
4841 mStarted = false;
4842 mCurrentSampleIndex = 0;
4843
4844 return AMEDIA_OK;
4845 }
4846
parseChunk(off64_t * offset)4847 status_t MPEG4Source::parseChunk(off64_t *offset) {
4848 uint32_t hdr[2];
4849 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4850 return ERROR_IO;
4851 }
4852 uint64_t chunk_size = ntohl(hdr[0]);
4853 uint32_t chunk_type = ntohl(hdr[1]);
4854 off64_t data_offset = *offset + 8;
4855
4856 if (chunk_size == 1) {
4857 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4858 return ERROR_IO;
4859 }
4860 chunk_size = ntoh64(chunk_size);
4861 data_offset += 8;
4862
4863 if (chunk_size < 16) {
4864 // The smallest valid chunk is 16 bytes long in this case.
4865 return ERROR_MALFORMED;
4866 }
4867 } else if (chunk_size < 8) {
4868 // The smallest valid chunk is 8 bytes long.
4869 return ERROR_MALFORMED;
4870 }
4871
4872 char chunk[5];
4873 MakeFourCCString(chunk_type, chunk);
4874 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4875
4876 off64_t chunk_data_size = *offset + chunk_size - data_offset;
4877
4878 switch(chunk_type) {
4879
4880 case FOURCC("traf"):
4881 case FOURCC("moof"): {
4882 off64_t stop_offset = *offset + chunk_size;
4883 *offset = data_offset;
4884 while (*offset < stop_offset) {
4885 status_t err = parseChunk(offset);
4886 if (err != OK) {
4887 return err;
4888 }
4889 }
4890 if (chunk_type == FOURCC("moof")) {
4891 // *offset points to the box following this moof. Find the next moof from there.
4892
4893 while (true) {
4894 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4895 // no more box to the end of file.
4896 break;
4897 }
4898 chunk_size = ntohl(hdr[0]);
4899 chunk_type = ntohl(hdr[1]);
4900 if (chunk_size == 1) {
4901 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4902 // which is defined in 4.2 Object Structure.
4903 // When chunk_size==1, 8 bytes follows as "largesize".
4904 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4905 return ERROR_IO;
4906 }
4907 chunk_size = ntoh64(chunk_size);
4908 if (chunk_size < 16) {
4909 // The smallest valid chunk is 16 bytes long in this case.
4910 return ERROR_MALFORMED;
4911 }
4912 } else if (chunk_size == 0) {
4913 // next box extends to end of file.
4914 } else if (chunk_size < 8) {
4915 // The smallest valid chunk is 8 bytes long in this case.
4916 return ERROR_MALFORMED;
4917 }
4918
4919 if (chunk_type == FOURCC("moof")) {
4920 mNextMoofOffset = *offset;
4921 break;
4922 } else if (chunk_size == 0) {
4923 break;
4924 }
4925 *offset += chunk_size;
4926 }
4927 }
4928 break;
4929 }
4930
4931 case FOURCC("tfhd"): {
4932 status_t err;
4933 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4934 return err;
4935 }
4936 *offset += chunk_size;
4937 break;
4938 }
4939
4940 case FOURCC("trun"): {
4941 status_t err;
4942 if (mLastParsedTrackId == mTrackId) {
4943 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4944 return err;
4945 }
4946 }
4947
4948 *offset += chunk_size;
4949 break;
4950 }
4951
4952 case FOURCC("saiz"): {
4953 status_t err;
4954 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4955 return err;
4956 }
4957 *offset += chunk_size;
4958 break;
4959 }
4960 case FOURCC("saio"): {
4961 status_t err;
4962 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
4963 != OK) {
4964 return err;
4965 }
4966 *offset += chunk_size;
4967 break;
4968 }
4969
4970 case FOURCC("senc"): {
4971 status_t err;
4972 if ((err = parseSampleEncryption(data_offset)) != OK) {
4973 return err;
4974 }
4975 *offset += chunk_size;
4976 break;
4977 }
4978
4979 case FOURCC("mdat"): {
4980 // parse DRM info if present
4981 ALOGV("MPEG4Source::parseChunk mdat");
4982 // if saiz/saoi was previously observed, do something with the sampleinfos
4983 *offset += chunk_size;
4984 break;
4985 }
4986
4987 default: {
4988 *offset += chunk_size;
4989 break;
4990 }
4991 }
4992 return OK;
4993 }
4994
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)4995 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4996 off64_t offset, off64_t /* size */) {
4997 ALOGV("parseSampleAuxiliaryInformationSizes");
4998 // 14496-12 8.7.12
4999 uint8_t version;
5000 if (mDataSource->readAt(
5001 offset, &version, sizeof(version))
5002 < (ssize_t)sizeof(version)) {
5003 return ERROR_IO;
5004 }
5005
5006 if (version != 0) {
5007 return ERROR_UNSUPPORTED;
5008 }
5009 offset++;
5010
5011 uint32_t flags;
5012 if (!mDataSource->getUInt24(offset, &flags)) {
5013 return ERROR_IO;
5014 }
5015 offset += 3;
5016
5017 if (flags & 1) {
5018 uint32_t tmp;
5019 if (!mDataSource->getUInt32(offset, &tmp)) {
5020 return ERROR_MALFORMED;
5021 }
5022 mCurrentAuxInfoType = tmp;
5023 offset += 4;
5024 if (!mDataSource->getUInt32(offset, &tmp)) {
5025 return ERROR_MALFORMED;
5026 }
5027 mCurrentAuxInfoTypeParameter = tmp;
5028 offset += 4;
5029 }
5030
5031 uint8_t defsize;
5032 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5033 return ERROR_MALFORMED;
5034 }
5035 mCurrentDefaultSampleInfoSize = defsize;
5036 offset++;
5037
5038 uint32_t smplcnt;
5039 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5040 return ERROR_MALFORMED;
5041 }
5042 mCurrentSampleInfoCount = smplcnt;
5043 offset += 4;
5044
5045 if (mCurrentDefaultSampleInfoSize != 0) {
5046 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5047 return OK;
5048 }
5049 if (smplcnt > mCurrentSampleInfoAllocSize) {
5050 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5051 if (newPtr == NULL) {
5052 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5053 return NO_MEMORY;
5054 }
5055 mCurrentSampleInfoSizes = newPtr;
5056 mCurrentSampleInfoAllocSize = smplcnt;
5057 }
5058
5059 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5060 return OK;
5061 }
5062
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)5063 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5064 off64_t offset, off64_t /* size */) {
5065 ALOGV("parseSampleAuxiliaryInformationOffsets");
5066 // 14496-12 8.7.13
5067 uint8_t version;
5068 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5069 return ERROR_IO;
5070 }
5071 offset++;
5072
5073 uint32_t flags;
5074 if (!mDataSource->getUInt24(offset, &flags)) {
5075 return ERROR_IO;
5076 }
5077 offset += 3;
5078
5079 uint32_t entrycount;
5080 if (!mDataSource->getUInt32(offset, &entrycount)) {
5081 return ERROR_IO;
5082 }
5083 offset += 4;
5084 if (entrycount == 0) {
5085 return OK;
5086 }
5087 if (entrycount > UINT32_MAX / 8) {
5088 return ERROR_MALFORMED;
5089 }
5090
5091 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5092 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5093 if (newPtr == NULL) {
5094 ALOGE("failed to realloc %u -> %u",
5095 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5096 return NO_MEMORY;
5097 }
5098 mCurrentSampleInfoOffsets = newPtr;
5099 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5100 }
5101 mCurrentSampleInfoOffsetCount = entrycount;
5102
5103 if (mCurrentSampleInfoOffsets == NULL) {
5104 return OK;
5105 }
5106
5107 for (size_t i = 0; i < entrycount; i++) {
5108 if (version == 0) {
5109 uint32_t tmp;
5110 if (!mDataSource->getUInt32(offset, &tmp)) {
5111 return ERROR_IO;
5112 }
5113 mCurrentSampleInfoOffsets[i] = tmp;
5114 offset += 4;
5115 } else {
5116 uint64_t tmp;
5117 if (!mDataSource->getUInt64(offset, &tmp)) {
5118 return ERROR_IO;
5119 }
5120 mCurrentSampleInfoOffsets[i] = tmp;
5121 offset += 8;
5122 }
5123 }
5124
5125 // parse clear/encrypted data
5126
5127 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5128
5129 drmoffset += mCurrentMoofOffset;
5130
5131 return parseClearEncryptedSizes(drmoffset, false, 0);
5132 }
5133
parseClearEncryptedSizes(off64_t offset,bool isSubsampleEncryption,uint32_t flags)5134 status_t MPEG4Source::parseClearEncryptedSizes(
5135 off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
5136
5137 int32_t ivlength;
5138 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5139 return ERROR_MALFORMED;
5140 }
5141
5142 // only 0, 8 and 16 byte initialization vectors are supported
5143 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5144 ALOGW("unsupported IV length: %d", ivlength);
5145 return ERROR_MALFORMED;
5146 }
5147
5148 uint32_t sampleCount = mCurrentSampleInfoCount;
5149 if (isSubsampleEncryption) {
5150 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5151 return ERROR_IO;
5152 }
5153 offset += 4;
5154 }
5155
5156 // read CencSampleAuxiliaryDataFormats
5157 for (size_t i = 0; i < sampleCount; i++) {
5158 if (i >= mCurrentSamples.size()) {
5159 ALOGW("too few samples");
5160 break;
5161 }
5162 Sample *smpl = &mCurrentSamples.editItemAt(i);
5163 if (!smpl->clearsizes.isEmpty()) {
5164 continue;
5165 }
5166
5167 memset(smpl->iv, 0, 16);
5168 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5169 return ERROR_IO;
5170 }
5171
5172 offset += ivlength;
5173
5174 bool readSubsamples;
5175 if (isSubsampleEncryption) {
5176 readSubsamples = flags & 2;
5177 } else {
5178 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5179 if (smplinfosize == 0) {
5180 smplinfosize = mCurrentSampleInfoSizes[i];
5181 }
5182 readSubsamples = smplinfosize > ivlength;
5183 }
5184
5185 if (readSubsamples) {
5186 uint16_t numsubsamples;
5187 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5188 return ERROR_IO;
5189 }
5190 offset += 2;
5191 for (size_t j = 0; j < numsubsamples; j++) {
5192 uint16_t numclear;
5193 uint32_t numencrypted;
5194 if (!mDataSource->getUInt16(offset, &numclear)) {
5195 return ERROR_IO;
5196 }
5197 offset += 2;
5198 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5199 return ERROR_IO;
5200 }
5201 offset += 4;
5202 smpl->clearsizes.add(numclear);
5203 smpl->encryptedsizes.add(numencrypted);
5204 }
5205 } else {
5206 smpl->clearsizes.add(0);
5207 smpl->encryptedsizes.add(smpl->size);
5208 }
5209 }
5210
5211 return OK;
5212 }
5213
parseSampleEncryption(off64_t offset)5214 status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
5215 uint32_t flags;
5216 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5217 return ERROR_MALFORMED;
5218 }
5219 return parseClearEncryptedSizes(offset + 4, true, flags);
5220 }
5221
parseTrackFragmentHeader(off64_t offset,off64_t size)5222 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5223
5224 if (size < 8) {
5225 return -EINVAL;
5226 }
5227
5228 uint32_t flags;
5229 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5230 return ERROR_MALFORMED;
5231 }
5232
5233 if (flags & 0xff000000) {
5234 return -EINVAL;
5235 }
5236
5237 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5238 return ERROR_MALFORMED;
5239 }
5240
5241 if (mLastParsedTrackId != mTrackId) {
5242 // this is not the right track, skip it
5243 return OK;
5244 }
5245
5246 mTrackFragmentHeaderInfo.mFlags = flags;
5247 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5248 offset += 8;
5249 size -= 8;
5250
5251 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5252
5253 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5254 if (size < 8) {
5255 return -EINVAL;
5256 }
5257
5258 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5259 return ERROR_MALFORMED;
5260 }
5261 offset += 8;
5262 size -= 8;
5263 }
5264
5265 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5266 if (size < 4) {
5267 return -EINVAL;
5268 }
5269
5270 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5271 return ERROR_MALFORMED;
5272 }
5273 offset += 4;
5274 size -= 4;
5275 }
5276
5277 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5278 if (size < 4) {
5279 return -EINVAL;
5280 }
5281
5282 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5283 return ERROR_MALFORMED;
5284 }
5285 offset += 4;
5286 size -= 4;
5287 }
5288
5289 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5290 if (size < 4) {
5291 return -EINVAL;
5292 }
5293
5294 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5295 return ERROR_MALFORMED;
5296 }
5297 offset += 4;
5298 size -= 4;
5299 }
5300
5301 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5302 if (size < 4) {
5303 return -EINVAL;
5304 }
5305
5306 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5307 return ERROR_MALFORMED;
5308 }
5309 offset += 4;
5310 size -= 4;
5311 }
5312
5313 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5314 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5315 }
5316
5317 mTrackFragmentHeaderInfo.mDataOffset = 0;
5318 return OK;
5319 }
5320
parseTrackFragmentRun(off64_t offset,off64_t size)5321 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5322
5323 ALOGV("MPEG4Source::parseTrackFragmentRun");
5324 if (size < 8) {
5325 return -EINVAL;
5326 }
5327
5328 enum {
5329 kDataOffsetPresent = 0x01,
5330 kFirstSampleFlagsPresent = 0x04,
5331 kSampleDurationPresent = 0x100,
5332 kSampleSizePresent = 0x200,
5333 kSampleFlagsPresent = 0x400,
5334 kSampleCompositionTimeOffsetPresent = 0x800,
5335 };
5336
5337 uint32_t flags;
5338 if (!mDataSource->getUInt32(offset, &flags)) {
5339 return ERROR_MALFORMED;
5340 }
5341 // |version| only affects SampleCompositionTimeOffset field.
5342 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5343 // Otherwise, SampleCompositionTimeOffset is int32_t.
5344 // Sample.compositionOffset is defined as int32_t.
5345 uint8_t version = flags >> 24;
5346 flags &= 0xffffff;
5347 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5348
5349 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5350 // These two shall not be used together.
5351 return -EINVAL;
5352 }
5353
5354 uint32_t sampleCount;
5355 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5356 return ERROR_MALFORMED;
5357 }
5358 offset += 8;
5359 size -= 8;
5360
5361 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5362
5363 uint32_t firstSampleFlags = 0;
5364
5365 if (flags & kDataOffsetPresent) {
5366 if (size < 4) {
5367 return -EINVAL;
5368 }
5369
5370 int32_t dataOffsetDelta;
5371 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5372 return ERROR_MALFORMED;
5373 }
5374
5375 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5376
5377 offset += 4;
5378 size -= 4;
5379 }
5380
5381 if (flags & kFirstSampleFlagsPresent) {
5382 if (size < 4) {
5383 return -EINVAL;
5384 }
5385
5386 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5387 return ERROR_MALFORMED;
5388 }
5389 offset += 4;
5390 size -= 4;
5391 }
5392
5393 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5394 sampleCtsOffset = 0;
5395
5396 size_t bytesPerSample = 0;
5397 if (flags & kSampleDurationPresent) {
5398 bytesPerSample += 4;
5399 } else if (mTrackFragmentHeaderInfo.mFlags
5400 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5401 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5402 } else if (mTrex) {
5403 sampleDuration = mTrex->default_sample_duration;
5404 }
5405
5406 if (flags & kSampleSizePresent) {
5407 bytesPerSample += 4;
5408 } else if (mTrackFragmentHeaderInfo.mFlags
5409 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5410 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5411 } else {
5412 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5413 }
5414
5415 if (flags & kSampleFlagsPresent) {
5416 bytesPerSample += 4;
5417 } else if (mTrackFragmentHeaderInfo.mFlags
5418 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5419 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5420 } else {
5421 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5422 }
5423
5424 if (flags & kSampleCompositionTimeOffsetPresent) {
5425 bytesPerSample += 4;
5426 } else {
5427 sampleCtsOffset = 0;
5428 }
5429
5430 if (bytesPerSample != 0) {
5431 if (size < (off64_t)sampleCount * bytesPerSample) {
5432 return -EINVAL;
5433 }
5434 } else {
5435 if (sampleDuration == 0) {
5436 ALOGW("b/123389881 sampleDuration == 0");
5437 android_errorWriteLog(0x534e4554, "124389881 zero");
5438 return -EINVAL;
5439 }
5440
5441 // apply some sanity (vs strict legality) checks
5442 //
5443 // clamp the count of entries in the trun box, to avoid spending forever parsing
5444 // this box. Clamping (vs error) lets us play *something*.
5445 // 1 million is about 400 msecs on a Pixel3, should be no more than a couple seconds
5446 // on the slowest devices.
5447 static constexpr uint32_t kMaxTrunSampleCount = 1000000;
5448 if (sampleCount > kMaxTrunSampleCount) {
5449 ALOGW("b/123389881 clamp sampleCount(%u) @ kMaxTrunSampleCount(%u)",
5450 sampleCount, kMaxTrunSampleCount);
5451 android_errorWriteLog(0x534e4554, "124389881 count");
5452
5453 }
5454 }
5455
5456 Sample tmp;
5457 for (uint32_t i = 0; i < sampleCount; ++i) {
5458 if (flags & kSampleDurationPresent) {
5459 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5460 return ERROR_MALFORMED;
5461 }
5462 offset += 4;
5463 }
5464
5465 if (flags & kSampleSizePresent) {
5466 if (!mDataSource->getUInt32(offset, &sampleSize)) {
5467 return ERROR_MALFORMED;
5468 }
5469 offset += 4;
5470 }
5471
5472 if (flags & kSampleFlagsPresent) {
5473 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
5474 return ERROR_MALFORMED;
5475 }
5476 offset += 4;
5477 }
5478
5479 if (flags & kSampleCompositionTimeOffsetPresent) {
5480 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
5481 return ERROR_MALFORMED;
5482 }
5483 offset += 4;
5484 }
5485
5486 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
5487 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
5488 dataOffset, sampleSize, sampleDuration,
5489 (flags & kFirstSampleFlagsPresent) && i == 0
5490 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
5491 tmp.offset = dataOffset;
5492 tmp.size = sampleSize;
5493 tmp.duration = sampleDuration;
5494 tmp.compositionOffset = sampleCtsOffset;
5495 memset(tmp.iv, 0, sizeof(tmp.iv));
5496 mCurrentSamples.add(tmp);
5497
5498 dataOffset += sampleSize;
5499 }
5500
5501 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
5502
5503 return OK;
5504 }
5505
getFormat(AMediaFormat * meta)5506 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
5507 Mutex::Autolock autoLock(mLock);
5508 AMediaFormat_copy(meta, mFormat);
5509 return AMEDIA_OK;
5510 }
5511
parseNALSize(const uint8_t * data) const5512 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
5513 switch (mNALLengthSize) {
5514 case 1:
5515 return *data;
5516 case 2:
5517 return U16_AT(data);
5518 case 3:
5519 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
5520 case 4:
5521 return U32_AT(data);
5522 }
5523
5524 // This cannot happen, mNALLengthSize springs to life by adding 1 to
5525 // a 2-bit integer.
5526 CHECK(!"Should not be here.");
5527
5528 return 0;
5529 }
5530
parseHEVCLayerId(const uint8_t * data,size_t size)5531 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
5532 if (data == nullptr || size < mNALLengthSize + 2) {
5533 return -1;
5534 }
5535
5536 // HEVC NAL-header (16-bit)
5537 // 1 6 6 3
5538 // |-|uuuuuu|------|iii|
5539 // ^ ^
5540 // NAL_type layer_id + 1
5541 //
5542 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
5543 enum {
5544 TSA_N = 2,
5545 TSA_R = 3,
5546 STSA_N = 4,
5547 STSA_R = 5,
5548 };
5549
5550 data += mNALLengthSize;
5551 uint16_t nalHeader = data[0] << 8 | data[1];
5552
5553 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
5554 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
5555 int32_t layerIdPlusOne = nalHeader & 0x7u;
5556 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
5557 return layerIdPlusOne - 1;
5558 }
5559 return 0;
5560 }
5561
read(MediaBufferHelper ** out,const ReadOptions * options)5562 media_status_t MPEG4Source::read(
5563 MediaBufferHelper **out, const ReadOptions *options) {
5564 Mutex::Autolock autoLock(mLock);
5565
5566 CHECK(mStarted);
5567
5568 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
5569 *out = nullptr;
5570 return AMEDIA_ERROR_WOULD_BLOCK;
5571 }
5572
5573 if (mFirstMoofOffset > 0) {
5574 return fragmentedRead(out, options);
5575 }
5576
5577 *out = NULL;
5578
5579 int64_t targetSampleTimeUs = -1;
5580
5581 int64_t seekTimeUs;
5582 ReadOptions::SeekMode mode;
5583 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5584
5585 if (mIsHeif) {
5586 CHECK(mSampleTable == NULL);
5587 CHECK(mItemTable != NULL);
5588 int32_t imageIndex;
5589 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
5590 return AMEDIA_ERROR_MALFORMED;
5591 }
5592
5593 status_t err;
5594 if (seekTimeUs >= 0) {
5595 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
5596 } else {
5597 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
5598 }
5599 if (err != OK) {
5600 return AMEDIA_ERROR_UNKNOWN;
5601 }
5602 } else {
5603 uint32_t findFlags = 0;
5604 switch (mode) {
5605 case ReadOptions::SEEK_PREVIOUS_SYNC:
5606 findFlags = SampleTable::kFlagBefore;
5607 break;
5608 case ReadOptions::SEEK_NEXT_SYNC:
5609 findFlags = SampleTable::kFlagAfter;
5610 break;
5611 case ReadOptions::SEEK_CLOSEST_SYNC:
5612 case ReadOptions::SEEK_CLOSEST:
5613 findFlags = SampleTable::kFlagClosest;
5614 break;
5615 case ReadOptions::SEEK_FRAME_INDEX:
5616 findFlags = SampleTable::kFlagFrameIndex;
5617 break;
5618 default:
5619 CHECK(!"Should not be here.");
5620 break;
5621 }
5622 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
5623 seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
5624 }
5625
5626 uint32_t sampleIndex;
5627 status_t err = mSampleTable->findSampleAtTime(
5628 seekTimeUs, 1000000, mTimescale,
5629 &sampleIndex, findFlags);
5630
5631 if (mode == ReadOptions::SEEK_CLOSEST
5632 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5633 // We found the closest sample already, now we want the sync
5634 // sample preceding it (or the sample itself of course), even
5635 // if the subsequent sync sample is closer.
5636 findFlags = SampleTable::kFlagBefore;
5637 }
5638
5639 uint32_t syncSampleIndex = sampleIndex;
5640 // assume every audio sample is a sync sample. This works around
5641 // seek issues with files that were incorrectly written with an
5642 // empty or single-sample stss block for the audio track
5643 if (err == OK && !mIsAudio) {
5644 err = mSampleTable->findSyncSampleNear(
5645 sampleIndex, &syncSampleIndex, findFlags);
5646 }
5647
5648 uint64_t sampleTime;
5649 if (err == OK) {
5650 err = mSampleTable->getMetaDataForSample(
5651 sampleIndex, NULL, NULL, &sampleTime);
5652 }
5653
5654 if (err != OK) {
5655 if (err == ERROR_OUT_OF_RANGE) {
5656 // An attempt to seek past the end of the stream would
5657 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
5658 // this all the way to the MediaPlayer would cause abnormal
5659 // termination. Legacy behaviour appears to be to behave as if
5660 // we had seeked to the end of stream, ending normally.
5661 return AMEDIA_ERROR_END_OF_STREAM;
5662 }
5663 ALOGV("end of stream");
5664 return AMEDIA_ERROR_UNKNOWN;
5665 }
5666
5667 if (mode == ReadOptions::SEEK_CLOSEST
5668 || mode == ReadOptions::SEEK_FRAME_INDEX) {
5669 sampleTime -= mElstShiftStartTicks;
5670 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
5671 }
5672
5673 #if 0
5674 uint32_t syncSampleTime;
5675 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
5676 syncSampleIndex, NULL, NULL, &syncSampleTime));
5677
5678 ALOGI("seek to time %lld us => sample at time %lld us, "
5679 "sync sample at time %lld us",
5680 seekTimeUs,
5681 sampleTime * 1000000ll / mTimescale,
5682 syncSampleTime * 1000000ll / mTimescale);
5683 #endif
5684
5685 mCurrentSampleIndex = syncSampleIndex;
5686 }
5687
5688 if (mBuffer != NULL) {
5689 mBuffer->release();
5690 mBuffer = NULL;
5691 }
5692
5693 // fall through
5694 }
5695
5696 off64_t offset = 0;
5697 size_t size = 0;
5698 uint64_t cts, stts;
5699 bool isSyncSample;
5700 bool newBuffer = false;
5701 if (mBuffer == NULL) {
5702 newBuffer = true;
5703
5704 status_t err;
5705 if (!mIsHeif) {
5706 err = mSampleTable->getMetaDataForSample(
5707 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
5708 if(err == OK) {
5709 /* Composition Time Stamp cannot be negative. Some files have video Sample
5710 * Time(STTS)delta with zero value(b/117402420). Hence subtract only
5711 * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
5712 */
5713 cts -= std::min(cts, mElstShiftStartTicks);
5714 }
5715
5716 } else {
5717 err = mItemTable->getImageOffsetAndSize(
5718 options && options->getSeekTo(&seekTimeUs, &mode) ?
5719 &mCurrentSampleIndex : NULL, &offset, &size);
5720
5721 cts = stts = 0;
5722 isSyncSample = 0;
5723 ALOGV("image offset %lld, size %zu", (long long)offset, size);
5724 }
5725
5726 if (err != OK) {
5727 if (err == ERROR_END_OF_STREAM) {
5728 return AMEDIA_ERROR_END_OF_STREAM;
5729 }
5730 return AMEDIA_ERROR_UNKNOWN;
5731 }
5732
5733 err = mBufferGroup->acquire_buffer(&mBuffer);
5734
5735 if (err != OK) {
5736 CHECK(mBuffer == NULL);
5737 return AMEDIA_ERROR_UNKNOWN;
5738 }
5739 if (size > mBuffer->size()) {
5740 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5741 mBuffer->release();
5742 mBuffer = NULL;
5743 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
5744 }
5745 }
5746
5747 if (!mIsAVC && !mIsHEVC && !mIsAC4) {
5748 if (newBuffer) {
5749 if (mIsPcm) {
5750 // The twos' PCM block reader assumes that all samples has the same size.
5751
5752 uint32_t samplesToRead = mSampleTable->getLastSampleIndexInChunk()
5753 - mCurrentSampleIndex + 1;
5754 if (samplesToRead > kMaxPcmFrameSize) {
5755 samplesToRead = kMaxPcmFrameSize;
5756 }
5757
5758 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
5759 samplesToRead, size, mCurrentSampleIndex,
5760 mSampleTable->getLastSampleIndexInChunk());
5761
5762 size_t totalSize = samplesToRead * size;
5763 uint8_t* buf = (uint8_t *)mBuffer->data();
5764 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
5765 if (bytesRead < (ssize_t)totalSize) {
5766 mBuffer->release();
5767 mBuffer = NULL;
5768
5769 return AMEDIA_ERROR_IO;
5770 }
5771
5772 AMediaFormat *meta = mBuffer->meta_data();
5773 AMediaFormat_clear(meta);
5774 AMediaFormat_setInt64(
5775 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5776 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5777
5778 int32_t byteOrder;
5779 AMediaFormat_getInt32(mFormat,
5780 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
5781
5782 if (byteOrder == 1) {
5783 // Big-endian -> little-endian
5784 uint16_t *dstData = (uint16_t *)buf;
5785 uint16_t *srcData = (uint16_t *)buf;
5786
5787 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
5788 dstData[j] = ntohs(srcData[j]);
5789 }
5790 }
5791
5792 mCurrentSampleIndex += samplesToRead;
5793 mBuffer->set_range(0, totalSize);
5794 } else {
5795 ssize_t num_bytes_read =
5796 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5797
5798 if (num_bytes_read < (ssize_t)size) {
5799 mBuffer->release();
5800 mBuffer = NULL;
5801
5802 return AMEDIA_ERROR_IO;
5803 }
5804
5805 CHECK(mBuffer != NULL);
5806 mBuffer->set_range(0, size);
5807 AMediaFormat *meta = mBuffer->meta_data();
5808 AMediaFormat_clear(meta);
5809 AMediaFormat_setInt64(
5810 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5811 AMediaFormat_setInt64(
5812 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5813
5814 if (targetSampleTimeUs >= 0) {
5815 AMediaFormat_setInt64(
5816 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5817 }
5818
5819 if (isSyncSample) {
5820 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5821 }
5822
5823 ++mCurrentSampleIndex;
5824 }
5825 }
5826
5827 *out = mBuffer;
5828 mBuffer = NULL;
5829
5830 return AMEDIA_OK;
5831
5832 } else if (mIsAC4) {
5833 CHECK(mBuffer != NULL);
5834 // Make sure there is enough space to write the sync header and the raw frame
5835 if (mBuffer->range_length() < (7 + size)) {
5836 mBuffer->release();
5837 mBuffer = NULL;
5838
5839 return AMEDIA_ERROR_IO;
5840 }
5841
5842 uint8_t *dstData = (uint8_t *)mBuffer->data();
5843 size_t dstOffset = 0;
5844 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
5845 // AC40 sync word, meaning no CRC at the end of the frame
5846 dstData[dstOffset++] = 0xAC;
5847 dstData[dstOffset++] = 0x40;
5848 dstData[dstOffset++] = 0xFF;
5849 dstData[dstOffset++] = 0xFF;
5850 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
5851 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
5852 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
5853
5854 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
5855 if (numBytesRead != (ssize_t)size) {
5856 mBuffer->release();
5857 mBuffer = NULL;
5858
5859 return AMEDIA_ERROR_IO;
5860 }
5861
5862 mBuffer->set_range(0, dstOffset + size);
5863 AMediaFormat *meta = mBuffer->meta_data();
5864 AMediaFormat_clear(meta);
5865 AMediaFormat_setInt64(
5866 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5867 AMediaFormat_setInt64(
5868 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5869
5870 if (targetSampleTimeUs >= 0) {
5871 AMediaFormat_setInt64(
5872 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5873 }
5874
5875 if (isSyncSample) {
5876 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5877 }
5878
5879 ++mCurrentSampleIndex;
5880
5881 *out = mBuffer;
5882 mBuffer = NULL;
5883
5884 return AMEDIA_OK;
5885 } else {
5886 // Whole NAL units are returned but each fragment is prefixed by
5887 // the start code (0x00 00 00 01).
5888 ssize_t num_bytes_read = 0;
5889 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
5890
5891 if (num_bytes_read < (ssize_t)size) {
5892 mBuffer->release();
5893 mBuffer = NULL;
5894
5895 return AMEDIA_ERROR_IO;
5896 }
5897
5898 uint8_t *dstData = (uint8_t *)mBuffer->data();
5899 size_t srcOffset = 0;
5900 size_t dstOffset = 0;
5901
5902 while (srcOffset < size) {
5903 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5904 size_t nalLength = 0;
5905 if (!isMalFormed) {
5906 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5907 srcOffset += mNALLengthSize;
5908 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
5909 }
5910
5911 if (isMalFormed) {
5912 //if nallength abnormal,ignore it.
5913 ALOGW("abnormal nallength, ignore this NAL");
5914 srcOffset = size;
5915 break;
5916 }
5917
5918 if (nalLength == 0) {
5919 continue;
5920 }
5921
5922 if (dstOffset > SIZE_MAX - 4 ||
5923 dstOffset + 4 > SIZE_MAX - nalLength ||
5924 dstOffset + 4 + nalLength > mBuffer->size()) {
5925 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
5926 android_errorWriteLog(0x534e4554, "27208621");
5927 mBuffer->release();
5928 mBuffer = NULL;
5929 return AMEDIA_ERROR_MALFORMED;
5930 }
5931
5932 dstData[dstOffset++] = 0;
5933 dstData[dstOffset++] = 0;
5934 dstData[dstOffset++] = 0;
5935 dstData[dstOffset++] = 1;
5936 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5937 srcOffset += nalLength;
5938 dstOffset += nalLength;
5939 }
5940 CHECK_EQ(srcOffset, size);
5941 CHECK(mBuffer != NULL);
5942 mBuffer->set_range(0, dstOffset);
5943
5944 AMediaFormat *meta = mBuffer->meta_data();
5945 AMediaFormat_clear(meta);
5946 AMediaFormat_setInt64(
5947 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
5948 AMediaFormat_setInt64(
5949 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
5950
5951 if (targetSampleTimeUs >= 0) {
5952 AMediaFormat_setInt64(
5953 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
5954 }
5955
5956 if (mIsAVC) {
5957 uint32_t layerId = FindAVCLayerId(
5958 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5959 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
5960 } else if (mIsHEVC) {
5961 int32_t layerId = parseHEVCLayerId(
5962 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5963 if (layerId >= 0) {
5964 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
5965 }
5966 }
5967
5968 if (isSyncSample) {
5969 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
5970 }
5971
5972 ++mCurrentSampleIndex;
5973
5974 *out = mBuffer;
5975 mBuffer = NULL;
5976
5977 return AMEDIA_OK;
5978 }
5979 }
5980
// Reads the next sample of a fragmented (moof-based) track into a buffer from
// mBufferGroup and returns it through |out|. read() calls this, with mLock
// held, when mFirstMoofOffset > 0.
//
// |options| may be null. A seek request repositions the reader on a fragment
// boundary: with segment index entries (mSegments) the fragment containing the
// requested time is chosen, otherwise the reader goes back to the first
// fragment. Samples come from mCurrentSamples, which parseChunk() refills one
// fragment at a time.
//
// Returns AMEDIA_OK with *out set, AMEDIA_ERROR_END_OF_STREAM when no further
// fragment or sample is available, AMEDIA_ERROR_IO on a short read,
// AMEDIA_ERROR_MALFORMED for inconsistent sizes, and AMEDIA_ERROR_UNKNOWN when
// parsing a fragment or acquiring a buffer fails.
media_status_t MPEG4Source::fragmentedRead(
        MediaBufferHelper **out, const ReadOptions *options) {

    ALOGV("MPEG4Source::fragmentedRead");

    CHECK(mStarted);

    *out = NULL;

    // Never assigned again in this function, so the TARGET_TIME key below is
    // not set on this path.
    int64_t targetSampleTimeUs = -1;

    int64_t seekTimeUs;
    ReadOptions::SeekMode mode;
    if (options && options->getSeekTo(&seekTimeUs, &mode)) {

        // Shift the requested time by the edit-list start offset (ticks -> us).
        seekTimeUs += ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
        ALOGV("shifted seekTimeUs :%" PRId64 ", mElstShiftStartTicks:%" PRIu64, seekTimeUs,
                mElstShiftStartTicks);

        int numSidxEntries = mSegments.size();
        if (numSidxEntries != 0) {
            // Walk the segment index, accumulating start time and file offset,
            // until the segment that contains the requested time is found.
            int64_t totalTime = 0;
            off64_t totalOffset = mFirstMoofOffset;
            for (int i = 0; i < numSidxEntries; i++) {
                const SidxEntry *se = &mSegments[i];
                if (totalTime + se->mDurationUs > seekTimeUs) {
                    // The requested time is somewhere in this segment
                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
                        // requested next sync, or closest sync and it was closer to the end of
                        // this segment
                        totalTime += se->mDurationUs;
                        totalOffset += se->mSize;
                    }
                    break;
                }
                totalTime += se->mDurationUs;
                totalOffset += se->mSize;
            }
            // Restart parsing at the chosen fragment.
            mCurrentMoofOffset = totalOffset;
            mNextMoofOffset = -1;
            mCurrentSamples.clear();
            mCurrentSampleIndex = 0;
            status_t err = parseChunk(&totalOffset);
            if (err != OK) {
                return AMEDIA_ERROR_UNKNOWN;
            }
            // Convert the segment start time from microseconds to track ticks.
            mCurrentTime = totalTime * mTimescale / 1000000ll;
        } else {
            // without sidx boxes, we can only seek to 0
            mCurrentMoofOffset = mFirstMoofOffset;
            mNextMoofOffset = -1;
            mCurrentSamples.clear();
            mCurrentSampleIndex = 0;
            off64_t tmp = mCurrentMoofOffset;
            status_t err = parseChunk(&tmp);
            if (err != OK) {
                return AMEDIA_ERROR_UNKNOWN;
            }
            mCurrentTime = 0;
        }

        // Any buffer left over from before the seek is stale.
        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }

        // fall through
    }

    off64_t offset = 0;
    size_t size = 0;
    uint64_t cts = 0;
    bool isSyncSample = false;
    bool newBuffer = false;
    if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
        newBuffer = true;

        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }
        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
            // move to next fragment if there is one
            if (mNextMoofOffset <= mCurrentMoofOffset) {
                return AMEDIA_ERROR_END_OF_STREAM;
            }
            off64_t nextMoof = mNextMoofOffset;
            mCurrentMoofOffset = nextMoof;
            mCurrentSamples.clear();
            mCurrentSampleIndex = 0;
            status_t err = parseChunk(&nextMoof);
            if (err != OK) {
                return AMEDIA_ERROR_UNKNOWN;
            }
            // The next fragment produced no samples for this track.
            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
                return AMEDIA_ERROR_END_OF_STREAM;
            }
        }

        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
        offset = smpl->offset;
        size = smpl->size;
        cts = mCurrentTime + smpl->compositionOffset;
        /* Composition Time Stamp cannot be negative. Some files have video Sample
        * Time(STTS)delta with zero value(b/117402420). Hence subtract only
        * min(cts, mElstShiftStartTicks), so that audio tracks can be played.
        */
        cts -= std::min(cts, mElstShiftStartTicks);

        mCurrentTime += smpl->duration;
        // Only the first sample of each fragment is reported as a sync frame.
        isSyncSample = (mCurrentSampleIndex == 0);

        status_t err = mBufferGroup->acquire_buffer(&mBuffer);

        if (err != OK) {
            CHECK(mBuffer == NULL);
            ALOGV("acquire_buffer returned %d", err);
            return AMEDIA_ERROR_UNKNOWN;
        }
        if (size > mBuffer->size()) {
            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
            mBuffer->release();
            mBuffer = NULL;
            return AMEDIA_ERROR_UNKNOWN;
        }
    }

    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
    AMediaFormat *bufmeta = mBuffer->meta_data();
    AMediaFormat_clear(bufmeta);
    // Samples that carry encrypted-size entries get the crypto parameters
    // attached to the buffer metadata.
    if (smpl->encryptedsizes.size()) {
        // store clear/encrypted lengths in metadata
        AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
        AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
        AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
        AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
        AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
        AMediaFormat_setInt32(bufmeta,
                AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
        AMediaFormat_setInt32(bufmeta,
                AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);

        // Prefer an IV stored in the track format; otherwise use the
        // per-sample IV.
        void *iv = NULL;
        size_t ivlength = 0;
        if (!AMediaFormat_getBuffer(mFormat,
                "crypto-iv", &iv, &ivlength)) {
            iv = (void *) smpl->iv;
            ivlength = 16; // use 16 or the actual size?
        }
        AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
    }

    if (!mIsAVC && !mIsHEVC) {
        // Non-NAL payloads are copied to the output buffer unchanged.
        if (newBuffer) {
            if (!isInRange((size_t)0u, mBuffer->size(), size)) {
                mBuffer->release();
                mBuffer = NULL;

                ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
                return AMEDIA_ERROR_MALFORMED;
            }

            ssize_t num_bytes_read =
                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);

            if (num_bytes_read < (ssize_t)size) {
                mBuffer->release();
                mBuffer = NULL;

                ALOGE("i/o error");
                return AMEDIA_ERROR_IO;
            }

            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);
            AMediaFormat_setInt64(bufmeta,
                    AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
            AMediaFormat_setInt64(bufmeta,
                    AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);

            if (targetSampleTimeUs >= 0) {
                AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
            }

            // NOTE(review): mIsAVC and mIsHEVC are both false in this branch,
            // so this layer-id lookup looks unreachable -- confirm.
            if (mIsAVC) {
                uint32_t layerId = FindAVCLayerId(
                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
                AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
            } else if (mIsHEVC) {
                int32_t layerId = parseHEVCLayerId(
                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
                if (layerId >= 0) {
                    AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
                }
            }

            if (isSyncSample) {
                AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
            }

            ++mCurrentSampleIndex;
        }

        *out = mBuffer;
        mBuffer = NULL;

        return AMEDIA_OK;

    } else {
        ALOGV("whole NAL");
        // Whole NAL units are returned but each fragment is prefixed by
        // the start code (0x00 00 00 01).
        ssize_t num_bytes_read = 0;
        void *data = NULL;
        bool isMalFormed = false;
        // The sample is staged in mSrcBuffer, so it must not exceed the
        // track's declared maximum input size.
        int32_t max_size;
        if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
                || !isInRange((size_t)0u, (size_t)max_size, size)) {
            isMalFormed = true;
        } else {
            data = mSrcBuffer;
        }

        if (isMalFormed || data == NULL) {
            ALOGE("isMalFormed size %zu", size);
            if (mBuffer != NULL) {
                mBuffer->release();
                mBuffer = NULL;
            }
            return AMEDIA_ERROR_MALFORMED;
        }
        num_bytes_read = mDataSource->readAt(offset, data, size);

        if (num_bytes_read < (ssize_t)size) {
            mBuffer->release();
            mBuffer = NULL;

            ALOGE("i/o error");
            return AMEDIA_ERROR_IO;
        }

        uint8_t *dstData = (uint8_t *)mBuffer->data();
        size_t srcOffset = 0;
        size_t dstOffset = 0;

        // Rewrite each length-prefixed NAL unit as start code + payload.
        while (srcOffset < size) {
            isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
            size_t nalLength = 0;
            if (!isMalFormed) {
                nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                srcOffset += mNALLengthSize;
                // The payload must lie inside the sample, and start code plus
                // payload must fit in the output buffer.
                isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
                        || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
                        || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
            }

            // A bad NAL length fails the whole read here.
            if (isMalFormed) {
                ALOGE("Video is malformed; nalLength %zu", nalLength);
                mBuffer->release();
                mBuffer = NULL;
                return AMEDIA_ERROR_MALFORMED;
            }

            if (nalLength == 0) {
                continue;
            }

            if (dstOffset > SIZE_MAX - 4 ||
                    dstOffset + 4 > SIZE_MAX - nalLength ||
                    dstOffset + 4 + nalLength > mBuffer->size()) {
                ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
                android_errorWriteLog(0x534e4554, "26365349");
                mBuffer->release();
                mBuffer = NULL;
                return AMEDIA_ERROR_MALFORMED;
            }

            dstData[dstOffset++] = 0;
            dstData[dstOffset++] = 0;
            dstData[dstOffset++] = 0;
            dstData[dstOffset++] = 1;
            memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
            srcOffset += nalLength;
            dstOffset += nalLength;
        }
        CHECK_EQ(srcOffset, size);
        CHECK(mBuffer != NULL);
        mBuffer->set_range(0, dstOffset);

        AMediaFormat *bufmeta = mBuffer->meta_data();
        AMediaFormat_setInt64(bufmeta,
                AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
        AMediaFormat_setInt64(bufmeta,
                AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);

        if (targetSampleTimeUs >= 0) {
            AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
        }

        if (isSyncSample) {
            AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
        }

        ++mCurrentSampleIndex;

        *out = mBuffer;
        mBuffer = NULL;

        return AMEDIA_OK;
    }

    // Not reached: both branches above return.
    return AMEDIA_OK;
}
6298
findTrackByMimePrefix(const char * mimePrefix)6299 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6300 const char *mimePrefix) {
6301 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6302 const char *mime;
6303 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6304 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6305 return track;
6306 }
6307 }
6308
6309 return NULL;
6310 }
6311
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6312 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6313 uint8_t header[8];
6314
6315 ssize_t n = source->readAt(4, header, sizeof(header));
6316 if (n < (ssize_t)sizeof(header)) {
6317 return false;
6318 }
6319
6320 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6321 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6322 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6323 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6324 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6325 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6326 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6327 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
6328 *confidence = 0.4;
6329
6330 return true;
6331 }
6332
6333 return false;
6334 }
6335
isCompatibleBrand(uint32_t fourcc)6336 static bool isCompatibleBrand(uint32_t fourcc) {
6337 static const uint32_t kCompatibleBrands[] = {
6338 FOURCC("isom"),
6339 FOURCC("iso2"),
6340 FOURCC("avc1"),
6341 FOURCC("hvc1"),
6342 FOURCC("hev1"),
6343 FOURCC("av01"),
6344 FOURCC("3gp4"),
6345 FOURCC("mp41"),
6346 FOURCC("mp42"),
6347 FOURCC("dash"),
6348 FOURCC("nvr1"),
6349
6350 // Won't promise that the following file types can be played.
6351 // Just give these file types a chance.
6352 FOURCC("qt "), // Apple's QuickTime
6353 FOURCC("MSNV"), // Sony's PSP
6354 FOURCC("wmf "),
6355
6356 FOURCC("3g2a"), // 3GPP2
6357 FOURCC("3g2b"),
6358 FOURCC("mif1"), // HEIF image
6359 FOURCC("heic"), // HEIF image
6360 FOURCC("msf1"), // HEIF image sequence
6361 FOURCC("hevc"), // HEIF image sequence
6362 };
6363
6364 for (size_t i = 0;
6365 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6366 ++i) {
6367 if (kCompatibleBrands[i] == fourcc) {
6368 return true;
6369 }
6370 }
6371
6372 return false;
6373 }
6374
6375 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6376 // compatible brand is present.
6377 // Also try to identify where this file's metadata ends
6378 // (end of the 'moov' atom) and report it to the caller as part of
6379 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)6380 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
6381 // We scan up to 128 bytes to identify this file as an MP4.
6382 static const off64_t kMaxScanOffset = 128ll;
6383
6384 off64_t offset = 0ll;
6385 bool foundGoodFileType = false;
6386 off64_t moovAtomEndOffset = -1ll;
6387 bool done = false;
6388
6389 while (!done && offset < kMaxScanOffset) {
6390 uint32_t hdr[2];
6391 if (source->readAt(offset, hdr, 8) < 8) {
6392 return false;
6393 }
6394
6395 uint64_t chunkSize = ntohl(hdr[0]);
6396 uint32_t chunkType = ntohl(hdr[1]);
6397 off64_t chunkDataOffset = offset + 8;
6398
6399 if (chunkSize == 1) {
6400 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
6401 return false;
6402 }
6403
6404 chunkSize = ntoh64(chunkSize);
6405 chunkDataOffset += 8;
6406
6407 if (chunkSize < 16) {
6408 // The smallest valid chunk is 16 bytes long in this case.
6409 return false;
6410 }
6411
6412 } else if (chunkSize < 8) {
6413 // The smallest valid chunk is 8 bytes long.
6414 return false;
6415 }
6416
6417 // (data_offset - offset) is either 8 or 16
6418 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
6419 if (chunkDataSize < 0) {
6420 ALOGE("b/23540914");
6421 return false;
6422 }
6423
6424 char chunkstring[5];
6425 MakeFourCCString(chunkType, chunkstring);
6426 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
6427 chunkstring, chunkSize, (long long)offset);
6428 switch (chunkType) {
6429 case FOURCC("ftyp"):
6430 {
6431 if (chunkDataSize < 8) {
6432 return false;
6433 }
6434
6435 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
6436 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
6437 if (i == 1) {
6438 // Skip this index, it refers to the minorVersion,
6439 // not a brand.
6440 continue;
6441 }
6442
6443 uint32_t brand;
6444 if (source->readAt(
6445 chunkDataOffset + 4 * i, &brand, 4) < 4) {
6446 return false;
6447 }
6448
6449 brand = ntohl(brand);
6450
6451 if (isCompatibleBrand(brand)) {
6452 foundGoodFileType = true;
6453 break;
6454 }
6455 }
6456
6457 if (!foundGoodFileType) {
6458 return false;
6459 }
6460
6461 break;
6462 }
6463
6464 case FOURCC("moov"):
6465 {
6466 moovAtomEndOffset = offset + chunkSize;
6467
6468 done = true;
6469 break;
6470 }
6471
6472 default:
6473 break;
6474 }
6475
6476 offset += chunkSize;
6477 }
6478
6479 if (!foundGoodFileType) {
6480 return false;
6481 }
6482
6483 *confidence = 0.4f;
6484
6485 return true;
6486 }
6487
CreateExtractor(CDataSource * source,void *)6488 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
6489 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
6490 }
6491
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)6492 static CreatorFunc Sniff(
6493 CDataSource *source, float *confidence, void **,
6494 FreeMetaFunc *) {
6495 DataSourceHelper helper(source);
6496 if (BetterSniffMPEG4(&helper, confidence)) {
6497 return CreateExtractor;
6498 }
6499
6500 if (LegacySniffMPEG4(&helper, confidence)) {
6501 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
6502 return CreateExtractor;
6503 }
6504
6505 return NULL;
6506 }
6507
// Null-terminated list of file extensions, passed to the ExtractorDef
// returned by GETEXTRACTORDEF() alongside Sniff.
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    nullptr
};
6522
6523 extern "C" {
6524 // This is the only symbol that needs to be exported
6525 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()6526 ExtractorDef GETEXTRACTORDEF() {
6527 return {
6528 EXTRACTORDEF_VERSION,
6529 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
6530 2, // version
6531 "MP4 Extractor",
6532 { .v3 = {Sniff, extensions} },
6533 };
6534 }
6535
6536 } // extern "C"
6537
6538 } // namespace android
6539