1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
#include <ctype.h>
#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
26
27 #include <utils/Log.h>
28
29 #include "MPEG4Extractor.h"
30 #include "SampleTable.h"
31 #include "ItemTable.h"
32 #include "include/ESDS.h"
33
34 #include <media/ExtractorUtils.h>
35 #include <media/MediaTrack.h>
36 #include <media/stagefright/foundation/ABitReader.h>
37 #include <media/stagefright/foundation/ABuffer.h>
38 #include <media/stagefright/foundation/ADebug.h>
39 #include <media/stagefright/foundation/AMessage.h>
40 #include <media/stagefright/foundation/AUtils.h>
41 #include <media/stagefright/foundation/ByteUtils.h>
42 #include <media/stagefright/foundation/ColorUtils.h>
43 #include <media/stagefright/foundation/avc_utils.h>
44 #include <media/stagefright/foundation/hexdump.h>
45 #include <media/stagefright/MediaBufferBase.h>
46 #include <media/stagefright/MediaBufferGroup.h>
47 #include <media/stagefright/MediaDefs.h>
48 #include <media/stagefright/MetaData.h>
49 #include <utils/String8.h>
50
51 #include <byteswap.h>
52 #include "include/ID3.h"
53
54 #ifndef UINT32_MAX
55 #define UINT32_MAX (4294967295U)
56 #endif
57
58 namespace android {
59
// File-local size limits applied while parsing.
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the stream-header read to this many bytes)
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (64 MiB; parseChunk() rejects any non-'mdat' atom whose payload exceeds it)
    kMaxAtomSize = 64 * 1024 * 1024,
};
68
// MediaTrack implementation declared for this extractor. Only the declaration
// is shown here; the member functions are defined elsewhere in the file.
class MPEG4Source : public MediaTrack {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(MetaDataBase &format,
                DataSourceBase *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable);
    virtual status_t init();

    virtual status_t start(MetaDataBase *params = NULL);
    virtual status_t stop();

    virtual status_t getFormat(MetaDataBase &);

    virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
    // Always reports true.
    virtual bool supportNonblockingRead() { return true; }
    virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // Held by reference / raw pointer: the referenced objects must outlive
    // this source (see the ownership note on the constructor).
    MetaDataBase &mFormat;
    DataSourceBase *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    // NOTE(review): the three offsets below presumably track 'moof' boxes for
    // fragmented files -- confirm against the member function definitions.
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    // NOTE(review): judging by the parseSampleAuxiliaryInformation*() helpers
    // declared below, these hold per-fragment sample auxiliary information --
    // confirm against their definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBufferBase *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    sp<ItemTable> mItemTable;

    // Parsing helpers; their definitions are outside this declaration.
    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
    status_t parseSampleEncryption(off64_t offset);

    // Values read from a track fragment header. The Flags bits name which of
    // the optional fields are present.
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample bookkeeping: location and size of the sample data, timing
    // fields, and encryption details (IV plus clear/encrypted byte counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private so the class cannot be copied from outside.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
188
189 // This custom data source wraps an existing one and satisfies requests
190 // falling entirely within a cached range from the cache while forwarding
191 // all remaining requests to the wrapped datasource.
192 // This is used to cache the full sampletable metadata for a single track,
193 // possibly wrapping multiple times to cover all tracks, i.e.
194 // Each CachedRangedDataSource caches the sampletable metadata for a single track.
195
// DataSourceBase wrapper that keeps one byte range of the wrapped source in
// memory. Reads that fall entirely inside that range are served from the
// in-memory copy; everything else is forwarded to the wrapped source.
struct CachedRangedDataSource : public DataSourceBase {
    // Wraps "source"; no range is cached and "source" is not owned until
    // setCachedRange() succeeds with assumeSourceOwnershipOnSuccess == true.
    explicit CachedRangedDataSource(DataSourceBase *source);
    virtual ~CachedRangedDataSource();

    // These four forward to the wrapped source, except that readAt() first
    // tries to satisfy the request from the cached range.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Drops any existing cache, then reads "size" bytes at "offset" from the
    // wrapped source into a freshly allocated buffer.
    // Returns OK on success, -ENOMEM if the allocation fails, ERROR_IO if
    // fewer than "size" bytes could be read. The ownership flag is only
    // updated on success.
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceBase *mSource;
    // When true, the destructor deletes mSource.
    bool mOwnsDataSource;
    // Start offset and length of the cached range; both 0 when nothing is cached.
    off64_t mCachedOffset;
    size_t mCachedSize;
    // malloc()ed copy of the cached range, or NULL.
    uint8_t *mCache;

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Declared private so the class cannot be copied from outside.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
222
CachedRangedDataSource(DataSourceBase * source)223 CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source)
224 : mSource(source),
225 mOwnsDataSource(false),
226 mCachedOffset(0),
227 mCachedSize(0),
228 mCache(NULL) {
229 }
230
~CachedRangedDataSource()231 CachedRangedDataSource::~CachedRangedDataSource() {
232 clearCache();
233 if (mOwnsDataSource) {
234 delete (CachedRangedDataSource*)mSource;
235 }
236 }
237
clearCache()238 void CachedRangedDataSource::clearCache() {
239 if (mCache) {
240 free(mCache);
241 mCache = NULL;
242 }
243
244 mCachedOffset = 0;
245 mCachedSize = 0;
246 }
247
initCheck() const248 status_t CachedRangedDataSource::initCheck() const {
249 return mSource->initCheck();
250 }
251
readAt(off64_t offset,void * data,size_t size)252 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
253 Mutex::Autolock autoLock(mLock);
254
255 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
256 memcpy(data, &mCache[offset - mCachedOffset], size);
257 return size;
258 }
259
260 return mSource->readAt(offset, data, size);
261 }
262
getSize(off64_t * size)263 status_t CachedRangedDataSource::getSize(off64_t *size) {
264 return mSource->getSize(size);
265 }
266
flags()267 uint32_t CachedRangedDataSource::flags() {
268 return mSource->flags();
269 }
270
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)271 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
272 size_t size,
273 bool assumeSourceOwnershipOnSuccess) {
274 Mutex::Autolock autoLock(mLock);
275
276 clearCache();
277
278 mCache = (uint8_t *)malloc(size);
279
280 if (mCache == NULL) {
281 return -ENOMEM;
282 }
283
284 mCachedOffset = offset;
285 mCachedSize = size;
286
287 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
288
289 if (err < (ssize_t)size) {
290 clearCache();
291
292 return ERROR_IO;
293 }
294 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
295 return OK;
296 }
297
298 ////////////////////////////////////////////////////////////////////////////////
299
// Debug switch: when true, MPEG4Extractor::parseChunk() prints every chunk it
// finds and hex-dumps up to the first 256 bytes of it.
static const bool kUseHexDump = false;
301
FourCC2MIME(uint32_t fourcc)302 static const char *FourCC2MIME(uint32_t fourcc) {
303 switch (fourcc) {
304 case FOURCC('m', 'p', '4', 'a'):
305 return MEDIA_MIMETYPE_AUDIO_AAC;
306
307 case FOURCC('s', 'a', 'm', 'r'):
308 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
309
310 case FOURCC('s', 'a', 'w', 'b'):
311 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
312
313 case FOURCC('m', 'p', '4', 'v'):
314 return MEDIA_MIMETYPE_VIDEO_MPEG4;
315
316 case FOURCC('s', '2', '6', '3'):
317 case FOURCC('h', '2', '6', '3'):
318 case FOURCC('H', '2', '6', '3'):
319 return MEDIA_MIMETYPE_VIDEO_H263;
320
321 case FOURCC('a', 'v', 'c', '1'):
322 return MEDIA_MIMETYPE_VIDEO_AVC;
323
324 case FOURCC('h', 'v', 'c', '1'):
325 case FOURCC('h', 'e', 'v', '1'):
326 return MEDIA_MIMETYPE_VIDEO_HEVC;
327 default:
328 ALOGW("Unknown fourcc: %c%c%c%c",
329 (fourcc >> 24) & 0xff,
330 (fourcc >> 16) & 0xff,
331 (fourcc >> 8) & 0xff,
332 fourcc & 0xff
333 );
334 return "application/octet-stream";
335 }
336 }
337
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)338 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
339 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
340 // AMR NB audio is always mono, 8kHz
341 *channels = 1;
342 *rate = 8000;
343 return true;
344 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
345 // AMR WB audio is always mono, 16kHz
346 *channels = 1;
347 *rate = 16000;
348 return true;
349 }
350 return false;
351 }
352
MPEG4Extractor(DataSourceBase * source,const char * mime)353 MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime)
354 : mMoofOffset(0),
355 mMoofFound(false),
356 mMdatFound(false),
357 mDataSource(source),
358 mCachedSource(NULL),
359 mInitCheck(NO_INIT),
360 mHeaderTimescale(0),
361 mIsQT(false),
362 mIsHeif(false),
363 mHasMoovBox(false),
364 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
365 mFirstTrack(NULL),
366 mLastTrack(NULL) {
367 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
368 }
369
~MPEG4Extractor()370 MPEG4Extractor::~MPEG4Extractor() {
371 Track *track = mFirstTrack;
372 while (track) {
373 Track *next = track->next;
374
375 delete track;
376 track = next;
377 }
378 mFirstTrack = mLastTrack = NULL;
379
380 for (size_t i = 0; i < mPssh.size(); i++) {
381 delete [] mPssh[i].data;
382 }
383 mPssh.clear();
384
385 delete mCachedSource;
386 }
387
flags() const388 uint32_t MPEG4Extractor::flags() const {
389 return CAN_PAUSE |
390 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
391 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
392 }
393
getMetaData(MetaDataBase & meta)394 status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) {
395 status_t err;
396 if ((err = readMetaData()) != OK) {
397 return UNKNOWN_ERROR;
398 }
399 meta = mFileMetaData;
400 return OK;
401 }
402
countTracks()403 size_t MPEG4Extractor::countTracks() {
404 status_t err;
405 if ((err = readMetaData()) != OK) {
406 ALOGV("MPEG4Extractor::countTracks: no tracks");
407 return 0;
408 }
409
410 size_t n = 0;
411 Track *track = mFirstTrack;
412 while (track) {
413 ++n;
414 track = track->next;
415 }
416
417 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
418 return n;
419 }
420
getTrackMetaData(MetaDataBase & meta,size_t index,uint32_t flags)421 status_t MPEG4Extractor::getTrackMetaData(
422 MetaDataBase &meta,
423 size_t index, uint32_t flags) {
424 status_t err;
425 if ((err = readMetaData()) != OK) {
426 return UNKNOWN_ERROR;
427 }
428
429 Track *track = mFirstTrack;
430 while (index > 0) {
431 if (track == NULL) {
432 return UNKNOWN_ERROR;
433 }
434
435 track = track->next;
436 --index;
437 }
438
439 if (track == NULL) {
440 return UNKNOWN_ERROR;
441 }
442
443 [=] {
444 int64_t duration;
445 int32_t samplerate;
446 if (track->has_elst && mHeaderTimescale != 0 &&
447 track->meta.findInt64(kKeyDuration, &duration) &&
448 track->meta.findInt32(kKeySampleRate, &samplerate)) {
449
450 track->has_elst = false;
451
452 if (track->elst_segment_duration > INT64_MAX) {
453 return;
454 }
455 int64_t segment_duration = track->elst_segment_duration;
456 int64_t media_time = track->elst_media_time;
457 int64_t halfscale = mHeaderTimescale / 2;
458 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
459 ", halfscale = %" PRId64 ", timescale = %d",
460 segment_duration,
461 media_time,
462 halfscale,
463 mHeaderTimescale);
464
465 int64_t delay;
466 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
467 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
468 __builtin_add_overflow(delay, halfscale, &delay) ||
469 (delay /= mHeaderTimescale, false) ||
470 delay > INT32_MAX ||
471 delay < INT32_MIN) {
472 return;
473 }
474 ALOGV("delay = %" PRId64, delay);
475 track->meta.setInt32(kKeyEncoderDelay, delay);
476
477 int64_t scaled_duration;
478 // scaled_duration = duration * mHeaderTimescale;
479 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
480 return;
481 }
482 ALOGV("scaled_duration = %" PRId64, scaled_duration);
483
484 int64_t segment_end;
485 int64_t padding;
486 // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
487 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
488 __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
489 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
490 return;
491 }
492 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
493
494 if (padding < 0) {
495 // track duration from media header (which is what kKeyDuration is) might
496 // be slightly shorter than the segment duration, which would make the
497 // padding negative. Clamp to zero.
498 padding = 0;
499 }
500
501 int64_t paddingsamples;
502 int64_t halfscale_e6;
503 int64_t timescale_e6;
504 // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
505 // / (mHeaderTimescale * 1000000);
506 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
507 __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
508 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
509 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
510 (paddingsamples /= timescale_e6, false) ||
511 paddingsamples > INT32_MAX) {
512 return;
513 }
514 ALOGV("paddingsamples = %" PRId64, paddingsamples);
515 track->meta.setInt32(kKeyEncoderPadding, paddingsamples);
516 }
517 }();
518
519 if ((flags & kIncludeExtensiveMetaData)
520 && !track->includes_expensive_metadata) {
521 track->includes_expensive_metadata = true;
522
523 const char *mime;
524 CHECK(track->meta.findCString(kKeyMIMEType, &mime));
525 if (!strncasecmp("video/", mime, 6)) {
526 // MPEG2 tracks do not provide CSD, so read the stream header
527 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
528 off64_t offset;
529 size_t size;
530 if (track->sampleTable->getMetaDataForSample(
531 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
532 if (size > kMaxTrackHeaderSize) {
533 size = kMaxTrackHeaderSize;
534 }
535 uint8_t header[kMaxTrackHeaderSize];
536 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
537 track->meta.setData(kKeyStreamHeader, 'mdat', header, size);
538 }
539 }
540 }
541
542 if (mMoofOffset > 0) {
543 int64_t duration;
544 if (track->meta.findInt64(kKeyDuration, &duration)) {
545 // nothing fancy, just pick a frame near 1/4th of the duration
546 track->meta.setInt64(
547 kKeyThumbnailTime, duration / 4);
548 }
549 } else {
550 uint32_t sampleIndex;
551 uint32_t sampleTime;
552 if (track->timescale != 0 &&
553 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
554 && track->sampleTable->getMetaDataForSample(
555 sampleIndex, NULL /* offset */, NULL /* size */,
556 &sampleTime) == OK) {
557 track->meta.setInt64(
558 kKeyThumbnailTime,
559 ((int64_t)sampleTime * 1000000) / track->timescale);
560 }
561 }
562 }
563 }
564
565 meta = track->meta;
566 return OK;
567 }
568
readMetaData()569 status_t MPEG4Extractor::readMetaData() {
570 if (mInitCheck != NO_INIT) {
571 return mInitCheck;
572 }
573
574 off64_t offset = 0;
575 status_t err;
576 bool sawMoovOrSidx = false;
577
578 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
579 (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
580 (mItemTable != NULL) && mItemTable->isValid()))) {
581 off64_t orig_offset = offset;
582 err = parseChunk(&offset, 0);
583
584 if (err != OK && err != UNKNOWN_ERROR) {
585 break;
586 } else if (offset <= orig_offset) {
587 // only continue parsing if the offset was advanced,
588 // otherwise we might end up in an infinite loop
589 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
590 err = ERROR_MALFORMED;
591 break;
592 } else if (err == UNKNOWN_ERROR) {
593 sawMoovOrSidx = true;
594 }
595 }
596
597 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
598 off64_t exifOffset;
599 size_t exifSize;
600 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
601 mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset);
602 mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize);
603 }
604 for (uint32_t imageIndex = 0;
605 imageIndex < mItemTable->countImages(); imageIndex++) {
606 sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
607 if (meta == NULL) {
608 ALOGE("heif image %u has no meta!", imageIndex);
609 continue;
610 }
611 // Some heif files advertise image sequence brands (eg. 'hevc') in
612 // ftyp box, but don't have any valid tracks in them. Instead of
613 // reporting the entire file as malformed, we override the error
614 // to allow still images to be extracted.
615 if (err != OK) {
616 ALOGW("Extracting still images only");
617 err = OK;
618 }
619 mInitCheck = OK;
620
621 ALOGV("adding HEIF image track %u", imageIndex);
622 Track *track = new Track;
623 track->next = NULL;
624 if (mLastTrack != NULL) {
625 mLastTrack->next = track;
626 } else {
627 mFirstTrack = track;
628 }
629 mLastTrack = track;
630
631 track->meta = *(meta.get());
632 track->meta.setInt32(kKeyTrackID, imageIndex);
633 track->includes_expensive_metadata = false;
634 track->skipTrack = false;
635 track->timescale = 1000000;
636 }
637 }
638
639 if (mInitCheck == OK) {
640 if (findTrackByMimePrefix("video/") != NULL) {
641 mFileMetaData.setCString(
642 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
643 } else if (findTrackByMimePrefix("audio/") != NULL) {
644 mFileMetaData.setCString(kKeyMIMEType, "audio/mp4");
645 } else if (findTrackByMimePrefix(
646 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
647 mFileMetaData.setCString(
648 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
649 } else {
650 mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream");
651 }
652 } else {
653 mInitCheck = err;
654 }
655
656 CHECK_NE(err, (status_t)NO_INIT);
657
658 // copy pssh data into file metadata
659 uint64_t psshsize = 0;
660 for (size_t i = 0; i < mPssh.size(); i++) {
661 psshsize += 20 + mPssh[i].datalen;
662 }
663 if (psshsize > 0 && psshsize <= UINT32_MAX) {
664 char *buf = (char*)malloc(psshsize);
665 if (!buf) {
666 ALOGE("b/28471206");
667 return NO_MEMORY;
668 }
669 char *ptr = buf;
670 for (size_t i = 0; i < mPssh.size(); i++) {
671 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
672 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
673 ptr += (20 + mPssh[i].datalen);
674 }
675 mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize);
676 free(buf);
677 }
678
679 return mInitCheck;
680 }
681
682 struct PathAdder {
PathAdderandroid::PathAdder683 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
684 : mPath(path) {
685 mPath->push(chunkType);
686 }
687
~PathAdderandroid::PathAdder688 ~PathAdder() {
689 mPath->pop();
690 }
691
692 private:
693 Vector<uint32_t> *mPath;
694
695 PathAdder(const PathAdder &);
696 PathAdder &operator=(const PathAdder &);
697 };
698
underMetaDataPath(const Vector<uint32_t> & path)699 static bool underMetaDataPath(const Vector<uint32_t> &path) {
700 return path.size() >= 5
701 && path[0] == FOURCC('m', 'o', 'o', 'v')
702 && path[1] == FOURCC('u', 'd', 't', 'a')
703 && path[2] == FOURCC('m', 'e', 't', 'a')
704 && path[3] == FOURCC('i', 'l', 's', 't');
705 }
706
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)707 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
708 return path.size() >= 2
709 && path[0] == FOURCC('m', 'o', 'o', 'v')
710 && path[1] == FOURCC('m', 'e', 't', 'a')
711 && (depth == 2
712 || (depth == 3
713 && (path[2] == FOURCC('h', 'd', 'l', 'r')
714 || path[2] == FOURCC('i', 'l', 's', 't')
715 || path[2] == FOURCC('k', 'e', 'y', 's'))));
716 }
717
718 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)719 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
720 // delta between mpeg4 time and unix epoch time
721 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
722 if (time_1904 < INT64_MIN + delta) {
723 return false;
724 }
725 time_t time_1970 = time_1904 - delta;
726
727 char tmp[32];
728 struct tm* tm = gmtime(&time_1970);
729 if (tm != NULL &&
730 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
731 s->setTo(tmp);
732 return true;
733 }
734 return false;
735 }
736
parseChunk(off64_t * offset,int depth)737 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
738 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
739
740 if (*offset < 0) {
741 ALOGE("b/23540914");
742 return ERROR_MALFORMED;
743 }
744 if (depth > 100) {
745 ALOGE("b/27456299");
746 return ERROR_MALFORMED;
747 }
748 uint32_t hdr[2];
749 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
750 return ERROR_IO;
751 }
752 uint64_t chunk_size = ntohl(hdr[0]);
753 int32_t chunk_type = ntohl(hdr[1]);
754 off64_t data_offset = *offset + 8;
755
756 if (chunk_size == 1) {
757 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
758 return ERROR_IO;
759 }
760 chunk_size = ntoh64(chunk_size);
761 data_offset += 8;
762
763 if (chunk_size < 16) {
764 // The smallest valid chunk is 16 bytes long in this case.
765 return ERROR_MALFORMED;
766 }
767 } else if (chunk_size == 0) {
768 if (depth == 0) {
769 // atom extends to end of file
770 off64_t sourceSize;
771 if (mDataSource->getSize(&sourceSize) == OK) {
772 chunk_size = (sourceSize - *offset);
773 } else {
774 // XXX could we just pick a "sufficiently large" value here?
775 ALOGE("atom size is 0, and data source has no size");
776 return ERROR_MALFORMED;
777 }
778 } else {
779 // not allowed for non-toplevel atoms, skip it
780 *offset += 4;
781 return OK;
782 }
783 } else if (chunk_size < 8) {
784 // The smallest valid chunk is 8 bytes long.
785 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
786 return ERROR_MALFORMED;
787 }
788
789 char chunk[5];
790 MakeFourCCString(chunk_type, chunk);
791 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
792
793 if (kUseHexDump) {
794 static const char kWhitespace[] = " ";
795 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
796 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
797
798 char buffer[256];
799 size_t n = chunk_size;
800 if (n > sizeof(buffer)) {
801 n = sizeof(buffer);
802 }
803 if (mDataSource->readAt(*offset, buffer, n)
804 < (ssize_t)n) {
805 return ERROR_IO;
806 }
807
808 hexdump(buffer, n);
809 }
810
811 PathAdder autoAdder(&mPath, chunk_type);
812
813 // (data_offset - *offset) is either 8 or 16
814 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
815 if (chunk_data_size < 0) {
816 ALOGE("b/23540914");
817 return ERROR_MALFORMED;
818 }
819 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
820 char errMsg[100];
821 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
822 ALOGE("%s (b/28615448)", errMsg);
823 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
824 return ERROR_MALFORMED;
825 }
826
827 if (chunk_type != FOURCC('c', 'p', 'r', 't')
828 && chunk_type != FOURCC('c', 'o', 'v', 'r')
829 && mPath.size() == 5 && underMetaDataPath(mPath)) {
830 off64_t stop_offset = *offset + chunk_size;
831 *offset = data_offset;
832 while (*offset < stop_offset) {
833 status_t err = parseChunk(offset, depth + 1);
834 if (err != OK) {
835 return err;
836 }
837 }
838
839 if (*offset != stop_offset) {
840 return ERROR_MALFORMED;
841 }
842
843 return OK;
844 }
845
846 switch(chunk_type) {
847 case FOURCC('m', 'o', 'o', 'v'):
848 case FOURCC('t', 'r', 'a', 'k'):
849 case FOURCC('m', 'd', 'i', 'a'):
850 case FOURCC('m', 'i', 'n', 'f'):
851 case FOURCC('d', 'i', 'n', 'f'):
852 case FOURCC('s', 't', 'b', 'l'):
853 case FOURCC('m', 'v', 'e', 'x'):
854 case FOURCC('m', 'o', 'o', 'f'):
855 case FOURCC('t', 'r', 'a', 'f'):
856 case FOURCC('m', 'f', 'r', 'a'):
857 case FOURCC('u', 'd', 't', 'a'):
858 case FOURCC('i', 'l', 's', 't'):
859 case FOURCC('s', 'i', 'n', 'f'):
860 case FOURCC('s', 'c', 'h', 'i'):
861 case FOURCC('e', 'd', 't', 's'):
862 case FOURCC('w', 'a', 'v', 'e'):
863 {
864 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
865 ALOGE("moov: depth %d", depth);
866 return ERROR_MALFORMED;
867 }
868
869 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) {
870 ALOGE("duplicate moov");
871 return ERROR_MALFORMED;
872 }
873
874 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
875 // store the offset of the first segment
876 mMoofFound = true;
877 mMoofOffset = *offset;
878 }
879
880 if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
881 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
882
883 if (mDataSource->flags()
884 & (DataSourceBase::kWantsPrefetching
885 | DataSourceBase::kIsCachingDataSource)) {
886 CachedRangedDataSource *cachedSource =
887 new CachedRangedDataSource(mDataSource);
888
889 if (cachedSource->setCachedRange(
890 *offset, chunk_size,
891 mCachedSource != NULL /* assume ownership on success */) == OK) {
892 mDataSource = mCachedSource = cachedSource;
893 } else {
894 delete cachedSource;
895 }
896 }
897
898 if (mLastTrack == NULL) {
899 return ERROR_MALFORMED;
900 }
901
902 mLastTrack->sampleTable = new SampleTable(mDataSource);
903 }
904
905 bool isTrack = false;
906 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
907 if (depth != 1) {
908 ALOGE("trak: depth %d", depth);
909 return ERROR_MALFORMED;
910 }
911 isTrack = true;
912
913 ALOGV("adding new track");
914 Track *track = new Track;
915 track->next = NULL;
916 if (mLastTrack) {
917 mLastTrack->next = track;
918 } else {
919 mFirstTrack = track;
920 }
921 mLastTrack = track;
922
923 track->includes_expensive_metadata = false;
924 track->skipTrack = false;
925 track->timescale = 0;
926 track->meta.setCString(kKeyMIMEType, "application/octet-stream");
927 track->has_elst = false;
928 track->subsample_encryption = false;
929 }
930
931 off64_t stop_offset = *offset + chunk_size;
932 *offset = data_offset;
933 while (*offset < stop_offset) {
934 status_t err = parseChunk(offset, depth + 1);
935 if (err != OK) {
936 if (isTrack) {
937 mLastTrack->skipTrack = true;
938 break;
939 }
940 return err;
941 }
942 }
943
944 if (*offset != stop_offset) {
945 return ERROR_MALFORMED;
946 }
947
948 if (isTrack) {
949 int32_t trackId;
950 // There must be exact one track header per track.
951 if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
952 mLastTrack->skipTrack = true;
953 }
954
955 status_t err = verifyTrack(mLastTrack);
956 if (err != OK) {
957 mLastTrack->skipTrack = true;
958 }
959
960 if (mLastTrack->skipTrack) {
961 ALOGV("skipping this track...");
962 Track *cur = mFirstTrack;
963
964 if (cur == mLastTrack) {
965 delete cur;
966 mFirstTrack = mLastTrack = NULL;
967 } else {
968 while (cur && cur->next != mLastTrack) {
969 cur = cur->next;
970 }
971 if (cur) {
972 cur->next = NULL;
973 }
974 delete mLastTrack;
975 mLastTrack = cur;
976 }
977
978 return OK;
979 }
980 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
981 mInitCheck = OK;
982
983 return UNKNOWN_ERROR; // Return a dummy error.
984 }
985 break;
986 }
987
988 case FOURCC('s', 'c', 'h', 'm'):
989 {
990
991 *offset += chunk_size;
992 if (!mLastTrack) {
993 return ERROR_MALFORMED;
994 }
995
996 uint32_t scheme_type;
997 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
998 return ERROR_IO;
999 }
1000 scheme_type = ntohl(scheme_type);
1001 int32_t mode = kCryptoModeUnencrypted;
1002 switch(scheme_type) {
1003 case FOURCC('c', 'b', 'c', '1'):
1004 {
1005 mode = kCryptoModeAesCbc;
1006 break;
1007 }
1008 case FOURCC('c', 'b', 'c', 's'):
1009 {
1010 mode = kCryptoModeAesCbc;
1011 mLastTrack->subsample_encryption = true;
1012 break;
1013 }
1014 case FOURCC('c', 'e', 'n', 'c'):
1015 {
1016 mode = kCryptoModeAesCtr;
1017 break;
1018 }
1019 case FOURCC('c', 'e', 'n', 's'):
1020 {
1021 mode = kCryptoModeAesCtr;
1022 mLastTrack->subsample_encryption = true;
1023 break;
1024 }
1025 }
1026 if (mode != kCryptoModeUnencrypted) {
1027 mLastTrack->meta.setInt32(kKeyCryptoMode, mode);
1028 }
1029 break;
1030 }
1031
1032
1033 case FOURCC('e', 'l', 's', 't'):
1034 {
1035 *offset += chunk_size;
1036
1037 if (!mLastTrack) {
1038 return ERROR_MALFORMED;
1039 }
1040
1041 // See 14496-12 8.6.6
1042 uint8_t version;
1043 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1044 return ERROR_IO;
1045 }
1046
1047 uint32_t entry_count;
1048 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1049 return ERROR_IO;
1050 }
1051
1052 if (entry_count != 1) {
1053 // we only support a single entry at the moment, for gapless playback
1054 ALOGW("ignoring edit list with %d entries", entry_count);
1055 } else {
1056 off64_t entriesoffset = data_offset + 8;
1057 uint64_t segment_duration;
1058 int64_t media_time;
1059
1060 if (version == 1) {
1061 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1062 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1063 return ERROR_IO;
1064 }
1065 } else if (version == 0) {
1066 uint32_t sd;
1067 int32_t mt;
1068 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1069 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1070 return ERROR_IO;
1071 }
1072 segment_duration = sd;
1073 media_time = mt;
1074 } else {
1075 return ERROR_IO;
1076 }
1077
1078 // save these for later, because the elst atom might precede
1079 // the atoms that actually gives us the duration and sample rate
1080 // needed to calculate the padding and delay values
1081 mLastTrack->has_elst = true;
1082 mLastTrack->elst_media_time = media_time;
1083 mLastTrack->elst_segment_duration = segment_duration;
1084 }
1085 break;
1086 }
1087
1088 case FOURCC('f', 'r', 'm', 'a'):
1089 {
1090 *offset += chunk_size;
1091
1092 uint32_t original_fourcc;
1093 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1094 return ERROR_IO;
1095 }
1096 original_fourcc = ntohl(original_fourcc);
1097 ALOGV("read original format: %d", original_fourcc);
1098
1099 if (mLastTrack == NULL) {
1100 return ERROR_MALFORMED;
1101 }
1102
1103 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1104 uint32_t num_channels = 0;
1105 uint32_t sample_rate = 0;
1106 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1107 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1108 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1109 }
1110 break;
1111 }
1112
1113 case FOURCC('t', 'e', 'n', 'c'):
1114 {
1115 *offset += chunk_size;
1116
1117 if (chunk_size < 32) {
1118 return ERROR_MALFORMED;
1119 }
1120
1121 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1122 // default IV size, 16 bytes default KeyID
1123 // (ISO 23001-7)
1124
1125 uint8_t version;
1126 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1127 < (ssize_t)sizeof(version)) {
1128 return ERROR_IO;
1129 }
1130
1131 uint8_t buf[4];
1132 memset(buf, 0, 4);
1133 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1134 return ERROR_IO;
1135 }
1136
1137 if (mLastTrack == NULL) {
1138 return ERROR_MALFORMED;
1139 }
1140
1141 uint8_t defaultEncryptedByteBlock = 0;
1142 uint8_t defaultSkipByteBlock = 0;
1143 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1144 if (version == 1) {
1145 uint32_t pattern = buf[2];
1146 defaultEncryptedByteBlock = pattern >> 4;
1147 defaultSkipByteBlock = pattern & 0xf;
1148 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1149 // use (1,0) to mean "encrypt everything"
1150 defaultEncryptedByteBlock = 1;
1151 }
1152 } else if (mLastTrack->subsample_encryption) {
1153 ALOGW("subsample_encryption should be version 1");
1154 } else if (defaultAlgorithmId > 1) {
1155 // only 0 (clear) and 1 (AES-128) are valid
1156 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1157 defaultAlgorithmId = 1;
1158 }
1159
1160 memset(buf, 0, 4);
1161 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1162 return ERROR_IO;
1163 }
1164 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1165
1166 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1167 // only unencrypted data must have 0 IV size
1168 return ERROR_MALFORMED;
1169 } else if (defaultIVSize != 0 &&
1170 defaultIVSize != 8 &&
1171 defaultIVSize != 16) {
1172 return ERROR_MALFORMED;
1173 }
1174
1175 uint8_t defaultKeyId[16];
1176
1177 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1178 return ERROR_IO;
1179 }
1180
1181 sp<ABuffer> defaultConstantIv;
1182 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1183
1184 uint8_t ivlength;
1185 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1186 < (ssize_t)sizeof(ivlength)) {
1187 return ERROR_IO;
1188 }
1189
1190 if (ivlength != 8 && ivlength != 16) {
1191 ALOGW("unsupported IV length: %u", ivlength);
1192 return ERROR_MALFORMED;
1193 }
1194
1195 defaultConstantIv = new ABuffer(ivlength);
1196 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1197 < (ssize_t)ivlength) {
1198 return ERROR_IO;
1199 }
1200
1201 defaultConstantIv->setRange(0, ivlength);
1202 }
1203
1204 int32_t tmpAlgorithmId;
1205 if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) {
1206 mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId);
1207 }
1208
1209 mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1210 mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1211 mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock);
1212 mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock);
1213 if (defaultConstantIv != NULL) {
1214 mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size());
1215 }
1216 break;
1217 }
1218
1219 case FOURCC('t', 'k', 'h', 'd'):
1220 {
1221 *offset += chunk_size;
1222
1223 status_t err;
1224 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1225 return err;
1226 }
1227
1228 break;
1229 }
1230
1231 case FOURCC('t', 'r', 'e', 'f'):
1232 {
1233 off64_t stop_offset = *offset + chunk_size;
1234 *offset = data_offset;
1235 while (*offset < stop_offset) {
1236 status_t err = parseChunk(offset, depth + 1);
1237 if (err != OK) {
1238 return err;
1239 }
1240 }
1241 if (*offset != stop_offset) {
1242 return ERROR_MALFORMED;
1243 }
1244 break;
1245 }
1246
1247 case FOURCC('t', 'h', 'm', 'b'):
1248 {
1249 *offset += chunk_size;
1250
1251 if (mLastTrack != NULL) {
1252 // Skip thumbnail track for now since we don't have an
1253 // API to retrieve it yet.
1254 // The thumbnail track can't be accessed by negative index or time,
1255 // because each timed sample has its own corresponding thumbnail
1256 // in the thumbnail track. We'll need a dedicated API to retrieve
1257 // thumbnail at time instead.
1258 mLastTrack->skipTrack = true;
1259 }
1260
1261 break;
1262 }
1263
1264 case FOURCC('p', 's', 's', 'h'):
1265 {
1266 *offset += chunk_size;
1267
1268 PsshInfo pssh;
1269
1270 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1271 return ERROR_IO;
1272 }
1273
1274 uint32_t psshdatalen = 0;
1275 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1276 return ERROR_IO;
1277 }
1278 pssh.datalen = ntohl(psshdatalen);
1279 ALOGV("pssh data size: %d", pssh.datalen);
1280 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1281 // pssh data length exceeds size of containing box
1282 return ERROR_MALFORMED;
1283 }
1284
1285 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1286 if (pssh.data == NULL) {
1287 return ERROR_MALFORMED;
1288 }
1289 ALOGV("allocated pssh @ %p", pssh.data);
1290 ssize_t requested = (ssize_t) pssh.datalen;
1291 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1292 delete[] pssh.data;
1293 return ERROR_IO;
1294 }
1295 mPssh.push_back(pssh);
1296
1297 break;
1298 }
1299
1300 case FOURCC('m', 'd', 'h', 'd'):
1301 {
1302 *offset += chunk_size;
1303
1304 if (chunk_data_size < 4 || mLastTrack == NULL) {
1305 return ERROR_MALFORMED;
1306 }
1307
1308 uint8_t version;
1309 if (mDataSource->readAt(
1310 data_offset, &version, sizeof(version))
1311 < (ssize_t)sizeof(version)) {
1312 return ERROR_IO;
1313 }
1314
1315 off64_t timescale_offset;
1316
1317 if (version == 1) {
1318 timescale_offset = data_offset + 4 + 16;
1319 } else if (version == 0) {
1320 timescale_offset = data_offset + 4 + 8;
1321 } else {
1322 return ERROR_IO;
1323 }
1324
1325 uint32_t timescale;
1326 if (mDataSource->readAt(
1327 timescale_offset, ×cale, sizeof(timescale))
1328 < (ssize_t)sizeof(timescale)) {
1329 return ERROR_IO;
1330 }
1331
1332 if (!timescale) {
1333 ALOGE("timescale should not be ZERO.");
1334 return ERROR_MALFORMED;
1335 }
1336
1337 mLastTrack->timescale = ntohl(timescale);
1338
1339 // 14496-12 says all ones means indeterminate, but some files seem to use
1340 // 0 instead. We treat both the same.
1341 int64_t duration = 0;
1342 if (version == 1) {
1343 if (mDataSource->readAt(
1344 timescale_offset + 4, &duration, sizeof(duration))
1345 < (ssize_t)sizeof(duration)) {
1346 return ERROR_IO;
1347 }
1348 if (duration != -1) {
1349 duration = ntoh64(duration);
1350 }
1351 } else {
1352 uint32_t duration32;
1353 if (mDataSource->readAt(
1354 timescale_offset + 4, &duration32, sizeof(duration32))
1355 < (ssize_t)sizeof(duration32)) {
1356 return ERROR_IO;
1357 }
1358 if (duration32 != 0xffffffff) {
1359 duration = ntohl(duration32);
1360 }
1361 }
1362 if (duration != 0 && mLastTrack->timescale != 0) {
1363 mLastTrack->meta.setInt64(
1364 kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1365 }
1366
1367 uint8_t lang[2];
1368 off64_t lang_offset;
1369 if (version == 1) {
1370 lang_offset = timescale_offset + 4 + 8;
1371 } else if (version == 0) {
1372 lang_offset = timescale_offset + 4 + 4;
1373 } else {
1374 return ERROR_IO;
1375 }
1376
1377 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1378 < (ssize_t)sizeof(lang)) {
1379 return ERROR_IO;
1380 }
1381
1382 // To get the ISO-639-2/T three character language code
1383 // 1 bit pad followed by 3 5-bits characters. Each character
1384 // is packed as the difference between its ASCII value and 0x60.
1385 char lang_code[4];
1386 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1387 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1388 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1389 lang_code[3] = '\0';
1390
1391 mLastTrack->meta.setCString(
1392 kKeyMediaLanguage, lang_code);
1393
1394 break;
1395 }
1396
1397 case FOURCC('s', 't', 's', 'd'):
1398 {
1399 uint8_t buffer[8];
1400 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1401 return ERROR_MALFORMED;
1402 }
1403
1404 if (mDataSource->readAt(
1405 data_offset, buffer, 8) < 8) {
1406 return ERROR_IO;
1407 }
1408
1409 if (U32_AT(buffer) != 0) {
1410 // Should be version 0, flags 0.
1411 return ERROR_MALFORMED;
1412 }
1413
1414 uint32_t entry_count = U32_AT(&buffer[4]);
1415
1416 if (entry_count > 1) {
1417 // For 3GPP timed text, there could be multiple tx3g boxes contain
1418 // multiple text display formats. These formats will be used to
1419 // display the timed text.
1420 // For encrypted files, there may also be more than one entry.
1421 const char *mime;
1422
1423 if (mLastTrack == NULL)
1424 return ERROR_MALFORMED;
1425
1426 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1427 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1428 strcasecmp(mime, "application/octet-stream")) {
1429 // For now we only support a single type of media per track.
1430 mLastTrack->skipTrack = true;
1431 *offset += chunk_size;
1432 break;
1433 }
1434 }
1435 off64_t stop_offset = *offset + chunk_size;
1436 *offset = data_offset + 8;
1437 for (uint32_t i = 0; i < entry_count; ++i) {
1438 status_t err = parseChunk(offset, depth + 1);
1439 if (err != OK) {
1440 return err;
1441 }
1442 }
1443
1444 if (*offset != stop_offset) {
1445 return ERROR_MALFORMED;
1446 }
1447 break;
1448 }
1449 case FOURCC('m', 'e', 't', 't'):
1450 {
1451 *offset += chunk_size;
1452
1453 if (mLastTrack == NULL)
1454 return ERROR_MALFORMED;
1455
1456 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1457 if (buffer.get() == NULL) {
1458 return NO_MEMORY;
1459 }
1460
1461 if (mDataSource->readAt(
1462 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1463 return ERROR_IO;
1464 }
1465
1466 String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1467 mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string());
1468
1469 break;
1470 }
1471
1472 case FOURCC('m', 'p', '4', 'a'):
1473 case FOURCC('e', 'n', 'c', 'a'):
1474 case FOURCC('s', 'a', 'm', 'r'):
1475 case FOURCC('s', 'a', 'w', 'b'):
1476 {
1477 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
1478 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
1479 // Ignore mp4a embedded in QT wave atom
1480 *offset += chunk_size;
1481 break;
1482 }
1483
1484 uint8_t buffer[8 + 20];
1485 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1486 // Basic AudioSampleEntry size.
1487 return ERROR_MALFORMED;
1488 }
1489
1490 if (mDataSource->readAt(
1491 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1492 return ERROR_IO;
1493 }
1494
1495 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1496 uint16_t version = U16_AT(&buffer[8]);
1497 uint32_t num_channels = U16_AT(&buffer[16]);
1498
1499 uint16_t sample_size = U16_AT(&buffer[18]);
1500 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1501
1502 if (mLastTrack == NULL)
1503 return ERROR_MALFORMED;
1504
1505 off64_t stop_offset = *offset + chunk_size;
1506 *offset = data_offset + sizeof(buffer);
1507
1508 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
1509 if (version == 1) {
1510 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1511 return ERROR_IO;
1512 }
1513
1514 #if 0
1515 U32_AT(buffer); // samples per packet
1516 U32_AT(&buffer[4]); // bytes per packet
1517 U32_AT(&buffer[8]); // bytes per frame
1518 U32_AT(&buffer[12]); // bytes per sample
1519 #endif
1520 *offset += 16;
1521 } else if (version == 2) {
1522 uint8_t v2buffer[36];
1523 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1524 return ERROR_IO;
1525 }
1526
1527 #if 0
1528 U32_AT(v2buffer); // size of struct only
1529 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1530 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1531 U32_AT(&v2buffer[16]); // always 0x7f000000
1532 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1533 U32_AT(&v2buffer[24]); // format specifc flags
1534 U32_AT(&v2buffer[28]); // const bytes per audio packet
1535 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1536 #endif
1537 *offset += 36;
1538 }
1539 }
1540
1541 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1542 // if the chunk type is enca, we'll get the type from the frma box later
1543 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1544 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1545 }
1546 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1547 chunk, num_channels, sample_size, sample_rate);
1548 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1549 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1550
1551 while (*offset < stop_offset) {
1552 status_t err = parseChunk(offset, depth + 1);
1553 if (err != OK) {
1554 return err;
1555 }
1556 }
1557
1558 if (*offset != stop_offset) {
1559 return ERROR_MALFORMED;
1560 }
1561 break;
1562 }
1563
1564 case FOURCC('m', 'p', '4', 'v'):
1565 case FOURCC('e', 'n', 'c', 'v'):
1566 case FOURCC('s', '2', '6', '3'):
1567 case FOURCC('H', '2', '6', '3'):
1568 case FOURCC('h', '2', '6', '3'):
1569 case FOURCC('a', 'v', 'c', '1'):
1570 case FOURCC('h', 'v', 'c', '1'):
1571 case FOURCC('h', 'e', 'v', '1'):
1572 {
1573 uint8_t buffer[78];
1574 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1575 // Basic VideoSampleEntry size.
1576 return ERROR_MALFORMED;
1577 }
1578
1579 if (mDataSource->readAt(
1580 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1581 return ERROR_IO;
1582 }
1583
1584 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1585 uint16_t width = U16_AT(&buffer[6 + 18]);
1586 uint16_t height = U16_AT(&buffer[6 + 20]);
1587
1588 // The video sample is not standard-compliant if it has invalid dimension.
1589 // Use some default width and height value, and
1590 // let the decoder figure out the actual width and height (and thus
1591 // be prepared for INFO_FOMRAT_CHANGED event).
1592 if (width == 0) width = 352;
1593 if (height == 0) height = 288;
1594
1595 // printf("*** coding='%s' width=%d height=%d\n",
1596 // chunk, width, height);
1597
1598 if (mLastTrack == NULL)
1599 return ERROR_MALFORMED;
1600
1601 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1602 // if the chunk type is encv, we'll get the type from the frma box later
1603 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1604 }
1605 mLastTrack->meta.setInt32(kKeyWidth, width);
1606 mLastTrack->meta.setInt32(kKeyHeight, height);
1607
1608 off64_t stop_offset = *offset + chunk_size;
1609 *offset = data_offset + sizeof(buffer);
1610 while (*offset < stop_offset) {
1611 status_t err = parseChunk(offset, depth + 1);
1612 if (err != OK) {
1613 return err;
1614 }
1615 }
1616
1617 if (*offset != stop_offset) {
1618 return ERROR_MALFORMED;
1619 }
1620 break;
1621 }
1622
1623 case FOURCC('s', 't', 'c', 'o'):
1624 case FOURCC('c', 'o', '6', '4'):
1625 {
1626 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1627 return ERROR_MALFORMED;
1628 }
1629
1630 status_t err =
1631 mLastTrack->sampleTable->setChunkOffsetParams(
1632 chunk_type, data_offset, chunk_data_size);
1633
1634 *offset += chunk_size;
1635
1636 if (err != OK) {
1637 return err;
1638 }
1639
1640 break;
1641 }
1642
1643 case FOURCC('s', 't', 's', 'c'):
1644 {
1645 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1646 return ERROR_MALFORMED;
1647
1648 status_t err =
1649 mLastTrack->sampleTable->setSampleToChunkParams(
1650 data_offset, chunk_data_size);
1651
1652 *offset += chunk_size;
1653
1654 if (err != OK) {
1655 return err;
1656 }
1657
1658 break;
1659 }
1660
1661 case FOURCC('s', 't', 's', 'z'):
1662 case FOURCC('s', 't', 'z', '2'):
1663 {
1664 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1665 return ERROR_MALFORMED;
1666 }
1667
1668 status_t err =
1669 mLastTrack->sampleTable->setSampleSizeParams(
1670 chunk_type, data_offset, chunk_data_size);
1671
1672 *offset += chunk_size;
1673
1674 if (err != OK) {
1675 return err;
1676 }
1677
1678 size_t max_size;
1679 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1680
1681 if (err != OK) {
1682 return err;
1683 }
1684
1685 if (max_size != 0) {
1686 // Assume that a given buffer only contains at most 10 chunks,
1687 // each chunk originally prefixed with a 2 byte length will
1688 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1689 // and thus will grow by 2 bytes per chunk.
1690 if (max_size > SIZE_MAX - 10 * 2) {
1691 ALOGE("max sample size too big: %zu", max_size);
1692 return ERROR_MALFORMED;
1693 }
1694 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1695 } else {
1696 // No size was specified. Pick a conservatively large size.
1697 uint32_t width, height;
1698 if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) ||
1699 !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) {
1700 ALOGE("No width or height, assuming worst case 1080p");
1701 width = 1920;
1702 height = 1080;
1703 } else {
1704 // A resolution was specified, check that it's not too big. The values below
1705 // were chosen so that the calculations below don't cause overflows, they're
1706 // not indicating that resolutions up to 32kx32k are actually supported.
1707 if (width > 32768 || height > 32768) {
1708 ALOGE("can't support %u x %u video", width, height);
1709 return ERROR_MALFORMED;
1710 }
1711 }
1712
1713 const char *mime;
1714 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1715 if (!strncmp(mime, "audio/", 6)) {
1716 // for audio, use 128KB
1717 max_size = 1024 * 128;
1718 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1719 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1720 // AVC & HEVC requires compression ratio of at least 2, and uses
1721 // macroblocks
1722 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1723 } else {
1724 // For all other formats there is no minimum compression
1725 // ratio. Use compression ratio of 1.
1726 max_size = width * height * 3 / 2;
1727 }
1728 // HACK: allow 10% overhead
1729 // TODO: read sample size from traf atom for fragmented MPEG4.
1730 max_size += max_size / 10;
1731 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size);
1732 }
1733
1734 // NOTE: setting another piece of metadata invalidates any pointers (such as the
1735 // mimetype) previously obtained, so don't cache them.
1736 const char *mime;
1737 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1738 // Calculate average frame rate.
1739 if (!strncasecmp("video/", mime, 6)) {
1740 size_t nSamples = mLastTrack->sampleTable->countSamples();
1741 if (nSamples == 0) {
1742 int32_t trackId;
1743 if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
1744 for (size_t i = 0; i < mTrex.size(); i++) {
1745 Trex *t = &mTrex.editItemAt(i);
1746 if (t->track_ID == (uint32_t) trackId) {
1747 if (t->default_sample_duration > 0) {
1748 int32_t frameRate =
1749 mLastTrack->timescale / t->default_sample_duration;
1750 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1751 }
1752 break;
1753 }
1754 }
1755 }
1756 } else {
1757 int64_t durationUs;
1758 if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) {
1759 if (durationUs > 0) {
1760 int32_t frameRate = (nSamples * 1000000LL +
1761 (durationUs >> 1)) / durationUs;
1762 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1763 }
1764 }
1765 ALOGV("setting frame count %zu", nSamples);
1766 mLastTrack->meta.setInt32(kKeyFrameCount, nSamples);
1767 }
1768 }
1769
1770 break;
1771 }
1772
1773 case FOURCC('s', 't', 't', 's'):
1774 {
1775 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1776 return ERROR_MALFORMED;
1777
1778 *offset += chunk_size;
1779
1780 status_t err =
1781 mLastTrack->sampleTable->setTimeToSampleParams(
1782 data_offset, chunk_data_size);
1783
1784 if (err != OK) {
1785 return err;
1786 }
1787
1788 break;
1789 }
1790
1791 case FOURCC('c', 't', 't', 's'):
1792 {
1793 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1794 return ERROR_MALFORMED;
1795
1796 *offset += chunk_size;
1797
1798 status_t err =
1799 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1800 data_offset, chunk_data_size);
1801
1802 if (err != OK) {
1803 return err;
1804 }
1805
1806 break;
1807 }
1808
1809 case FOURCC('s', 't', 's', 's'):
1810 {
1811 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1812 return ERROR_MALFORMED;
1813
1814 *offset += chunk_size;
1815
1816 status_t err =
1817 mLastTrack->sampleTable->setSyncSampleParams(
1818 data_offset, chunk_data_size);
1819
1820 if (err != OK) {
1821 return err;
1822 }
1823
1824 break;
1825 }
1826
1827 // \xA9xyz
1828 case FOURCC(0xA9, 'x', 'y', 'z'):
1829 {
1830 *offset += chunk_size;
1831
1832 // Best case the total data length inside "\xA9xyz" box would
1833 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
1834 // where "\x00\x05" is the text string length with value = 5,
1835 // "\0x15\xc7" is the language code = en, and "+0+0/" is a
1836 // location (string) value with longitude = 0 and latitude = 0.
1837 // Since some devices encountered in the wild omit the trailing
1838 // slash, we'll allow that.
1839 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
1840 return ERROR_MALFORMED;
1841 }
1842
1843 uint16_t len;
1844 if (!mDataSource->getUInt16(data_offset, &len)) {
1845 return ERROR_IO;
1846 }
1847
1848 // allow "+0+0" without trailing slash
1849 if (len < 4 || len > chunk_data_size - 4) {
1850 return ERROR_MALFORMED;
1851 }
1852 // The location string following the language code is formatted
1853 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
1854 // Allocate 2 extra bytes, in case we need to add a trailing slash,
1855 // and to add a terminating 0.
1856 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
1857 if (!buffer) {
1858 return NO_MEMORY;
1859 }
1860
1861 if (mDataSource->readAt(
1862 data_offset + 4, &buffer[0], len) < len) {
1863 return ERROR_IO;
1864 }
1865
1866 len = strlen(&buffer[0]);
1867 if (len < 4) {
1868 return ERROR_MALFORMED;
1869 }
1870 // Add a trailing slash if there wasn't one.
1871 if (buffer[len - 1] != '/') {
1872 buffer[len] = '/';
1873 }
1874 mFileMetaData.setCString(kKeyLocation, &buffer[0]);
1875 break;
1876 }
1877
1878 case FOURCC('e', 's', 'd', 's'):
1879 {
1880 *offset += chunk_size;
1881
1882 if (chunk_data_size < 4) {
1883 return ERROR_MALFORMED;
1884 }
1885
1886 uint8_t buffer[256];
1887 if (chunk_data_size > (off64_t)sizeof(buffer)) {
1888 return ERROR_BUFFER_TOO_SMALL;
1889 }
1890
1891 if (mDataSource->readAt(
1892 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1893 return ERROR_IO;
1894 }
1895
1896 if (U32_AT(buffer) != 0) {
1897 // Should be version 0, flags 0.
1898 return ERROR_MALFORMED;
1899 }
1900
1901 if (mLastTrack == NULL)
1902 return ERROR_MALFORMED;
1903
1904 mLastTrack->meta.setData(
1905 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1906
1907 if (mPath.size() >= 2
1908 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1909 // Information from the ESDS must be relied on for proper
1910 // setup of sample rate and channel count for MPEG4 Audio.
1911 // The generic header appears to only contain generic
1912 // information...
1913
1914 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1915 &buffer[4], chunk_data_size - 4);
1916
1917 if (err != OK) {
1918 return err;
1919 }
1920 }
1921 if (mPath.size() >= 2
1922 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1923 // Check if the video is MPEG2
1924 ESDS esds(&buffer[4], chunk_data_size - 4);
1925
1926 uint8_t objectTypeIndication;
1927 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1928 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1929 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1930 }
1931 }
1932 }
1933 break;
1934 }
1935
1936 case FOURCC('b', 't', 'r', 't'):
1937 {
1938 *offset += chunk_size;
1939 if (mLastTrack == NULL) {
1940 return ERROR_MALFORMED;
1941 }
1942
1943 uint8_t buffer[12];
1944 if (chunk_data_size != sizeof(buffer)) {
1945 return ERROR_MALFORMED;
1946 }
1947
1948 if (mDataSource->readAt(
1949 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1950 return ERROR_IO;
1951 }
1952
1953 uint32_t maxBitrate = U32_AT(&buffer[4]);
1954 uint32_t avgBitrate = U32_AT(&buffer[8]);
1955 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1956 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1957 }
1958 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1959 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
1960 }
1961 break;
1962 }
1963
1964 case FOURCC('a', 'v', 'c', 'C'):
1965 {
1966 *offset += chunk_size;
1967
1968 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1969
1970 if (buffer.get() == NULL) {
1971 ALOGE("b/28471206");
1972 return NO_MEMORY;
1973 }
1974
1975 if (mDataSource->readAt(
1976 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1977 return ERROR_IO;
1978 }
1979
1980 if (mLastTrack == NULL)
1981 return ERROR_MALFORMED;
1982
1983 mLastTrack->meta.setData(
1984 kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size);
1985
1986 break;
1987 }
1988 case FOURCC('h', 'v', 'c', 'C'):
1989 {
1990 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1991
1992 if (buffer.get() == NULL) {
1993 ALOGE("b/28471206");
1994 return NO_MEMORY;
1995 }
1996
1997 if (mDataSource->readAt(
1998 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1999 return ERROR_IO;
2000 }
2001
2002 if (mLastTrack == NULL)
2003 return ERROR_MALFORMED;
2004
2005 mLastTrack->meta.setData(
2006 kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size);
2007
2008 *offset += chunk_size;
2009 break;
2010 }
2011
2012 case FOURCC('d', '2', '6', '3'):
2013 {
2014 *offset += chunk_size;
2015 /*
2016 * d263 contains a fixed 7 bytes part:
2017 * vendor - 4 bytes
2018 * version - 1 byte
2019 * level - 1 byte
2020 * profile - 1 byte
2021 * optionally, "d263" box itself may contain a 16-byte
2022 * bit rate box (bitr)
2023 * average bit rate - 4 bytes
2024 * max bit rate - 4 bytes
2025 */
2026 char buffer[23];
2027 if (chunk_data_size != 7 &&
2028 chunk_data_size != 23) {
2029 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2030 return ERROR_MALFORMED;
2031 }
2032
2033 if (mDataSource->readAt(
2034 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2035 return ERROR_IO;
2036 }
2037
2038 if (mLastTrack == NULL)
2039 return ERROR_MALFORMED;
2040
2041 mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
2042
2043 break;
2044 }
2045
2046 case FOURCC('m', 'e', 't', 'a'):
2047 {
2048 off64_t stop_offset = *offset + chunk_size;
2049 *offset = data_offset;
2050 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2051 if (!isParsingMetaKeys) {
2052 uint8_t buffer[4];
2053 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2054 *offset = stop_offset;
2055 return ERROR_MALFORMED;
2056 }
2057
2058 if (mDataSource->readAt(
2059 data_offset, buffer, 4) < 4) {
2060 *offset = stop_offset;
2061 return ERROR_IO;
2062 }
2063
2064 if (U32_AT(buffer) != 0) {
2065 // Should be version 0, flags 0.
2066
2067 // If it's not, let's assume this is one of those
2068 // apparently malformed chunks that don't have flags
2069 // and completely different semantics than what's
2070 // in the MPEG4 specs and skip it.
2071 *offset = stop_offset;
2072 return OK;
2073 }
2074 *offset += sizeof(buffer);
2075 }
2076
2077 while (*offset < stop_offset) {
2078 status_t err = parseChunk(offset, depth + 1);
2079 if (err != OK) {
2080 return err;
2081 }
2082 }
2083
2084 if (*offset != stop_offset) {
2085 return ERROR_MALFORMED;
2086 }
2087 break;
2088 }
2089
2090 case FOURCC('i', 'l', 'o', 'c'):
2091 case FOURCC('i', 'i', 'n', 'f'):
2092 case FOURCC('i', 'p', 'r', 'p'):
2093 case FOURCC('p', 'i', 't', 'm'):
2094 case FOURCC('i', 'd', 'a', 't'):
2095 case FOURCC('i', 'r', 'e', 'f'):
2096 case FOURCC('i', 'p', 'r', 'o'):
2097 {
2098 if (mIsHeif) {
2099 if (mItemTable == NULL) {
2100 mItemTable = new ItemTable(mDataSource);
2101 }
2102 status_t err = mItemTable->parse(
2103 chunk_type, data_offset, chunk_data_size);
2104 if (err != OK) {
2105 return err;
2106 }
2107 }
2108 *offset += chunk_size;
2109 break;
2110 }
2111
2112 case FOURCC('m', 'e', 'a', 'n'):
2113 case FOURCC('n', 'a', 'm', 'e'):
2114 case FOURCC('d', 'a', 't', 'a'):
2115 {
2116 *offset += chunk_size;
2117
2118 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2119 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2120
2121 if (err != OK) {
2122 return err;
2123 }
2124 }
2125
2126 break;
2127 }
2128
2129 case FOURCC('m', 'v', 'h', 'd'):
2130 {
2131 *offset += chunk_size;
2132
2133 if (depth != 1) {
2134 ALOGE("mvhd: depth %d", depth);
2135 return ERROR_MALFORMED;
2136 }
2137 if (chunk_data_size < 32) {
2138 return ERROR_MALFORMED;
2139 }
2140
2141 uint8_t header[32];
2142 if (mDataSource->readAt(
2143 data_offset, header, sizeof(header))
2144 < (ssize_t)sizeof(header)) {
2145 return ERROR_IO;
2146 }
2147
2148 uint64_t creationTime;
2149 uint64_t duration = 0;
2150 if (header[0] == 1) {
2151 creationTime = U64_AT(&header[4]);
2152 mHeaderTimescale = U32_AT(&header[20]);
2153 duration = U64_AT(&header[24]);
2154 if (duration == 0xffffffffffffffff) {
2155 duration = 0;
2156 }
2157 } else if (header[0] != 0) {
2158 return ERROR_MALFORMED;
2159 } else {
2160 creationTime = U32_AT(&header[4]);
2161 mHeaderTimescale = U32_AT(&header[12]);
2162 uint32_t d32 = U32_AT(&header[16]);
2163 if (d32 == 0xffffffff) {
2164 d32 = 0;
2165 }
2166 duration = d32;
2167 }
2168 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2169 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2170 }
2171
2172 String8 s;
2173 if (convertTimeToDate(creationTime, &s)) {
2174 mFileMetaData.setCString(kKeyDate, s.string());
2175 }
2176
2177
2178 break;
2179 }
2180
2181 case FOURCC('m', 'e', 'h', 'd'):
2182 {
2183 *offset += chunk_size;
2184
2185 if (chunk_data_size < 8) {
2186 return ERROR_MALFORMED;
2187 }
2188
2189 uint8_t flags[4];
2190 if (mDataSource->readAt(
2191 data_offset, flags, sizeof(flags))
2192 < (ssize_t)sizeof(flags)) {
2193 return ERROR_IO;
2194 }
2195
2196 uint64_t duration = 0;
2197 if (flags[0] == 1) {
2198 // 64 bit
2199 if (chunk_data_size < 12) {
2200 return ERROR_MALFORMED;
2201 }
2202 mDataSource->getUInt64(data_offset + 4, &duration);
2203 if (duration == 0xffffffffffffffff) {
2204 duration = 0;
2205 }
2206 } else if (flags[0] == 0) {
2207 // 32 bit
2208 uint32_t d32;
2209 mDataSource->getUInt32(data_offset + 4, &d32);
2210 if (d32 == 0xffffffff) {
2211 d32 = 0;
2212 }
2213 duration = d32;
2214 } else {
2215 return ERROR_MALFORMED;
2216 }
2217
2218 if (duration != 0 && mHeaderTimescale != 0) {
2219 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2220 }
2221
2222 break;
2223 }
2224
2225 case FOURCC('m', 'd', 'a', 't'):
2226 {
2227 mMdatFound = true;
2228
2229 *offset += chunk_size;
2230 break;
2231 }
2232
2233 case FOURCC('h', 'd', 'l', 'r'):
2234 {
2235 *offset += chunk_size;
2236
2237 if (underQTMetaPath(mPath, 3)) {
2238 break;
2239 }
2240
2241 uint32_t buffer;
2242 if (mDataSource->readAt(
2243 data_offset + 8, &buffer, 4) < 4) {
2244 return ERROR_IO;
2245 }
2246
2247 uint32_t type = ntohl(buffer);
2248 // For the 3GPP file format, the handler-type within the 'hdlr' box
2249 // shall be 'text'. We also want to support 'sbtl' handler type
2250 // for a practical reason as various MPEG4 containers use it.
2251 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2252 if (mLastTrack != NULL) {
2253 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2254 }
2255 }
2256
2257 break;
2258 }
2259
2260 case FOURCC('k', 'e', 'y', 's'):
2261 {
2262 *offset += chunk_size;
2263
2264 if (underQTMetaPath(mPath, 3)) {
2265 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2266 if (err != OK) {
2267 return err;
2268 }
2269 }
2270 break;
2271 }
2272
2273 case FOURCC('t', 'r', 'e', 'x'):
2274 {
2275 *offset += chunk_size;
2276
2277 if (chunk_data_size < 24) {
2278 return ERROR_IO;
2279 }
2280 Trex trex;
2281 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2282 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2283 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2284 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2285 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2286 return ERROR_IO;
2287 }
2288 mTrex.add(trex);
2289 break;
2290 }
2291
2292 case FOURCC('t', 'x', '3', 'g'):
2293 {
2294 if (mLastTrack == NULL)
2295 return ERROR_MALFORMED;
2296
2297 uint32_t type;
2298 const void *data;
2299 size_t size = 0;
2300 if (!mLastTrack->meta.findData(
2301 kKeyTextFormatData, &type, &data, &size)) {
2302 size = 0;
2303 }
2304
2305 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2306 return ERROR_MALFORMED;
2307 }
2308
2309 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2310 if (buffer == NULL) {
2311 return ERROR_MALFORMED;
2312 }
2313
2314 if (size > 0) {
2315 memcpy(buffer, data, size);
2316 }
2317
2318 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2319 < chunk_size) {
2320 delete[] buffer;
2321 buffer = NULL;
2322
2323 // advance read pointer so we don't end up reading this again
2324 *offset += chunk_size;
2325 return ERROR_IO;
2326 }
2327
2328 mLastTrack->meta.setData(
2329 kKeyTextFormatData, 0, buffer, size + chunk_size);
2330
2331 delete[] buffer;
2332
2333 *offset += chunk_size;
2334 break;
2335 }
2336
2337 case FOURCC('c', 'o', 'v', 'r'):
2338 {
2339 *offset += chunk_size;
2340
2341 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2342 chunk_data_size, data_offset);
2343
2344 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2345 return ERROR_MALFORMED;
2346 }
2347 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2348 if (buffer.get() == NULL) {
2349 ALOGE("b/28471206");
2350 return NO_MEMORY;
2351 }
2352 if (mDataSource->readAt(
2353 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2354 return ERROR_IO;
2355 }
2356 const int kSkipBytesOfDataBox = 16;
2357 if (chunk_data_size <= kSkipBytesOfDataBox) {
2358 return ERROR_MALFORMED;
2359 }
2360
2361 mFileMetaData.setData(
2362 kKeyAlbumArt, MetaData::TYPE_NONE,
2363 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2364
2365 break;
2366 }
2367
2368 case FOURCC('c', 'o', 'l', 'r'):
2369 {
2370 *offset += chunk_size;
2371 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2372 // ignore otherwise
2373 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2374 status_t err = parseColorInfo(data_offset, chunk_data_size);
2375 if (err != OK) {
2376 return err;
2377 }
2378 }
2379
2380 break;
2381 }
2382
2383 case FOURCC('t', 'i', 't', 'l'):
2384 case FOURCC('p', 'e', 'r', 'f'):
2385 case FOURCC('a', 'u', 't', 'h'):
2386 case FOURCC('g', 'n', 'r', 'e'):
2387 case FOURCC('a', 'l', 'b', 'm'):
2388 case FOURCC('y', 'r', 'r', 'c'):
2389 {
2390 *offset += chunk_size;
2391
2392 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2393
2394 if (err != OK) {
2395 return err;
2396 }
2397
2398 break;
2399 }
2400
2401 case FOURCC('I', 'D', '3', '2'):
2402 {
2403 *offset += chunk_size;
2404
2405 if (chunk_data_size < 6) {
2406 return ERROR_MALFORMED;
2407 }
2408
2409 parseID3v2MetaData(data_offset + 6);
2410
2411 break;
2412 }
2413
2414 case FOURCC('-', '-', '-', '-'):
2415 {
2416 mLastCommentMean.clear();
2417 mLastCommentName.clear();
2418 mLastCommentData.clear();
2419 *offset += chunk_size;
2420 break;
2421 }
2422
2423 case FOURCC('s', 'i', 'd', 'x'):
2424 {
2425 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2426 if (err != OK) {
2427 return err;
2428 }
2429 *offset += chunk_size;
2430 return UNKNOWN_ERROR; // stop parsing after sidx
2431 }
2432
2433 case FOURCC('a', 'c', '-', '3'):
2434 {
2435 *offset += chunk_size;
2436 return parseAC3SampleEntry(data_offset);
2437 }
2438
2439 case FOURCC('f', 't', 'y', 'p'):
2440 {
2441 if (chunk_data_size < 8 || depth != 0) {
2442 return ERROR_MALFORMED;
2443 }
2444
2445 off64_t stop_offset = *offset + chunk_size;
2446 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2447 std::set<uint32_t> brandSet;
2448 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2449 if (i == 1) {
2450 // Skip this index, it refers to the minorVersion,
2451 // not a brand.
2452 continue;
2453 }
2454
2455 uint32_t brand;
2456 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2457 return ERROR_MALFORMED;
2458 }
2459
2460 brand = ntohl(brand);
2461 brandSet.insert(brand);
2462 }
2463
2464 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
2465 mIsQT = true;
2466 } else {
2467 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
2468 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
2469 ALOGV("identified HEIF image");
2470
2471 mIsHeif = true;
2472 brandSet.erase(FOURCC('m', 'i', 'f', '1'));
2473 brandSet.erase(FOURCC('h', 'e', 'i', 'c'));
2474 }
2475
2476 if (!brandSet.empty()) {
2477 // This means that the file should have moov box.
2478 // It could be any iso files (mp4, heifs, etc.)
2479 mHasMoovBox = true;
2480 if (mIsHeif) {
2481 ALOGV("identified HEIF image with other tracks");
2482 }
2483 }
2484 }
2485
2486 *offset = stop_offset;
2487
2488 break;
2489 }
2490
2491 default:
2492 {
2493 // check if we're parsing 'ilst' for meta keys
2494 // if so, treat type as a number (key-id).
2495 if (underQTMetaPath(mPath, 3)) {
2496 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2497 if (err != OK) {
2498 return err;
2499 }
2500 }
2501
2502 *offset += chunk_size;
2503 break;
2504 }
2505 }
2506
2507 return OK;
2508 }
2509
parseAC3SampleEntry(off64_t offset)2510 status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
2511 // skip 16 bytes:
2512 // + 6-byte reserved,
2513 // + 2-byte data reference index,
2514 // + 8-byte reserved
2515 offset += 16;
2516 uint16_t channelCount;
2517 if (!mDataSource->getUInt16(offset, &channelCount)) {
2518 return ERROR_MALFORMED;
2519 }
2520 // skip 8 bytes:
2521 // + 2-byte channelCount,
2522 // + 2-byte sample size,
2523 // + 4-byte reserved
2524 offset += 8;
2525 uint16_t sampleRate;
2526 if (!mDataSource->getUInt16(offset, &sampleRate)) {
2527 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
2528 return ERROR_MALFORMED;
2529 }
2530
2531 // skip 4 bytes:
2532 // + 2-byte sampleRate,
2533 // + 2-byte reserved
2534 offset += 4;
2535 return parseAC3SpecificBox(offset, sampleRate);
2536 }
2537
parseAC3SpecificBox(off64_t offset,uint16_t sampleRate)2538 status_t MPEG4Extractor::parseAC3SpecificBox(
2539 off64_t offset, uint16_t sampleRate) {
2540 uint32_t size;
2541 // + 4-byte size
2542 // + 4-byte type
2543 // + 3-byte payload
2544 const uint32_t kAC3SpecificBoxSize = 11;
2545 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
2546 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
2547 return ERROR_MALFORMED;
2548 }
2549
2550 offset += 4;
2551 uint32_t type;
2552 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) {
2553 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
2554 return ERROR_MALFORMED;
2555 }
2556
2557 offset += 4;
2558 const uint32_t kAC3SpecificBoxPayloadSize = 3;
2559 uint8_t chunk[kAC3SpecificBoxPayloadSize];
2560 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
2561 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
2562 return ERROR_MALFORMED;
2563 }
2564
2565 ABitReader br(chunk, sizeof(chunk));
2566 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
2567 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
2568
2569 unsigned fscod = br.getBits(2);
2570 if (fscod == 3) {
2571 ALOGE("Incorrect fscod (3) in AC3 header");
2572 return ERROR_MALFORMED;
2573 }
2574 unsigned boxSampleRate = sampleRateTable[fscod];
2575 if (boxSampleRate != sampleRate) {
2576 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
2577 boxSampleRate, sampleRate);
2578 return ERROR_MALFORMED;
2579 }
2580
2581 unsigned bsid = br.getBits(5);
2582 if (bsid > 8) {
2583 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
2584 return ERROR_MALFORMED;
2585 }
2586
2587 // skip
2588 unsigned bsmod __unused = br.getBits(3);
2589
2590 unsigned acmod = br.getBits(3);
2591 unsigned lfeon = br.getBits(1);
2592 unsigned channelCount = channelCountTable[acmod] + lfeon;
2593
2594 if (mLastTrack == NULL) {
2595 return ERROR_MALFORMED;
2596 }
2597 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
2598 mLastTrack->meta.setInt32(kKeyChannelCount, channelCount);
2599 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
2600 return OK;
2601 }
2602
parseSegmentIndex(off64_t offset,size_t size)2603 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2604 ALOGV("MPEG4Extractor::parseSegmentIndex");
2605
2606 if (size < 12) {
2607 return -EINVAL;
2608 }
2609
2610 uint32_t flags;
2611 if (!mDataSource->getUInt32(offset, &flags)) {
2612 return ERROR_MALFORMED;
2613 }
2614
2615 uint32_t version = flags >> 24;
2616 flags &= 0xffffff;
2617
2618 ALOGV("sidx version %d", version);
2619
2620 uint32_t referenceId;
2621 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2622 return ERROR_MALFORMED;
2623 }
2624
2625 uint32_t timeScale;
2626 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2627 return ERROR_MALFORMED;
2628 }
2629 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2630 if (timeScale == 0)
2631 return ERROR_MALFORMED;
2632
2633 uint64_t earliestPresentationTime;
2634 uint64_t firstOffset;
2635
2636 offset += 12;
2637 size -= 12;
2638
2639 if (version == 0) {
2640 if (size < 8) {
2641 return -EINVAL;
2642 }
2643 uint32_t tmp;
2644 if (!mDataSource->getUInt32(offset, &tmp)) {
2645 return ERROR_MALFORMED;
2646 }
2647 earliestPresentationTime = tmp;
2648 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2649 return ERROR_MALFORMED;
2650 }
2651 firstOffset = tmp;
2652 offset += 8;
2653 size -= 8;
2654 } else {
2655 if (size < 16) {
2656 return -EINVAL;
2657 }
2658 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2659 return ERROR_MALFORMED;
2660 }
2661 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2662 return ERROR_MALFORMED;
2663 }
2664 offset += 16;
2665 size -= 16;
2666 }
2667 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2668
2669 if (size < 4) {
2670 return -EINVAL;
2671 }
2672
2673 uint16_t referenceCount;
2674 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2675 return ERROR_MALFORMED;
2676 }
2677 offset += 4;
2678 size -= 4;
2679 ALOGV("refcount: %d", referenceCount);
2680
2681 if (size < referenceCount * 12) {
2682 return -EINVAL;
2683 }
2684
2685 uint64_t total_duration = 0;
2686 for (unsigned int i = 0; i < referenceCount; i++) {
2687 uint32_t d1, d2, d3;
2688
2689 if (!mDataSource->getUInt32(offset, &d1) || // size
2690 !mDataSource->getUInt32(offset + 4, &d2) || // duration
2691 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2692 return ERROR_MALFORMED;
2693 }
2694
2695 if (d1 & 0x80000000) {
2696 ALOGW("sub-sidx boxes not supported yet");
2697 }
2698 bool sap = d3 & 0x80000000;
2699 uint32_t saptype = (d3 >> 28) & 7;
2700 if (!sap || (saptype != 1 && saptype != 2)) {
2701 // type 1 and 2 are sync samples
2702 ALOGW("not a stream access point, or unsupported type: %08x", d3);
2703 }
2704 total_duration += d2;
2705 offset += 12;
2706 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2707 SidxEntry se;
2708 se.mSize = d1 & 0x7fffffff;
2709 se.mDurationUs = 1000000LL * d2 / timeScale;
2710 mSidxEntries.add(se);
2711 }
2712
2713 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2714
2715 if (mLastTrack == NULL)
2716 return ERROR_MALFORMED;
2717
2718 int64_t metaDuration;
2719 if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2720 mLastTrack->meta.setInt64(kKeyDuration, sidxDuration);
2721 }
2722 return OK;
2723 }
2724
parseQTMetaKey(off64_t offset,size_t size)2725 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2726 if (size < 8) {
2727 return ERROR_MALFORMED;
2728 }
2729
2730 uint32_t count;
2731 if (!mDataSource->getUInt32(offset + 4, &count)) {
2732 return ERROR_MALFORMED;
2733 }
2734
2735 if (mMetaKeyMap.size() > 0) {
2736 ALOGW("'keys' atom seen again, discarding existing entries");
2737 mMetaKeyMap.clear();
2738 }
2739
2740 off64_t keyOffset = offset + 8;
2741 off64_t stopOffset = offset + size;
2742 for (size_t i = 1; i <= count; i++) {
2743 if (keyOffset + 8 > stopOffset) {
2744 return ERROR_MALFORMED;
2745 }
2746
2747 uint32_t keySize;
2748 if (!mDataSource->getUInt32(keyOffset, &keySize)
2749 || keySize < 8
2750 || keyOffset + keySize > stopOffset) {
2751 return ERROR_MALFORMED;
2752 }
2753
2754 uint32_t type;
2755 if (!mDataSource->getUInt32(keyOffset + 4, &type)
2756 || type != FOURCC('m', 'd', 't', 'a')) {
2757 return ERROR_MALFORMED;
2758 }
2759
2760 keySize -= 8;
2761 keyOffset += 8;
2762
2763 auto keyData = heapbuffer<uint8_t>(keySize);
2764 if (keyData.get() == NULL) {
2765 return ERROR_MALFORMED;
2766 }
2767 if (mDataSource->readAt(
2768 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
2769 return ERROR_MALFORMED;
2770 }
2771
2772 AString key((const char *)keyData.get(), keySize);
2773 mMetaKeyMap.add(i, key);
2774
2775 keyOffset += keySize;
2776 }
2777 return OK;
2778 }
2779
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)2780 status_t MPEG4Extractor::parseQTMetaVal(
2781 int32_t keyId, off64_t offset, size_t size) {
2782 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2783 if (index < 0) {
2784 // corresponding key is not present, ignore
2785 return ERROR_MALFORMED;
2786 }
2787
2788 if (size <= 16) {
2789 return ERROR_MALFORMED;
2790 }
2791 uint32_t dataSize;
2792 if (!mDataSource->getUInt32(offset, &dataSize)
2793 || dataSize > size || dataSize <= 16) {
2794 return ERROR_MALFORMED;
2795 }
2796 uint32_t atomFourCC;
2797 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2798 || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2799 return ERROR_MALFORMED;
2800 }
2801 uint32_t dataType;
2802 if (!mDataSource->getUInt32(offset + 8, &dataType)
2803 || ((dataType & 0xff000000) != 0)) {
2804 // not well-known type
2805 return ERROR_MALFORMED;
2806 }
2807
2808 dataSize -= 16;
2809 offset += 16;
2810
2811 if (dataType == 23 && dataSize >= 4) {
2812 // BE Float32
2813 uint32_t val;
2814 if (!mDataSource->getUInt32(offset, &val)) {
2815 return ERROR_MALFORMED;
2816 }
2817 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2818 mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val);
2819 }
2820 } else if (dataType == 67 && dataSize >= 4) {
2821 // BE signed int32
2822 uint32_t val;
2823 if (!mDataSource->getUInt32(offset, &val)) {
2824 return ERROR_MALFORMED;
2825 }
2826 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
2827 mFileMetaData.setInt32(kKeyTemporalLayerCount, val);
2828 }
2829 } else {
2830 // add more keys if needed
2831 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2832 }
2833
2834 return OK;
2835 }
2836
parseTrackHeader(off64_t data_offset,off64_t data_size)2837 status_t MPEG4Extractor::parseTrackHeader(
2838 off64_t data_offset, off64_t data_size) {
2839 if (data_size < 4) {
2840 return ERROR_MALFORMED;
2841 }
2842
2843 uint8_t version;
2844 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2845 return ERROR_IO;
2846 }
2847
2848 size_t dynSize = (version == 1) ? 36 : 24;
2849
2850 uint8_t buffer[36 + 60];
2851
2852 if (data_size != (off64_t)dynSize + 60) {
2853 return ERROR_MALFORMED;
2854 }
2855
2856 if (mDataSource->readAt(
2857 data_offset, buffer, data_size) < (ssize_t)data_size) {
2858 return ERROR_IO;
2859 }
2860
2861 uint64_t ctime __unused, mtime __unused, duration __unused;
2862 int32_t id;
2863
2864 if (version == 1) {
2865 ctime = U64_AT(&buffer[4]);
2866 mtime = U64_AT(&buffer[12]);
2867 id = U32_AT(&buffer[20]);
2868 duration = U64_AT(&buffer[28]);
2869 } else if (version == 0) {
2870 ctime = U32_AT(&buffer[4]);
2871 mtime = U32_AT(&buffer[8]);
2872 id = U32_AT(&buffer[12]);
2873 duration = U32_AT(&buffer[20]);
2874 } else {
2875 return ERROR_UNSUPPORTED;
2876 }
2877
2878 if (mLastTrack == NULL)
2879 return ERROR_MALFORMED;
2880
2881 mLastTrack->meta.setInt32(kKeyTrackID, id);
2882
2883 size_t matrixOffset = dynSize + 16;
2884 int32_t a00 = U32_AT(&buffer[matrixOffset]);
2885 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2886 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2887 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2888
2889 #if 0
2890 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2891 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2892
2893 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2894 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2895 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2896 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2897 #endif
2898
2899 uint32_t rotationDegrees;
2900
2901 static const int32_t kFixedOne = 0x10000;
2902 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2903 // Identity, no rotation
2904 rotationDegrees = 0;
2905 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2906 rotationDegrees = 90;
2907 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2908 rotationDegrees = 270;
2909 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2910 rotationDegrees = 180;
2911 } else {
2912 ALOGW("We only support 0,90,180,270 degree rotation matrices");
2913 rotationDegrees = 0;
2914 }
2915
2916 if (rotationDegrees != 0) {
2917 mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees);
2918 }
2919
2920 // Handle presentation display size, which could be different
2921 // from the image size indicated by kKeyWidth and kKeyHeight.
2922 uint32_t width = U32_AT(&buffer[dynSize + 52]);
2923 uint32_t height = U32_AT(&buffer[dynSize + 56]);
2924 mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16);
2925 mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16);
2926
2927 return OK;
2928 }
2929
parseITunesMetaData(off64_t offset,size_t size)2930 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2931 if (size == 0) {
2932 return OK;
2933 }
2934
2935 if (size < 4 || size == SIZE_MAX) {
2936 return ERROR_MALFORMED;
2937 }
2938
2939 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2940 if (buffer == NULL) {
2941 return ERROR_MALFORMED;
2942 }
2943 if (mDataSource->readAt(
2944 offset, buffer, size) != (ssize_t)size) {
2945 delete[] buffer;
2946 buffer = NULL;
2947
2948 return ERROR_IO;
2949 }
2950
2951 uint32_t flags = U32_AT(buffer);
2952
2953 uint32_t metadataKey = 0;
2954 char chunk[5];
2955 MakeFourCCString(mPath[4], chunk);
2956 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2957 switch ((int32_t)mPath[4]) {
2958 case FOURCC(0xa9, 'a', 'l', 'b'):
2959 {
2960 metadataKey = kKeyAlbum;
2961 break;
2962 }
2963 case FOURCC(0xa9, 'A', 'R', 'T'):
2964 {
2965 metadataKey = kKeyArtist;
2966 break;
2967 }
2968 case FOURCC('a', 'A', 'R', 'T'):
2969 {
2970 metadataKey = kKeyAlbumArtist;
2971 break;
2972 }
2973 case FOURCC(0xa9, 'd', 'a', 'y'):
2974 {
2975 metadataKey = kKeyYear;
2976 break;
2977 }
2978 case FOURCC(0xa9, 'n', 'a', 'm'):
2979 {
2980 metadataKey = kKeyTitle;
2981 break;
2982 }
2983 case FOURCC(0xa9, 'w', 'r', 't'):
2984 {
2985 metadataKey = kKeyWriter;
2986 break;
2987 }
2988 case FOURCC('c', 'o', 'v', 'r'):
2989 {
2990 metadataKey = kKeyAlbumArt;
2991 break;
2992 }
2993 case FOURCC('g', 'n', 'r', 'e'):
2994 {
2995 metadataKey = kKeyGenre;
2996 break;
2997 }
2998 case FOURCC(0xa9, 'g', 'e', 'n'):
2999 {
3000 metadataKey = kKeyGenre;
3001 break;
3002 }
3003 case FOURCC('c', 'p', 'i', 'l'):
3004 {
3005 if (size == 9 && flags == 21) {
3006 char tmp[16];
3007 sprintf(tmp, "%d",
3008 (int)buffer[size - 1]);
3009
3010 mFileMetaData.setCString(kKeyCompilation, tmp);
3011 }
3012 break;
3013 }
3014 case FOURCC('t', 'r', 'k', 'n'):
3015 {
3016 if (size == 16 && flags == 0) {
3017 char tmp[16];
3018 uint16_t* pTrack = (uint16_t*)&buffer[10];
3019 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3020 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3021
3022 mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3023 }
3024 break;
3025 }
3026 case FOURCC('d', 'i', 's', 'k'):
3027 {
3028 if ((size == 14 || size == 16) && flags == 0) {
3029 char tmp[16];
3030 uint16_t* pDisc = (uint16_t*)&buffer[10];
3031 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3032 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3033
3034 mFileMetaData.setCString(kKeyDiscNumber, tmp);
3035 }
3036 break;
3037 }
3038 case FOURCC('-', '-', '-', '-'):
3039 {
3040 buffer[size] = '\0';
3041 switch (mPath[5]) {
3042 case FOURCC('m', 'e', 'a', 'n'):
3043 mLastCommentMean.setTo((const char *)buffer + 4);
3044 break;
3045 case FOURCC('n', 'a', 'm', 'e'):
3046 mLastCommentName.setTo((const char *)buffer + 4);
3047 break;
3048 case FOURCC('d', 'a', 't', 'a'):
3049 if (size < 8) {
3050 delete[] buffer;
3051 buffer = NULL;
3052 ALOGE("b/24346430");
3053 return ERROR_MALFORMED;
3054 }
3055 mLastCommentData.setTo((const char *)buffer + 8);
3056 break;
3057 }
3058
3059 // Once we have a set of mean/name/data info, go ahead and process
3060 // it to see if its something we are interested in. Whether or not
3061 // were are interested in the specific tag, make sure to clear out
3062 // the set so we can be ready to process another tuple should one
3063 // show up later in the file.
3064 if ((mLastCommentMean.length() != 0) &&
3065 (mLastCommentName.length() != 0) &&
3066 (mLastCommentData.length() != 0)) {
3067
3068 if (mLastCommentMean == "com.apple.iTunes"
3069 && mLastCommentName == "iTunSMPB") {
3070 int32_t delay, padding;
3071 if (sscanf(mLastCommentData,
3072 " %*x %x %x %*x", &delay, &padding) == 2) {
3073 if (mLastTrack == NULL) {
3074 delete[] buffer;
3075 return ERROR_MALFORMED;
3076 }
3077
3078 mLastTrack->meta.setInt32(kKeyEncoderDelay, delay);
3079 mLastTrack->meta.setInt32(kKeyEncoderPadding, padding);
3080 }
3081 }
3082
3083 mLastCommentMean.clear();
3084 mLastCommentName.clear();
3085 mLastCommentData.clear();
3086 }
3087 break;
3088 }
3089
3090 default:
3091 break;
3092 }
3093
3094 if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) {
3095 if (metadataKey == kKeyAlbumArt) {
3096 mFileMetaData.setData(
3097 kKeyAlbumArt, MetaData::TYPE_NONE,
3098 buffer + 8, size - 8);
3099 } else if (metadataKey == kKeyGenre) {
3100 if (flags == 0) {
3101 // uint8_t genre code, iTunes genre codes are
3102 // the standard id3 codes, except they start
3103 // at 1 instead of 0 (e.g. Pop is 14, not 13)
3104 // We use standard id3 numbering, so subtract 1.
3105 int genrecode = (int)buffer[size - 1];
3106 genrecode--;
3107 if (genrecode < 0) {
3108 genrecode = 255; // reserved for 'unknown genre'
3109 }
3110 char genre[10];
3111 sprintf(genre, "%d", genrecode);
3112
3113 mFileMetaData.setCString(metadataKey, genre);
3114 } else if (flags == 1) {
3115 // custom genre string
3116 buffer[size] = '\0';
3117
3118 mFileMetaData.setCString(
3119 metadataKey, (const char *)buffer + 8);
3120 }
3121 } else {
3122 buffer[size] = '\0';
3123
3124 mFileMetaData.setCString(
3125 metadataKey, (const char *)buffer + 8);
3126 }
3127 }
3128
3129 delete[] buffer;
3130 buffer = NULL;
3131
3132 return OK;
3133 }
3134
parseColorInfo(off64_t offset,size_t size)3135 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3136 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3137 return ERROR_MALFORMED;
3138 }
3139
3140 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3141 if (buffer == NULL) {
3142 return ERROR_MALFORMED;
3143 }
3144 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3145 delete[] buffer;
3146 buffer = NULL;
3147
3148 return ERROR_IO;
3149 }
3150
3151 int32_t type = U32_AT(&buffer[0]);
3152 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
3153 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) {
3154 int32_t primaries = U16_AT(&buffer[4]);
3155 int32_t transfer = U16_AT(&buffer[6]);
3156 int32_t coeffs = U16_AT(&buffer[8]);
3157 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
3158
3159 ColorAspects aspects;
3160 ColorUtils::convertIsoColorAspectsToCodecAspects(
3161 primaries, transfer, coeffs, fullRange, aspects);
3162
3163 // only store the first color specification
3164 if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) {
3165 mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries);
3166 mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer);
3167 mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
3168 mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange);
3169 }
3170 }
3171
3172 delete[] buffer;
3173 buffer = NULL;
3174
3175 return OK;
3176 }
3177
parse3GPPMetaData(off64_t offset,size_t size,int depth)3178 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3179 if (size < 4 || size == SIZE_MAX) {
3180 return ERROR_MALFORMED;
3181 }
3182
3183 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3184 if (buffer == NULL) {
3185 return ERROR_MALFORMED;
3186 }
3187 if (mDataSource->readAt(
3188 offset, buffer, size) != (ssize_t)size) {
3189 delete[] buffer;
3190 buffer = NULL;
3191
3192 return ERROR_IO;
3193 }
3194
3195 uint32_t metadataKey = 0;
3196 switch (mPath[depth]) {
3197 case FOURCC('t', 'i', 't', 'l'):
3198 {
3199 metadataKey = kKeyTitle;
3200 break;
3201 }
3202 case FOURCC('p', 'e', 'r', 'f'):
3203 {
3204 metadataKey = kKeyArtist;
3205 break;
3206 }
3207 case FOURCC('a', 'u', 't', 'h'):
3208 {
3209 metadataKey = kKeyWriter;
3210 break;
3211 }
3212 case FOURCC('g', 'n', 'r', 'e'):
3213 {
3214 metadataKey = kKeyGenre;
3215 break;
3216 }
3217 case FOURCC('a', 'l', 'b', 'm'):
3218 {
3219 if (buffer[size - 1] != '\0') {
3220 char tmp[4];
3221 sprintf(tmp, "%u", buffer[size - 1]);
3222
3223 mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3224 }
3225
3226 metadataKey = kKeyAlbum;
3227 break;
3228 }
3229 case FOURCC('y', 'r', 'r', 'c'):
3230 {
3231 if (size < 6) {
3232 delete[] buffer;
3233 buffer = NULL;
3234 ALOGE("b/62133227");
3235 android_errorWriteLog(0x534e4554, "62133227");
3236 return ERROR_MALFORMED;
3237 }
3238 char tmp[5];
3239 uint16_t year = U16_AT(&buffer[4]);
3240
3241 if (year < 10000) {
3242 sprintf(tmp, "%u", year);
3243
3244 mFileMetaData.setCString(kKeyYear, tmp);
3245 }
3246 break;
3247 }
3248
3249 default:
3250 break;
3251 }
3252
3253 if (metadataKey > 0) {
3254 bool isUTF8 = true; // Common case
3255 char16_t *framedata = NULL;
3256 int len16 = 0; // Number of UTF-16 characters
3257
3258 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3259 if (size < 6) {
3260 delete[] buffer;
3261 buffer = NULL;
3262 return ERROR_MALFORMED;
3263 }
3264
3265 if (size - 6 >= 4) {
3266 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3267 framedata = (char16_t *)(buffer + 6);
3268 if (0xfffe == *framedata) {
3269 // endianness marker (BOM) doesn't match host endianness
3270 for (int i = 0; i < len16; i++) {
3271 framedata[i] = bswap_16(framedata[i]);
3272 }
3273 // BOM is now swapped to 0xfeff, we will execute next block too
3274 }
3275
3276 if (0xfeff == *framedata) {
3277 // Remove the BOM
3278 framedata++;
3279 len16--;
3280 isUTF8 = false;
3281 }
3282 // else normal non-zero-length UTF-8 string
3283 // we can't handle UTF-16 without BOM as there is no other
3284 // indication of encoding.
3285 }
3286
3287 if (isUTF8) {
3288 buffer[size] = 0;
3289 mFileMetaData.setCString(metadataKey, (const char *)buffer + 6);
3290 } else {
3291 // Convert from UTF-16 string to UTF-8 string.
3292 String8 tmpUTF8str(framedata, len16);
3293 mFileMetaData.setCString(metadataKey, tmpUTF8str.string());
3294 }
3295 }
3296
3297 delete[] buffer;
3298 buffer = NULL;
3299
3300 return OK;
3301 }
3302
parseID3v2MetaData(off64_t offset)3303 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
3304 ID3 id3(mDataSource, true /* ignorev1 */, offset);
3305
3306 if (id3.isValid()) {
3307 struct Map {
3308 int key;
3309 const char *tag1;
3310 const char *tag2;
3311 };
3312 static const Map kMap[] = {
3313 { kKeyAlbum, "TALB", "TAL" },
3314 { kKeyArtist, "TPE1", "TP1" },
3315 { kKeyAlbumArtist, "TPE2", "TP2" },
3316 { kKeyComposer, "TCOM", "TCM" },
3317 { kKeyGenre, "TCON", "TCO" },
3318 { kKeyTitle, "TIT2", "TT2" },
3319 { kKeyYear, "TYE", "TYER" },
3320 { kKeyAuthor, "TXT", "TEXT" },
3321 { kKeyCDTrackNumber, "TRK", "TRCK" },
3322 { kKeyDiscNumber, "TPA", "TPOS" },
3323 { kKeyCompilation, "TCP", "TCMP" },
3324 };
3325 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
3326
3327 for (size_t i = 0; i < kNumMapEntries; ++i) {
3328 if (!mFileMetaData.hasData(kMap[i].key)) {
3329 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
3330 if (it->done()) {
3331 delete it;
3332 it = new ID3::Iterator(id3, kMap[i].tag2);
3333 }
3334
3335 if (it->done()) {
3336 delete it;
3337 continue;
3338 }
3339
3340 String8 s;
3341 it->getString(&s);
3342 delete it;
3343
3344 mFileMetaData.setCString(kMap[i].key, s);
3345 }
3346 }
3347
3348 size_t dataSize;
3349 String8 mime;
3350 const void *data = id3.getAlbumArt(&dataSize, &mime);
3351
3352 if (data) {
3353 mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
3354 mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string());
3355 }
3356 }
3357 }
3358
getTrack(size_t index)3359 MediaTrack *MPEG4Extractor::getTrack(size_t index) {
3360 status_t err;
3361 if ((err = readMetaData()) != OK) {
3362 return NULL;
3363 }
3364
3365 Track *track = mFirstTrack;
3366 while (index > 0) {
3367 if (track == NULL) {
3368 return NULL;
3369 }
3370
3371 track = track->next;
3372 --index;
3373 }
3374
3375 if (track == NULL) {
3376 return NULL;
3377 }
3378
3379
3380 Trex *trex = NULL;
3381 int32_t trackId;
3382 if (track->meta.findInt32(kKeyTrackID, &trackId)) {
3383 for (size_t i = 0; i < mTrex.size(); i++) {
3384 Trex *t = &mTrex.editItemAt(i);
3385 if (t->track_ID == (uint32_t) trackId) {
3386 trex = t;
3387 break;
3388 }
3389 }
3390 } else {
3391 ALOGE("b/21657957");
3392 return NULL;
3393 }
3394
3395 ALOGV("getTrack called, pssh: %zu", mPssh.size());
3396
3397 const char *mime;
3398 if (!track->meta.findCString(kKeyMIMEType, &mime)) {
3399 return NULL;
3400 }
3401
3402 sp<ItemTable> itemTable;
3403 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3404 uint32_t type;
3405 const void *data;
3406 size_t size;
3407 if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) {
3408 return NULL;
3409 }
3410
3411 const uint8_t *ptr = (const uint8_t *)data;
3412
3413 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
3414 return NULL;
3415 }
3416 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
3417 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3418 uint32_t type;
3419 const void *data;
3420 size_t size;
3421 if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) {
3422 return NULL;
3423 }
3424
3425 const uint8_t *ptr = (const uint8_t *)data;
3426
3427 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
3428 return NULL;
3429 }
3430 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3431 itemTable = mItemTable;
3432 }
3433 }
3434
3435 MPEG4Source *source = new MPEG4Source(
3436 track->meta, mDataSource, track->timescale, track->sampleTable,
3437 mSidxEntries, trex, mMoofOffset, itemTable);
3438 if (source->init() != OK) {
3439 delete source;
3440 return NULL;
3441 }
3442 return source;
3443 }
3444
3445 // static
verifyTrack(Track * track)3446 status_t MPEG4Extractor::verifyTrack(Track *track) {
3447 const char *mime;
3448 CHECK(track->meta.findCString(kKeyMIMEType, &mime));
3449
3450 uint32_t type;
3451 const void *data;
3452 size_t size;
3453 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3454 if (!track->meta.findData(kKeyAVCC, &type, &data, &size)
3455 || type != kTypeAVCC) {
3456 return ERROR_MALFORMED;
3457 }
3458 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3459 if (!track->meta.findData(kKeyHVCC, &type, &data, &size)
3460 || type != kTypeHVCC) {
3461 return ERROR_MALFORMED;
3462 }
3463 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3464 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3465 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3466 if (!track->meta.findData(kKeyESDS, &type, &data, &size)
3467 || type != kTypeESDS) {
3468 return ERROR_MALFORMED;
3469 }
3470 }
3471
3472 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3473 // Make sure we have all the metadata we need.
3474 ALOGE("stbl atom missing/invalid.");
3475 return ERROR_MALFORMED;
3476 }
3477
3478 if (track->timescale == 0) {
3479 ALOGE("timescale invalid.");
3480 return ERROR_MALFORMED;
3481 }
3482
3483 return OK;
3484 }
3485
// MPEG-4 audio object type (AOT) codes. Only some values are live
// enumerators; the rest are listed in comments so the numbering stays
// readable as a reference table.
typedef enum {
    // -1: AOT_NONE
    //  0: AOT_NULL_OBJECT
    //  1: AOT_AAC_MAIN          Main profile
    AOT_AAC_LC      = 2,      // Low Complexity object
    //  3: AOT_AAC_SSR
    //  4: AOT_AAC_LTP
    AOT_SBR         = 5,
    //  6: AOT_AAC_SCAL
    //  7: AOT_TWIN_VQ
    //  8: AOT_CELP
    //  9: AOT_HVXC
    // 10: AOT_RSVD_10           (reserved)
    // 11: AOT_RSVD_11           (reserved)
    // 12: AOT_TTSI              TTSI Object
    // 13: AOT_MAIN_SYNTH        Main Synthetic object
    // 14: AOT_WAV_TAB_SYNTH     Wavetable Synthesis object
    // 15: AOT_GEN_MIDI          General MIDI object
    // 16: AOT_ALG_SYNTH_AUD_FX  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,     // Error Resilient(ER) AAC Low Complexity
    // 18: AOT_RSVD_18           (reserved)
    // 19: AOT_ER_AAC_LTP        Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,     // Error Resilient(ER) AAC Scalable object
    // 21: AOT_ER_TWIN_VQ        Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,     // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,     // Error Resilient(ER) AAC LowDelay object
    // 24: AOT_ER_CELP           Error Resilient(ER) CELP object
    // 25: AOT_ER_HVXC           Error Resilient(ER) HVXC object
    // 26: AOT_ER_HILN           Error Resilient(ER) HILN object
    // 27: AOT_ER_PARA           Error Resilient(ER) Parametric object
    // 28: AOT_RSVD_28           might become SSC
    AOT_PS          = 29,     // PS, Parametric Stereo (includes SBR)
    // 30: AOT_MPEGS             MPEG Surround

    AOT_ESCAPE      = 31,     // Signal AOT uses more than 5 bits

    // 32: AOT_MP3ONMP4_L1       MPEG-Layer1 in mp4
    // 33: AOT_MP3ONMP4_L2       MPEG-Layer2 in mp4
    // 34: AOT_MP3ONMP4_L3       MPEG-Layer3 in mp4
    // 35: AOT_RSVD_35           might become DST
    // 36: AOT_RSVD_36           might become ALS
    // 37: AOT_AAC_SLS           AAC + SLS
    // 38: AOT_SLS               SLS
    // 39: AOT_ER_AAC_ELD        AAC Enhanced Low Delay

    // 42: AOT_USAC              USAC
    // 43: AOT_SAOC              SAOC
    // 44: AOT_LD_MPEGS          Low Delay MPEG Surround

    // 50: AOT_RSVD50            Interim AOT for Rsvd50
} AUDIO_OBJECT_TYPE;
3537
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)3538 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
3539 const void *esds_data, size_t esds_size) {
3540 ESDS esds(esds_data, esds_size);
3541
3542 uint8_t objectTypeIndication;
3543 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
3544 return ERROR_MALFORMED;
3545 }
3546
3547 if (objectTypeIndication == 0xe1) {
3548 // This isn't MPEG4 audio at all, it's QCELP 14k...
3549 if (mLastTrack == NULL)
3550 return ERROR_MALFORMED;
3551
3552 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
3553 return OK;
3554 }
3555
3556 if (objectTypeIndication == 0x6b) {
3557 // The media subtype is MP3 audio
3558 // Our software MP3 audio decoder may not be able to handle
3559 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
3560 ALOGE("MP3 track in MP4/3GPP file is not supported");
3561 return ERROR_UNSUPPORTED;
3562 }
3563
3564 if (mLastTrack != NULL) {
3565 uint32_t maxBitrate = 0;
3566 uint32_t avgBitrate = 0;
3567 esds.getBitRate(&maxBitrate, &avgBitrate);
3568 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
3569 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
3570 }
3571 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
3572 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
3573 }
3574 }
3575
3576 const uint8_t *csd;
3577 size_t csd_size;
3578 if (esds.getCodecSpecificInfo(
3579 (const void **)&csd, &csd_size) != OK) {
3580 return ERROR_MALFORMED;
3581 }
3582
3583 if (kUseHexDump) {
3584 printf("ESD of size %zu\n", csd_size);
3585 hexdump(csd, csd_size);
3586 }
3587
3588 if (csd_size == 0) {
3589 // There's no further information, i.e. no codec specific data
3590 // Let's assume that the information provided in the mpeg4 headers
3591 // is accurate and hope for the best.
3592
3593 return OK;
3594 }
3595
3596 if (csd_size < 2) {
3597 return ERROR_MALFORMED;
3598 }
3599
3600 static uint32_t kSamplingRate[] = {
3601 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
3602 16000, 12000, 11025, 8000, 7350
3603 };
3604
3605 ABitReader br(csd, csd_size);
3606 uint32_t objectType = br.getBits(5);
3607
3608 if (objectType == 31) { // AAC-ELD => additional 6 bits
3609 objectType = 32 + br.getBits(6);
3610 }
3611
3612 if (mLastTrack == NULL)
3613 return ERROR_MALFORMED;
3614
3615 //keep AOT type
3616 mLastTrack->meta.setInt32(kKeyAACAOT, objectType);
3617
3618 uint32_t freqIndex = br.getBits(4);
3619
3620 int32_t sampleRate = 0;
3621 int32_t numChannels = 0;
3622 if (freqIndex == 15) {
3623 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
3624 sampleRate = br.getBits(24);
3625 numChannels = br.getBits(4);
3626 } else {
3627 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3628 numChannels = br.getBits(4);
3629
3630 if (freqIndex == 13 || freqIndex == 14) {
3631 return ERROR_MALFORMED;
3632 }
3633
3634 sampleRate = kSamplingRate[freqIndex];
3635 }
3636
3637 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
3638 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3639 uint32_t extFreqIndex = br.getBits(4);
3640 int32_t extSampleRate __unused;
3641 if (extFreqIndex == 15) {
3642 if (csd_size < 8) {
3643 return ERROR_MALFORMED;
3644 }
3645 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
3646 extSampleRate = br.getBits(24);
3647 } else {
3648 if (extFreqIndex == 13 || extFreqIndex == 14) {
3649 return ERROR_MALFORMED;
3650 }
3651 extSampleRate = kSamplingRate[extFreqIndex];
3652 }
3653 //TODO: save the extension sampling rate value in meta data =>
3654 // mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate);
3655 }
3656
3657 switch (numChannels) {
3658 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
3659 case 0:
3660 case 1:// FC
3661 case 2:// FL FR
3662 case 3:// FC, FL FR
3663 case 4:// FC, FL FR, RC
3664 case 5:// FC, FL FR, SL SR
3665 case 6:// FC, FL FR, SL SR, LFE
3666 //numChannels already contains the right value
3667 break;
3668 case 11:// FC, FL FR, SL SR, RC, LFE
3669 numChannels = 7;
3670 break;
3671 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
3672 case 12:// FC, FL FR, SL SR, RL RR, LFE
3673 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
3674 numChannels = 8;
3675 break;
3676 default:
3677 return ERROR_UNSUPPORTED;
3678 }
3679
3680 {
3681 if (objectType == AOT_SBR || objectType == AOT_PS) {
3682 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3683 objectType = br.getBits(5);
3684
3685 if (objectType == AOT_ESCAPE) {
3686 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
3687 objectType = 32 + br.getBits(6);
3688 }
3689 }
3690 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
3691 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
3692 objectType == AOT_ER_BSAC) {
3693 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
3694 const int32_t frameLengthFlag __unused = br.getBits(1);
3695
3696 const int32_t dependsOnCoreCoder = br.getBits(1);
3697
3698 if (dependsOnCoreCoder ) {
3699 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
3700 const int32_t coreCoderDelay __unused = br.getBits(14);
3701 }
3702
3703 int32_t extensionFlag = -1;
3704 if (br.numBitsLeft() > 0) {
3705 extensionFlag = br.getBits(1);
3706 } else {
3707 switch (objectType) {
3708 // 14496-3 4.5.1.1 extensionFlag
3709 case AOT_AAC_LC:
3710 extensionFlag = 0;
3711 break;
3712 case AOT_ER_AAC_LC:
3713 case AOT_ER_AAC_SCAL:
3714 case AOT_ER_BSAC:
3715 case AOT_ER_AAC_LD:
3716 extensionFlag = 1;
3717 break;
3718 default:
3719 return ERROR_MALFORMED;
3720 break;
3721 }
3722 ALOGW("csd missing extension flag; assuming %d for object type %u.",
3723 extensionFlag, objectType);
3724 }
3725
3726 if (numChannels == 0) {
3727 int32_t channelsEffectiveNum = 0;
3728 int32_t channelsNum = 0;
3729 if (br.numBitsLeft() < 32) {
3730 return ERROR_MALFORMED;
3731 }
3732 const int32_t ElementInstanceTag __unused = br.getBits(4);
3733 const int32_t Profile __unused = br.getBits(2);
3734 const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
3735 const int32_t NumFrontChannelElements = br.getBits(4);
3736 const int32_t NumSideChannelElements = br.getBits(4);
3737 const int32_t NumBackChannelElements = br.getBits(4);
3738 const int32_t NumLfeChannelElements = br.getBits(2);
3739 const int32_t NumAssocDataElements __unused = br.getBits(3);
3740 const int32_t NumValidCcElements __unused = br.getBits(4);
3741
3742 const int32_t MonoMixdownPresent = br.getBits(1);
3743
3744 if (MonoMixdownPresent != 0) {
3745 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3746 const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
3747 }
3748
3749 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3750 const int32_t StereoMixdownPresent = br.getBits(1);
3751 if (StereoMixdownPresent != 0) {
3752 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3753 const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
3754 }
3755
3756 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3757 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
3758 if (MatrixMixdownIndexPresent != 0) {
3759 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
3760 const int32_t MatrixMixdownIndex __unused = br.getBits(2);
3761 const int32_t PseudoSurroundEnable __unused = br.getBits(1);
3762 }
3763
3764 int i;
3765 for (i=0; i < NumFrontChannelElements; i++) {
3766 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3767 const int32_t FrontElementIsCpe = br.getBits(1);
3768 const int32_t FrontElementTagSelect __unused = br.getBits(4);
3769 channelsNum += FrontElementIsCpe ? 2 : 1;
3770 }
3771
3772 for (i=0; i < NumSideChannelElements; i++) {
3773 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3774 const int32_t SideElementIsCpe = br.getBits(1);
3775 const int32_t SideElementTagSelect __unused = br.getBits(4);
3776 channelsNum += SideElementIsCpe ? 2 : 1;
3777 }
3778
3779 for (i=0; i < NumBackChannelElements; i++) {
3780 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3781 const int32_t BackElementIsCpe = br.getBits(1);
3782 const int32_t BackElementTagSelect __unused = br.getBits(4);
3783 channelsNum += BackElementIsCpe ? 2 : 1;
3784 }
3785 channelsEffectiveNum = channelsNum;
3786
3787 for (i=0; i < NumLfeChannelElements; i++) {
3788 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3789 const int32_t LfeElementTagSelect __unused = br.getBits(4);
3790 channelsNum += 1;
3791 }
3792 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
3793 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
3794 numChannels = channelsNum;
3795 }
3796 }
3797 }
3798
3799 if (numChannels == 0) {
3800 return ERROR_UNSUPPORTED;
3801 }
3802
3803 if (mLastTrack == NULL)
3804 return ERROR_MALFORMED;
3805
3806 int32_t prevSampleRate;
3807 CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate));
3808
3809 if (prevSampleRate != sampleRate) {
3810 ALOGV("mpeg4 audio sample rate different from previous setting. "
3811 "was: %d, now: %d", prevSampleRate, sampleRate);
3812 }
3813
3814 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
3815
3816 int32_t prevChannelCount;
3817 CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount));
3818
3819 if (prevChannelCount != numChannels) {
3820 ALOGV("mpeg4 audio channel count different from previous setting. "
3821 "was: %d, now: %d", prevChannelCount, numChannels);
3822 }
3823
3824 mLastTrack->meta.setInt32(kKeyChannelCount, numChannels);
3825
3826 return OK;
3827 }
3828
3829 ////////////////////////////////////////////////////////////////////////////////
3830
MPEG4Source(MetaDataBase & format,DataSourceBase * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable)3831 MPEG4Source::MPEG4Source(
3832 MetaDataBase &format,
3833 DataSourceBase *dataSource,
3834 int32_t timeScale,
3835 const sp<SampleTable> &sampleTable,
3836 Vector<SidxEntry> &sidx,
3837 const Trex *trex,
3838 off64_t firstMoofOffset,
3839 const sp<ItemTable> &itemTable)
3840 : mFormat(format),
3841 mDataSource(dataSource),
3842 mTimescale(timeScale),
3843 mSampleTable(sampleTable),
3844 mCurrentSampleIndex(0),
3845 mCurrentFragmentIndex(0),
3846 mSegments(sidx),
3847 mTrex(trex),
3848 mFirstMoofOffset(firstMoofOffset),
3849 mCurrentMoofOffset(firstMoofOffset),
3850 mNextMoofOffset(-1),
3851 mCurrentTime(0),
3852 mDefaultEncryptedByteBlock(0),
3853 mDefaultSkipByteBlock(0),
3854 mCurrentSampleInfoAllocSize(0),
3855 mCurrentSampleInfoSizes(NULL),
3856 mCurrentSampleInfoOffsetsAllocSize(0),
3857 mCurrentSampleInfoOffsets(NULL),
3858 mIsAVC(false),
3859 mIsHEVC(false),
3860 mNALLengthSize(0),
3861 mStarted(false),
3862 mGroup(NULL),
3863 mBuffer(NULL),
3864 mWantsNALFragments(false),
3865 mSrcBuffer(NULL),
3866 mIsHeif(itemTable != NULL),
3867 mItemTable(itemTable) {
3868
3869 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3870
3871 mFormat.findInt32(kKeyCryptoMode, &mCryptoMode);
3872 mDefaultIVSize = 0;
3873 mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3874 uint32_t keytype;
3875 const void *key;
3876 size_t keysize;
3877 if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3878 CHECK(keysize <= 16);
3879 memset(mCryptoKey, 0, 16);
3880 memcpy(mCryptoKey, key, keysize);
3881 }
3882
3883 mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock);
3884 mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock);
3885
3886 const char *mime;
3887 bool success = mFormat.findCString(kKeyMIMEType, &mime);
3888 CHECK(success);
3889
3890 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3891 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
3892 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
3893
3894 if (mIsAVC) {
3895 uint32_t type;
3896 const void *data;
3897 size_t size;
3898 CHECK(format.findData(kKeyAVCC, &type, &data, &size));
3899
3900 const uint8_t *ptr = (const uint8_t *)data;
3901
3902 CHECK(size >= 7);
3903 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3904
3905 // The number of bytes used to encode the length of a NAL unit.
3906 mNALLengthSize = 1 + (ptr[4] & 3);
3907 } else if (mIsHEVC) {
3908 uint32_t type;
3909 const void *data;
3910 size_t size;
3911 CHECK(format.findData(kKeyHVCC, &type, &data, &size));
3912
3913 const uint8_t *ptr = (const uint8_t *)data;
3914
3915 CHECK(size >= 22);
3916 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3917
3918 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3919 }
3920
3921 CHECK(format.findInt32(kKeyTrackID, &mTrackId));
3922
3923 }
3924
init()3925 status_t MPEG4Source::init() {
3926 if (mFirstMoofOffset != 0) {
3927 off64_t offset = mFirstMoofOffset;
3928 return parseChunk(&offset);
3929 }
3930 return OK;
3931 }
3932
~MPEG4Source()3933 MPEG4Source::~MPEG4Source() {
3934 if (mStarted) {
3935 stop();
3936 }
3937 free(mCurrentSampleInfoSizes);
3938 free(mCurrentSampleInfoOffsets);
3939 }
3940
start(MetaDataBase * params)3941 status_t MPEG4Source::start(MetaDataBase *params) {
3942 Mutex::Autolock autoLock(mLock);
3943
3944 CHECK(!mStarted);
3945
3946 int32_t val;
3947 if (params && params->findInt32(kKeyWantsNALFragments, &val)
3948 && val != 0) {
3949 mWantsNALFragments = true;
3950 } else {
3951 mWantsNALFragments = false;
3952 }
3953
3954 int32_t tmp;
3955 CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp));
3956 size_t max_size = tmp;
3957
3958 // A somewhat arbitrary limit that should be sufficient for 8k video frames
3959 // If you see the message below for a valid input stream: increase the limit
3960 const size_t kMaxBufferSize = 64 * 1024 * 1024;
3961 if (max_size > kMaxBufferSize) {
3962 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
3963 return ERROR_MALFORMED;
3964 }
3965 if (max_size == 0) {
3966 ALOGE("zero max input size");
3967 return ERROR_MALFORMED;
3968 }
3969
3970 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
3971 const size_t kInitialBuffers = 2;
3972 const size_t kMaxBuffers = 8;
3973 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
3974 mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers);
3975 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3976 if (mSrcBuffer == NULL) {
3977 // file probably specified a bad max size
3978 delete mGroup;
3979 mGroup = NULL;
3980 return ERROR_MALFORMED;
3981 }
3982
3983 mStarted = true;
3984
3985 return OK;
3986 }
3987
stop()3988 status_t MPEG4Source::stop() {
3989 Mutex::Autolock autoLock(mLock);
3990
3991 CHECK(mStarted);
3992
3993 if (mBuffer != NULL) {
3994 mBuffer->release();
3995 mBuffer = NULL;
3996 }
3997
3998 delete[] mSrcBuffer;
3999 mSrcBuffer = NULL;
4000
4001 delete mGroup;
4002 mGroup = NULL;
4003
4004 mStarted = false;
4005 mCurrentSampleIndex = 0;
4006
4007 return OK;
4008 }
4009
parseChunk(off64_t * offset)4010 status_t MPEG4Source::parseChunk(off64_t *offset) {
4011 uint32_t hdr[2];
4012 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4013 return ERROR_IO;
4014 }
4015 uint64_t chunk_size = ntohl(hdr[0]);
4016 uint32_t chunk_type = ntohl(hdr[1]);
4017 off64_t data_offset = *offset + 8;
4018
4019 if (chunk_size == 1) {
4020 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4021 return ERROR_IO;
4022 }
4023 chunk_size = ntoh64(chunk_size);
4024 data_offset += 8;
4025
4026 if (chunk_size < 16) {
4027 // The smallest valid chunk is 16 bytes long in this case.
4028 return ERROR_MALFORMED;
4029 }
4030 } else if (chunk_size < 8) {
4031 // The smallest valid chunk is 8 bytes long.
4032 return ERROR_MALFORMED;
4033 }
4034
4035 char chunk[5];
4036 MakeFourCCString(chunk_type, chunk);
4037 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4038
4039 off64_t chunk_data_size = *offset + chunk_size - data_offset;
4040
4041 switch(chunk_type) {
4042
4043 case FOURCC('t', 'r', 'a', 'f'):
4044 case FOURCC('m', 'o', 'o', 'f'): {
4045 off64_t stop_offset = *offset + chunk_size;
4046 *offset = data_offset;
4047 while (*offset < stop_offset) {
4048 status_t err = parseChunk(offset);
4049 if (err != OK) {
4050 return err;
4051 }
4052 }
4053 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4054 // *offset points to the box following this moof. Find the next moof from there.
4055
4056 while (true) {
4057 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4058 // no more box to the end of file.
4059 break;
4060 }
4061 chunk_size = ntohl(hdr[0]);
4062 chunk_type = ntohl(hdr[1]);
4063 if (chunk_size == 1) {
4064 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4065 // which is defined in 4.2 Object Structure.
4066 // When chunk_size==1, 8 bytes follows as "largesize".
4067 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4068 return ERROR_IO;
4069 }
4070 chunk_size = ntoh64(chunk_size);
4071 if (chunk_size < 16) {
4072 // The smallest valid chunk is 16 bytes long in this case.
4073 return ERROR_MALFORMED;
4074 }
4075 } else if (chunk_size == 0) {
4076 // next box extends to end of file.
4077 } else if (chunk_size < 8) {
4078 // The smallest valid chunk is 8 bytes long in this case.
4079 return ERROR_MALFORMED;
4080 }
4081
4082 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4083 mNextMoofOffset = *offset;
4084 break;
4085 } else if (chunk_size == 0) {
4086 break;
4087 }
4088 *offset += chunk_size;
4089 }
4090 }
4091 break;
4092 }
4093
4094 case FOURCC('t', 'f', 'h', 'd'): {
4095 status_t err;
4096 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4097 return err;
4098 }
4099 *offset += chunk_size;
4100 break;
4101 }
4102
4103 case FOURCC('t', 'r', 'u', 'n'): {
4104 status_t err;
4105 if (mLastParsedTrackId == mTrackId) {
4106 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4107 return err;
4108 }
4109 }
4110
4111 *offset += chunk_size;
4112 break;
4113 }
4114
4115 case FOURCC('s', 'a', 'i', 'z'): {
4116 status_t err;
4117 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4118 return err;
4119 }
4120 *offset += chunk_size;
4121 break;
4122 }
4123 case FOURCC('s', 'a', 'i', 'o'): {
4124 status_t err;
4125 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
4126 return err;
4127 }
4128 *offset += chunk_size;
4129 break;
4130 }
4131
4132 case FOURCC('s', 'e', 'n', 'c'): {
4133 status_t err;
4134 if ((err = parseSampleEncryption(data_offset)) != OK) {
4135 return err;
4136 }
4137 *offset += chunk_size;
4138 break;
4139 }
4140
4141 case FOURCC('m', 'd', 'a', 't'): {
4142 // parse DRM info if present
4143 ALOGV("MPEG4Source::parseChunk mdat");
4144 // if saiz/saoi was previously observed, do something with the sampleinfos
4145 *offset += chunk_size;
4146 break;
4147 }
4148
4149 default: {
4150 *offset += chunk_size;
4151 break;
4152 }
4153 }
4154 return OK;
4155 }
4156
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)4157 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4158 off64_t offset, off64_t /* size */) {
4159 ALOGV("parseSampleAuxiliaryInformationSizes");
4160 // 14496-12 8.7.12
4161 uint8_t version;
4162 if (mDataSource->readAt(
4163 offset, &version, sizeof(version))
4164 < (ssize_t)sizeof(version)) {
4165 return ERROR_IO;
4166 }
4167
4168 if (version != 0) {
4169 return ERROR_UNSUPPORTED;
4170 }
4171 offset++;
4172
4173 uint32_t flags;
4174 if (!mDataSource->getUInt24(offset, &flags)) {
4175 return ERROR_IO;
4176 }
4177 offset += 3;
4178
4179 if (flags & 1) {
4180 uint32_t tmp;
4181 if (!mDataSource->getUInt32(offset, &tmp)) {
4182 return ERROR_MALFORMED;
4183 }
4184 mCurrentAuxInfoType = tmp;
4185 offset += 4;
4186 if (!mDataSource->getUInt32(offset, &tmp)) {
4187 return ERROR_MALFORMED;
4188 }
4189 mCurrentAuxInfoTypeParameter = tmp;
4190 offset += 4;
4191 }
4192
4193 uint8_t defsize;
4194 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
4195 return ERROR_MALFORMED;
4196 }
4197 mCurrentDefaultSampleInfoSize = defsize;
4198 offset++;
4199
4200 uint32_t smplcnt;
4201 if (!mDataSource->getUInt32(offset, &smplcnt)) {
4202 return ERROR_MALFORMED;
4203 }
4204 mCurrentSampleInfoCount = smplcnt;
4205 offset += 4;
4206
4207 if (mCurrentDefaultSampleInfoSize != 0) {
4208 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
4209 return OK;
4210 }
4211 if (smplcnt > mCurrentSampleInfoAllocSize) {
4212 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
4213 if (newPtr == NULL) {
4214 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
4215 return NO_MEMORY;
4216 }
4217 mCurrentSampleInfoSizes = newPtr;
4218 mCurrentSampleInfoAllocSize = smplcnt;
4219 }
4220
4221 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
4222 return OK;
4223 }
4224
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)4225 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
4226 off64_t offset, off64_t /* size */) {
4227 ALOGV("parseSampleAuxiliaryInformationOffsets");
4228 // 14496-12 8.7.13
4229 uint8_t version;
4230 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
4231 return ERROR_IO;
4232 }
4233 offset++;
4234
4235 uint32_t flags;
4236 if (!mDataSource->getUInt24(offset, &flags)) {
4237 return ERROR_IO;
4238 }
4239 offset += 3;
4240
4241 uint32_t entrycount;
4242 if (!mDataSource->getUInt32(offset, &entrycount)) {
4243 return ERROR_IO;
4244 }
4245 offset += 4;
4246 if (entrycount == 0) {
4247 return OK;
4248 }
4249 if (entrycount > UINT32_MAX / 8) {
4250 return ERROR_MALFORMED;
4251 }
4252
4253 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
4254 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
4255 if (newPtr == NULL) {
4256 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
4257 return NO_MEMORY;
4258 }
4259 mCurrentSampleInfoOffsets = newPtr;
4260 mCurrentSampleInfoOffsetsAllocSize = entrycount;
4261 }
4262 mCurrentSampleInfoOffsetCount = entrycount;
4263
4264 if (mCurrentSampleInfoOffsets == NULL) {
4265 return OK;
4266 }
4267
4268 for (size_t i = 0; i < entrycount; i++) {
4269 if (version == 0) {
4270 uint32_t tmp;
4271 if (!mDataSource->getUInt32(offset, &tmp)) {
4272 return ERROR_IO;
4273 }
4274 mCurrentSampleInfoOffsets[i] = tmp;
4275 offset += 4;
4276 } else {
4277 uint64_t tmp;
4278 if (!mDataSource->getUInt64(offset, &tmp)) {
4279 return ERROR_IO;
4280 }
4281 mCurrentSampleInfoOffsets[i] = tmp;
4282 offset += 8;
4283 }
4284 }
4285
4286 // parse clear/encrypted data
4287
4288 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
4289
4290 drmoffset += mCurrentMoofOffset;
4291
4292 return parseClearEncryptedSizes(drmoffset, false, 0);
4293 }
4294
parseClearEncryptedSizes(off64_t offset,bool isSubsampleEncryption,uint32_t flags)4295 status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
4296
4297 int ivlength;
4298 CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength));
4299
4300 // only 0, 8 and 16 byte initialization vectors are supported
4301 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
4302 ALOGW("unsupported IV length: %d", ivlength);
4303 return ERROR_MALFORMED;
4304 }
4305
4306 uint32_t sampleCount = mCurrentSampleInfoCount;
4307 if (isSubsampleEncryption) {
4308 if (!mDataSource->getUInt32(offset, &sampleCount)) {
4309 return ERROR_IO;
4310 }
4311 offset += 4;
4312 }
4313
4314 // read CencSampleAuxiliaryDataFormats
4315 for (size_t i = 0; i < sampleCount; i++) {
4316 if (i >= mCurrentSamples.size()) {
4317 ALOGW("too few samples");
4318 break;
4319 }
4320 Sample *smpl = &mCurrentSamples.editItemAt(i);
4321 if (!smpl->clearsizes.isEmpty()) {
4322 continue;
4323 }
4324
4325 memset(smpl->iv, 0, 16);
4326 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
4327 return ERROR_IO;
4328 }
4329
4330 offset += ivlength;
4331
4332 bool readSubsamples;
4333 if (isSubsampleEncryption) {
4334 readSubsamples = flags & 2;
4335 } else {
4336 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
4337 if (smplinfosize == 0) {
4338 smplinfosize = mCurrentSampleInfoSizes[i];
4339 }
4340 readSubsamples = smplinfosize > ivlength;
4341 }
4342
4343 if (readSubsamples) {
4344 uint16_t numsubsamples;
4345 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
4346 return ERROR_IO;
4347 }
4348 offset += 2;
4349 for (size_t j = 0; j < numsubsamples; j++) {
4350 uint16_t numclear;
4351 uint32_t numencrypted;
4352 if (!mDataSource->getUInt16(offset, &numclear)) {
4353 return ERROR_IO;
4354 }
4355 offset += 2;
4356 if (!mDataSource->getUInt32(offset, &numencrypted)) {
4357 return ERROR_IO;
4358 }
4359 offset += 4;
4360 smpl->clearsizes.add(numclear);
4361 smpl->encryptedsizes.add(numencrypted);
4362 }
4363 } else {
4364 smpl->clearsizes.add(0);
4365 smpl->encryptedsizes.add(smpl->size);
4366 }
4367 }
4368
4369 return OK;
4370 }
4371
parseSampleEncryption(off64_t offset)4372 status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
4373 uint32_t flags;
4374 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4375 return ERROR_MALFORMED;
4376 }
4377 return parseClearEncryptedSizes(offset + 4, true, flags);
4378 }
4379
parseTrackFragmentHeader(off64_t offset,off64_t size)4380 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
4381
4382 if (size < 8) {
4383 return -EINVAL;
4384 }
4385
4386 uint32_t flags;
4387 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4388 return ERROR_MALFORMED;
4389 }
4390
4391 if (flags & 0xff000000) {
4392 return -EINVAL;
4393 }
4394
4395 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
4396 return ERROR_MALFORMED;
4397 }
4398
4399 if (mLastParsedTrackId != mTrackId) {
4400 // this is not the right track, skip it
4401 return OK;
4402 }
4403
4404 mTrackFragmentHeaderInfo.mFlags = flags;
4405 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
4406 offset += 8;
4407 size -= 8;
4408
4409 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
4410
4411 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
4412 if (size < 8) {
4413 return -EINVAL;
4414 }
4415
4416 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
4417 return ERROR_MALFORMED;
4418 }
4419 offset += 8;
4420 size -= 8;
4421 }
4422
4423 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
4424 if (size < 4) {
4425 return -EINVAL;
4426 }
4427
4428 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
4429 return ERROR_MALFORMED;
4430 }
4431 offset += 4;
4432 size -= 4;
4433 }
4434
4435 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4436 if (size < 4) {
4437 return -EINVAL;
4438 }
4439
4440 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
4441 return ERROR_MALFORMED;
4442 }
4443 offset += 4;
4444 size -= 4;
4445 }
4446
4447 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4448 if (size < 4) {
4449 return -EINVAL;
4450 }
4451
4452 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
4453 return ERROR_MALFORMED;
4454 }
4455 offset += 4;
4456 size -= 4;
4457 }
4458
4459 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4460 if (size < 4) {
4461 return -EINVAL;
4462 }
4463
4464 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
4465 return ERROR_MALFORMED;
4466 }
4467 offset += 4;
4468 size -= 4;
4469 }
4470
4471 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
4472 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
4473 }
4474
4475 mTrackFragmentHeaderInfo.mDataOffset = 0;
4476 return OK;
4477 }
4478
parseTrackFragmentRun(off64_t offset,off64_t size)4479 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
4480
4481 ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4482 if (size < 8) {
4483 return -EINVAL;
4484 }
4485
4486 enum {
4487 kDataOffsetPresent = 0x01,
4488 kFirstSampleFlagsPresent = 0x04,
4489 kSampleDurationPresent = 0x100,
4490 kSampleSizePresent = 0x200,
4491 kSampleFlagsPresent = 0x400,
4492 kSampleCompositionTimeOffsetPresent = 0x800,
4493 };
4494
4495 uint32_t flags;
4496 if (!mDataSource->getUInt32(offset, &flags)) {
4497 return ERROR_MALFORMED;
4498 }
4499 // |version| only affects SampleCompositionTimeOffset field.
4500 // If version == 0, SampleCompositionTimeOffset is uint32_t;
4501 // Otherwise, SampleCompositionTimeOffset is int32_t.
4502 // Sample.compositionOffset is defined as int32_t.
4503 uint8_t version = flags >> 24;
4504 flags &= 0xffffff;
4505 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
4506
4507 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4508 // These two shall not be used together.
4509 return -EINVAL;
4510 }
4511
4512 uint32_t sampleCount;
4513 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4514 return ERROR_MALFORMED;
4515 }
4516 offset += 8;
4517 size -= 8;
4518
4519 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4520
4521 uint32_t firstSampleFlags = 0;
4522
4523 if (flags & kDataOffsetPresent) {
4524 if (size < 4) {
4525 return -EINVAL;
4526 }
4527
4528 int32_t dataOffsetDelta;
4529 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4530 return ERROR_MALFORMED;
4531 }
4532
4533 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4534
4535 offset += 4;
4536 size -= 4;
4537 }
4538
4539 if (flags & kFirstSampleFlagsPresent) {
4540 if (size < 4) {
4541 return -EINVAL;
4542 }
4543
4544 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4545 return ERROR_MALFORMED;
4546 }
4547 offset += 4;
4548 size -= 4;
4549 }
4550
4551 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4552 sampleCtsOffset = 0;
4553
4554 size_t bytesPerSample = 0;
4555 if (flags & kSampleDurationPresent) {
4556 bytesPerSample += 4;
4557 } else if (mTrackFragmentHeaderInfo.mFlags
4558 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4559 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4560 } else if (mTrex) {
4561 sampleDuration = mTrex->default_sample_duration;
4562 }
4563
4564 if (flags & kSampleSizePresent) {
4565 bytesPerSample += 4;
4566 } else if (mTrackFragmentHeaderInfo.mFlags
4567 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4568 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4569 } else {
4570 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4571 }
4572
4573 if (flags & kSampleFlagsPresent) {
4574 bytesPerSample += 4;
4575 } else if (mTrackFragmentHeaderInfo.mFlags
4576 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4577 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4578 } else {
4579 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4580 }
4581
4582 if (flags & kSampleCompositionTimeOffsetPresent) {
4583 bytesPerSample += 4;
4584 } else {
4585 sampleCtsOffset = 0;
4586 }
4587
4588 if (size < (off64_t)(sampleCount * bytesPerSample)) {
4589 return -EINVAL;
4590 }
4591
4592 Sample tmp;
4593 for (uint32_t i = 0; i < sampleCount; ++i) {
4594 if (flags & kSampleDurationPresent) {
4595 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4596 return ERROR_MALFORMED;
4597 }
4598 offset += 4;
4599 }
4600
4601 if (flags & kSampleSizePresent) {
4602 if (!mDataSource->getUInt32(offset, &sampleSize)) {
4603 return ERROR_MALFORMED;
4604 }
4605 offset += 4;
4606 }
4607
4608 if (flags & kSampleFlagsPresent) {
4609 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4610 return ERROR_MALFORMED;
4611 }
4612 offset += 4;
4613 }
4614
4615 if (flags & kSampleCompositionTimeOffsetPresent) {
4616 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4617 return ERROR_MALFORMED;
4618 }
4619 offset += 4;
4620 }
4621
4622 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4623 " flags 0x%08x", i + 1,
4624 dataOffset, sampleSize, sampleDuration,
4625 (flags & kFirstSampleFlagsPresent) && i == 0
4626 ? firstSampleFlags : sampleFlags);
4627 tmp.offset = dataOffset;
4628 tmp.size = sampleSize;
4629 tmp.duration = sampleDuration;
4630 tmp.compositionOffset = sampleCtsOffset;
4631 memset(tmp.iv, 0, sizeof(tmp.iv));
4632 mCurrentSamples.add(tmp);
4633
4634 dataOffset += sampleSize;
4635 }
4636
4637 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4638
4639 return OK;
4640 }
4641
getFormat(MetaDataBase & meta)4642 status_t MPEG4Source::getFormat(MetaDataBase &meta) {
4643 Mutex::Autolock autoLock(mLock);
4644 meta = mFormat;
4645 return OK;
4646 }
4647
parseNALSize(const uint8_t * data) const4648 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4649 switch (mNALLengthSize) {
4650 case 1:
4651 return *data;
4652 case 2:
4653 return U16_AT(data);
4654 case 3:
4655 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4656 case 4:
4657 return U32_AT(data);
4658 }
4659
4660 // This cannot happen, mNALLengthSize springs to life by adding 1 to
4661 // a 2-bit integer.
4662 CHECK(!"Should not be here.");
4663
4664 return 0;
4665 }
4666
// Returns the next sample (or NAL unit) of this track in |*out|.
//
// Fragmented files (mFirstMoofOffset > 0) are handed off to fragmentedRead().
// Otherwise an optional seek request in |options| first repositions
// mCurrentSampleIndex, then one sample is fetched and delivered in one of
// three shapes:
//   - neither AVC nor HEVC: the sample bytes exactly as read;
//   - AVC/HEVC with mWantsNALFragments: one NAL unit per call (length prefix
//     stripped), each returned as a clone of the sample buffer;
//   - AVC/HEVC otherwise: the whole sample with every NAL length prefix
//     replaced by a 00 00 00 01 start code (DRM content is copied verbatim).
//
// Returns WOULD_BLOCK for a non-blocking request when mGroup has no free
// buffer, ERROR_END_OF_STREAM when a seek lands out of range, and
// ERROR_IO / ERROR_MALFORMED / ERROR_BUFFER_TOO_SMALL on read or parse
// problems. mLock is held for the whole call.
read(MediaBufferBase ** out,const ReadOptions * options)4667 status_t MPEG4Source::read(
4668 MediaBufferBase **out, const ReadOptions *options) {
4669 Mutex::Autolock autoLock(mLock);
4670
4671 CHECK(mStarted);
4672
// Non-blocking callers get WOULD_BLOCK instead of waiting for a buffer.
4673 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
4674 *out = nullptr;
4675 return WOULD_BLOCK;
4676 }
4677
// A non-zero first-moof offset marks the file as fragmented.
4678 if (mFirstMoofOffset > 0) {
4679 return fragmentedRead(out, options);
4680 }
4681
4682 *out = NULL;
4683
4684 int64_t targetSampleTimeUs = -1;
4685
4686 int64_t seekTimeUs;
4687 ReadOptions::SeekMode mode;
4688 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4689 if (mIsHeif) {
4690 CHECK(mSampleTable == NULL);
4691 CHECK(mItemTable != NULL);
4692 int32_t imageIndex;
4693 if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
4694 return ERROR_MALFORMED;
4695 }
4696
// HEIF: a non-negative seek time selects the image item, a negative one
// selects its thumbnail item.
4697 status_t err;
4698 if (seekTimeUs >= 0) {
4699 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
4700 } else {
4701 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
4702 }
4703 if (err != OK) {
4704 return err;
4705 }
4706 } else {
// Translate the requested seek mode into SampleTable lookup flags.
4707 uint32_t findFlags = 0;
4708 switch (mode) {
4709 case ReadOptions::SEEK_PREVIOUS_SYNC:
4710 findFlags = SampleTable::kFlagBefore;
4711 break;
4712 case ReadOptions::SEEK_NEXT_SYNC:
4713 findFlags = SampleTable::kFlagAfter;
4714 break;
4715 case ReadOptions::SEEK_CLOSEST_SYNC:
4716 case ReadOptions::SEEK_CLOSEST:
4717 findFlags = SampleTable::kFlagClosest;
4718 break;
4719 case ReadOptions::SEEK_FRAME_INDEX:
4720 findFlags = SampleTable::kFlagFrameIndex;
4721 break;
4722 default:
4723 CHECK(!"Should not be here.");
4724 break;
4725 }
4726
4727 uint32_t sampleIndex;
4728 status_t err = mSampleTable->findSampleAtTime(
4729 seekTimeUs, 1000000, mTimescale,
4730 &sampleIndex, findFlags);
4731
4732 if (mode == ReadOptions::SEEK_CLOSEST
4733 || mode == ReadOptions::SEEK_FRAME_INDEX) {
4734 // We found the closest sample already, now we want the sync
4735 // sample preceding it (or the sample itself of course), even
4736 // if the subsequent sync sample is closer.
4737 findFlags = SampleTable::kFlagBefore;
4738 }
4739
4740 uint32_t syncSampleIndex;
4741 if (err == OK) {
4742 err = mSampleTable->findSyncSampleNear(
4743 sampleIndex, &syncSampleIndex, findFlags);
4744 }
4745
// The timestamp is taken from the matched sample (sampleIndex), not from the
// sync sample that reading restarts at.
4746 uint32_t sampleTime;
4747 if (err == OK) {
4748 err = mSampleTable->getMetaDataForSample(
4749 sampleIndex, NULL, NULL, &sampleTime);
4750 }
4751
4752 if (err != OK) {
4753 if (err == ERROR_OUT_OF_RANGE) {
4754 // An attempt to seek past the end of the stream would
4755 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
4756 // this all the way to the MediaPlayer would cause abnormal
4757 // termination. Legacy behaviour appears to be to behave as if
4758 // we had seeked to the end of stream, ending normally.
4759 err = ERROR_END_OF_STREAM;
4760 }
4761 ALOGV("end of stream");
4762 return err;
4763 }
4764
// Only these two modes report a target time; it is later attached to the
// buffer as kKeyTargetTime.
4765 if (mode == ReadOptions::SEEK_CLOSEST
4766 || mode == ReadOptions::SEEK_FRAME_INDEX) {
4767 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
4768 }
4769
4770 #if 0
4771 uint32_t syncSampleTime;
4772 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
4773 syncSampleIndex, NULL, NULL, &syncSampleTime));
4774
4775 ALOGI("seek to time %lld us => sample at time %lld us, "
4776 "sync sample at time %lld us",
4777 seekTimeUs,
4778 sampleTime * 1000000ll / mTimescale,
4779 syncSampleTime * 1000000ll / mTimescale);
4780 #endif
4781
4782 mCurrentSampleIndex = syncSampleIndex;
4783 }
4784
// Drop any partially consumed buffer left over from before the seek.
4785 if (mBuffer != NULL) {
4786 mBuffer->release();
4787 mBuffer = NULL;
4788 }
4789
4790 // fall through
4791 }
4792
4793 off64_t offset = 0;
4794 size_t size = 0;
4795 uint32_t cts, stts;
4796 bool isSyncSample;
4797 bool newBuffer = false;
// No pending buffer: look up where the current sample lives and acquire an
// output buffer for it. cts/stts/isSyncSample are only assigned on this path.
4798 if (mBuffer == NULL) {
4799 newBuffer = true;
4800
4801 status_t err;
4802 if (!mIsHeif) {
4803 err = mSampleTable->getMetaDataForSample(
4804 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
4805 } else {
// getSeekTo() is queried a second time here; the item index is only passed
// along when this call carries a seek request.
4806 err = mItemTable->getImageOffsetAndSize(
4807 options && options->getSeekTo(&seekTimeUs, &mode) ?
4808 &mCurrentSampleIndex : NULL, &offset, &size);
4809
4810 cts = stts = 0;
4811 isSyncSample = 0;
4812 ALOGV("image offset %lld, size %zu", (long long)offset, size);
4813 }
4814
4815 if (err != OK) {
4816 return err;
4817 }
4818
4819 err = mGroup->acquire_buffer(&mBuffer);
4820
4821 if (err != OK) {
4822 CHECK(mBuffer == NULL);
4823 return err;
4824 }
4825 if (size > mBuffer->size()) {
4826 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4827 mBuffer->release();
4828 mBuffer = NULL;
4829 return ERROR_BUFFER_TOO_SMALL;
4830 }
4831 }
4832
// Path 1: raw sample delivery (non-AVC/HEVC), or AVC/HEVC split into
// individual NAL units when mWantsNALFragments is set.
4833 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
4834 if (newBuffer) {
4835 ssize_t num_bytes_read =
4836 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4837
4838 if (num_bytes_read < (ssize_t)size) {
4839 mBuffer->release();
4840 mBuffer = NULL;
4841
4842 return ERROR_IO;
4843 }
4844
4845 CHECK(mBuffer != NULL);
4846 mBuffer->set_range(0, size);
4847 mBuffer->meta_data().clear();
4848 mBuffer->meta_data().setInt64(
4849 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4850 mBuffer->meta_data().setInt64(
4851 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4852
4853 if (targetSampleTimeUs >= 0) {
4854 mBuffer->meta_data().setInt64(
4855 kKeyTargetTime, targetSampleTimeUs);
4856 }
4857
4858 if (isSyncSample) {
4859 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
4860 }
4861
// The sample index advances as soon as the sample is loaded, even though its
// NAL units may be handed out over several subsequent calls.
4862 ++mCurrentSampleIndex;
4863 }
4864
4865 if (!mIsAVC && !mIsHEVC) {
4866 *out = mBuffer;
4867 mBuffer = NULL;
4868
4869 return OK;
4870 }
4871
4872 // Each NAL unit is split up into its constituent fragments and
4873 // each one of them returned in its own buffer.
4874
4875 CHECK(mBuffer->range_length() >= mNALLengthSize);
4876
4877 const uint8_t *src =
4878 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4879
4880 size_t nal_size = parseNALSize(src);
// NOTE(review): this overflow condition is only logged. The range check
// right below is written as a subtraction, so an oversized nal_size is still
// rejected there.
4881 if (mNALLengthSize > SIZE_MAX - nal_size) {
4882 ALOGE("b/24441553, b/24445122");
4883 }
4884 if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4885 ALOGE("incomplete NAL unit.");
4886
4887 mBuffer->release();
4888 mBuffer = NULL;
4889
4890 return ERROR_MALFORMED;
4891 }
4892
// Hand out a clone whose range covers just this NAL payload, then shrink
// mBuffer's range to the not-yet-delivered remainder.
4893 MediaBufferBase *clone = mBuffer->clone();
4894 CHECK(clone != NULL);
4895 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4896
4897 CHECK(mBuffer != NULL);
4898 mBuffer->set_range(
4899 mBuffer->range_offset() + mNALLengthSize + nal_size,
4900 mBuffer->range_length() - mNALLengthSize - nal_size);
4901
4902 if (mBuffer->range_length() == 0) {
4903 mBuffer->release();
4904 mBuffer = NULL;
4905 }
4906
4907 *out = clone;
4908
4909 return OK;
4910 } else {
4911 // Whole NAL units are returned but each fragment is prefixed by
4912 // the start code (0x00 00 00 01).
// NOTE(review): this branch uses offset/size/cts/stts unconditionally, which
// are only filled in when mBuffer was NULL on entry. Every return below
// clears mBuffer, so that presumably always holds here - confirm.
4913 ssize_t num_bytes_read = 0;
4914 int32_t drm = 0;
4915 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
// DRM samples are read straight into the output buffer; everything else goes
// through mSrcBuffer so the length prefixes can be rewritten.
4916 if (usesDRM) {
4917 num_bytes_read =
4918 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4919 } else {
4920 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4921 }
4922
4923 if (num_bytes_read < (ssize_t)size) {
4924 mBuffer->release();
4925 mBuffer = NULL;
4926
4927 return ERROR_IO;
4928 }
4929
4930 if (usesDRM) {
4931 CHECK(mBuffer != NULL);
4932 mBuffer->set_range(0, size);
4933
4934 } else {
4935 uint8_t *dstData = (uint8_t *)mBuffer->data();
4936 size_t srcOffset = 0;
4937 size_t dstOffset = 0;
4938
// Walk the length-prefixed NAL units in mSrcBuffer, validating that both the
// prefix and the payload lie within the sample before copying.
4939 while (srcOffset < size) {
4940 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4941 size_t nalLength = 0;
4942 if (!isMalFormed) {
4943 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4944 srcOffset += mNALLengthSize;
4945 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4946 }
4947
4948 if (isMalFormed) {
4949 ALOGE("Video is malformed");
4950 mBuffer->release();
4951 mBuffer = NULL;
4952 return ERROR_MALFORMED;
4953 }
4954
// Zero-length NAL: nothing to copy; its length prefix was already skipped.
4955 if (nalLength == 0) {
4956 continue;
4957 }
4958
// Make sure the 4-byte start code plus the payload fit into the output
// buffer without overflowing size_t.
4959 if (dstOffset > SIZE_MAX - 4 ||
4960 dstOffset + 4 > SIZE_MAX - nalLength ||
4961 dstOffset + 4 + nalLength > mBuffer->size()) {
4962 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
4963 android_errorWriteLog(0x534e4554, "27208621");
4964 mBuffer->release();
4965 mBuffer = NULL;
4966 return ERROR_MALFORMED;
4967 }
4968
4969 dstData[dstOffset++] = 0;
4970 dstData[dstOffset++] = 0;
4971 dstData[dstOffset++] = 0;
4972 dstData[dstOffset++] = 1;
4973 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4974 srcOffset += nalLength;
4975 dstOffset += nalLength;
4976 }
4977 CHECK_EQ(srcOffset, size);
4978 CHECK(mBuffer != NULL);
4979 mBuffer->set_range(0, dstOffset);
4980 }
4981
4982 mBuffer->meta_data().clear();
4983 mBuffer->meta_data().setInt64(
4984 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4985 mBuffer->meta_data().setInt64(
4986 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4987
4988 if (targetSampleTimeUs >= 0) {
4989 mBuffer->meta_data().setInt64(
4990 kKeyTargetTime, targetSampleTimeUs);
4991 }
4992
4993 if (mIsAVC) {
4994 uint32_t layerId = FindAVCLayerId(
4995 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
4996 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
4997 }
4998
4999 if (isSyncSample) {
5000 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5001 }
5002
5003 ++mCurrentSampleIndex;
5004
// Ownership of the buffer passes to the caller.
5005 *out = mBuffer;
5006 mBuffer = NULL;
5007
5008 return OK;
5009 }
5010 }
5011
// Fragmented-file counterpart of read(): serves samples from mCurrentSamples
// and parses the chunk at mNextMoofOffset once the current list is used up.
//
// A seek request in |options| is resolved against the segment list in
// mSegments when it is non-empty; otherwise the only supported seek target
// is the first moof (time 0). The sample is then delivered in the same three
// shapes as in read(): raw bytes, individual NAL units, or start-code
// prefixed NAL units. For samples that carry encrypted sub-sample sizes the
// crypto parameters are attached to the buffer metadata.
//
// Returns ERROR_END_OF_STREAM when no further fragment yields samples, and
// ERROR_IO / ERROR_MALFORMED / ERROR_BUFFER_TOO_SMALL on read or parse
// problems.
// NOTE(review): this function does not take mLock itself; the only caller
// visible in this part of the file, read(), already holds it.
fragmentedRead(MediaBufferBase ** out,const ReadOptions * options)5012 status_t MPEG4Source::fragmentedRead(
5013 MediaBufferBase **out, const ReadOptions *options) {
5014
5015 ALOGV("MPEG4Source::fragmentedRead");
5016
5017 CHECK(mStarted);
5018
5019 *out = NULL;
5020
// NOTE(review): targetSampleTimeUs is never assigned anything but -1 in this
// function, so the kKeyTargetTime branches further down are never taken.
5021 int64_t targetSampleTimeUs = -1;
5022
5023 int64_t seekTimeUs;
5024 ReadOptions::SeekMode mode;
5025 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5026
5027 int numSidxEntries = mSegments.size();
5028 if (numSidxEntries != 0) {
// Accumulate segment durations and sizes until the segment containing the
// seek time is found; totalOffset then points at that segment's start.
5029 int64_t totalTime = 0;
5030 off64_t totalOffset = mFirstMoofOffset;
5031 for (int i = 0; i < numSidxEntries; i++) {
5032 const SidxEntry *se = &mSegments[i];
5033 if (totalTime + se->mDurationUs > seekTimeUs) {
5034 // The requested time is somewhere in this segment
5035 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
5036 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
5037 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
5038 // requested next sync, or closest sync and it was closer to the end of
5039 // this segment
5040 totalTime += se->mDurationUs;
5041 totalOffset += se->mSize;
5042 }
5043 break;
5044 }
5045 totalTime += se->mDurationUs;
5046 totalOffset += se->mSize;
5047 }
// Restart parsing at the chosen segment with an empty sample list.
5048 mCurrentMoofOffset = totalOffset;
5049 mNextMoofOffset = -1;
5050 mCurrentSamples.clear();
5051 mCurrentSampleIndex = 0;
5052 status_t err = parseChunk(&totalOffset);
5053 if (err != OK) {
5054 return err;
5055 }
// Convert the segment start time from microseconds to track timescale units.
5056 mCurrentTime = totalTime * mTimescale / 1000000ll;
5057 } else {
5058 // without sidx boxes, we can only seek to 0
5059 mCurrentMoofOffset = mFirstMoofOffset;
5060 mNextMoofOffset = -1;
5061 mCurrentSamples.clear();
5062 mCurrentSampleIndex = 0;
5063 off64_t tmp = mCurrentMoofOffset;
5064 status_t err = parseChunk(&tmp);
5065 if (err != OK) {
5066 return err;
5067 }
5068 mCurrentTime = 0;
5069 }
5070
// Drop any partially consumed buffer left over from before the seek.
5071 if (mBuffer != NULL) {
5072 mBuffer->release();
5073 mBuffer = NULL;
5074 }
5075
5076 // fall through
5077 }
5078
5079 off64_t offset = 0;
5080 size_t size = 0;
5081 uint32_t cts = 0;
5082 bool isSyncSample = false;
5083 bool newBuffer = false;
5084 if (mBuffer == NULL) {
5085 newBuffer = true;
5086
5087 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5088 // move to next fragment if there is one
5089 if (mNextMoofOffset <= mCurrentMoofOffset) {
5090 return ERROR_END_OF_STREAM;
5091 }
5092 off64_t nextMoof = mNextMoofOffset;
5093 mCurrentMoofOffset = nextMoof;
5094 mCurrentSamples.clear();
5095 mCurrentSampleIndex = 0;
5096 status_t err = parseChunk(&nextMoof);
5097 if (err != OK) {
5098 return err;
5099 }
// The chunk parsed fine but produced no samples.
5100 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5101 return ERROR_END_OF_STREAM;
5102 }
5103 }
5104
// Presentation time is the running decode time plus the sample's composition
// offset; the running time then advances by the sample's duration.
5105 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5106 offset = smpl->offset;
5107 size = smpl->size;
5108 cts = mCurrentTime + smpl->compositionOffset;
5109 mCurrentTime += smpl->duration;
// Only the first sample of each parsed fragment is flagged as a sync sample.
5110 isSyncSample = (mCurrentSampleIndex == 0); // XXX
5111
5112 status_t err = mGroup->acquire_buffer(&mBuffer);
5113
5114 if (err != OK) {
5115 CHECK(mBuffer == NULL);
5116 ALOGV("acquire_buffer returned %d", err);
5117 return err;
5118 }
5119 if (size > mBuffer->size()) {
5120 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5121 mBuffer->release();
5122 mBuffer = NULL;
5123 return ERROR_BUFFER_TOO_SMALL;
5124 }
5125 }
5126
// NOTE(review): when mBuffer was non-NULL on entry (NAL units of an earlier
// sample still pending), mCurrentSampleIndex was already incremented by the
// call that loaded that sample. This lookup then addresses the following
// sample and the index may equal mCurrentSamples.size() - verify.
// NOTE(review): bufmeta.clear() also runs on that path, wiping the metadata
// set when the buffer was loaded - confirm this is intended.
5127 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5128 MetaDataBase &bufmeta = mBuffer->meta_data();
5129 bufmeta.clear();
5130 if (smpl->encryptedsizes.size()) {
5131 // store clear/encrypted lengths in metadata
5132 bufmeta.setData(kKeyPlainSizes, 0,
5133 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
5134 bufmeta.setData(kKeyEncryptedSizes, 0,
5135 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
5136 bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
5137 bufmeta.setInt32(kKeyCryptoMode, mCryptoMode);
5138 bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16);
5139 bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock);
5140 bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock);
5141
// An IV stored in the track format takes precedence; otherwise the
// per-sample IV is used.
5142 uint32_t type = 0;
5143 const void *iv = NULL;
5144 size_t ivlength = 0;
5145 if (!mFormat.findData(
5146 kKeyCryptoIV, &type, &iv, &ivlength)) {
5147 iv = smpl->iv;
5148 ivlength = 16; // use 16 or the actual size?
5149 }
5150 bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength);
5151
5152 }
5153
// Path 1: raw sample delivery (non-AVC/HEVC), or AVC/HEVC split into
// individual NAL units when mWantsNALFragments is set.
5154 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
5155 if (newBuffer) {
5156 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
5157 mBuffer->release();
5158 mBuffer = NULL;
5159
5160 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
5161 return ERROR_MALFORMED;
5162 }
5163
5164 ssize_t num_bytes_read =
5165 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5166
5167 if (num_bytes_read < (ssize_t)size) {
5168 mBuffer->release();
5169 mBuffer = NULL;
5170
5171 ALOGE("i/o error");
5172 return ERROR_IO;
5173 }
5174
5175 CHECK(mBuffer != NULL);
5176 mBuffer->set_range(0, size);
5177 mBuffer->meta_data().setInt64(
5178 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5179 mBuffer->meta_data().setInt64(
5180 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5181
5182 if (targetSampleTimeUs >= 0) {
5183 mBuffer->meta_data().setInt64(
5184 kKeyTargetTime, targetSampleTimeUs);
5185 }
5186
5187 if (mIsAVC) {
5188 uint32_t layerId = FindAVCLayerId(
5189 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5190 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
5191 }
5192
5193 if (isSyncSample) {
5194 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5195 }
5196
// The sample index advances as soon as the sample is loaded, even though its
// NAL units may be handed out over several subsequent calls.
5197 ++mCurrentSampleIndex;
5198 }
5199
5200 if (!mIsAVC && !mIsHEVC) {
5201 *out = mBuffer;
5202 mBuffer = NULL;
5203
5204 return OK;
5205 }
5206
5207 // Each NAL unit is split up into its constituent fragments and
5208 // each one of them returned in its own buffer.
5209
5210 CHECK(mBuffer->range_length() >= mNALLengthSize);
5211
5212 const uint8_t *src =
5213 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
5214
5215 size_t nal_size = parseNALSize(src);
// NOTE(review): this overflow condition is only logged. The range check
// below is written as a subtraction, so an oversized nal_size is still
// rejected there.
5216 if (mNALLengthSize > SIZE_MAX - nal_size) {
5217 ALOGE("b/24441553, b/24445122");
5218 }
5219
5220 if (mBuffer->range_length() - mNALLengthSize < nal_size) {
5221 ALOGE("incomplete NAL unit.");
5222
5223 mBuffer->release();
5224 mBuffer = NULL;
5225
5226 return ERROR_MALFORMED;
5227 }
5228
// Hand out a clone whose range covers just this NAL payload, then shrink
// mBuffer's range to the not-yet-delivered remainder.
5229 MediaBufferBase *clone = mBuffer->clone();
5230 CHECK(clone != NULL);
5231 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
5232
5233 CHECK(mBuffer != NULL);
5234 mBuffer->set_range(
5235 mBuffer->range_offset() + mNALLengthSize + nal_size,
5236 mBuffer->range_length() - mNALLengthSize - nal_size);
5237
5238 if (mBuffer->range_length() == 0) {
5239 mBuffer->release();
5240 mBuffer = NULL;
5241 }
5242
5243 *out = clone;
5244
5245 return OK;
5246 } else {
5247 ALOGV("whole NAL");
5248 // Whole NAL units are returned but each fragment is prefixed by
5249 // the start code (0x00 00 00 01).
5250 ssize_t num_bytes_read = 0;
5251 int32_t drm = 0;
5252 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
5253 void *data = NULL;
5254 bool isMalFormed = false;
// Choose the read destination and make sure |size| fits into it: the output
// buffer for DRM content, mSrcBuffer (bounded by kKeyMaxInputSize) otherwise.
5255 if (usesDRM) {
5256 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
5257 isMalFormed = true;
5258 } else {
5259 data = mBuffer->data();
5260 }
5261 } else {
5262 int32_t max_size;
5263 if (!mFormat.findInt32(kKeyMaxInputSize, &max_size)
5264 || !isInRange((size_t)0u, (size_t)max_size, size)) {
5265 isMalFormed = true;
5266 } else {
5267 data = mSrcBuffer;
5268 }
5269 }
5270
5271 if (isMalFormed || data == NULL) {
5272 ALOGE("isMalFormed size %zu", size);
5273 if (mBuffer != NULL) {
5274 mBuffer->release();
5275 mBuffer = NULL;
5276 }
5277 return ERROR_MALFORMED;
5278 }
5279 num_bytes_read = mDataSource->readAt(offset, data, size);
5280
5281 if (num_bytes_read < (ssize_t)size) {
5282 mBuffer->release();
5283 mBuffer = NULL;
5284
5285 ALOGE("i/o error");
5286 return ERROR_IO;
5287 }
5288
5289 if (usesDRM) {
5290 CHECK(mBuffer != NULL);
5291 mBuffer->set_range(0, size);
5292
5293 } else {
5294 uint8_t *dstData = (uint8_t *)mBuffer->data();
5295 size_t srcOffset = 0;
5296 size_t dstOffset = 0;
5297
// Walk the length-prefixed NAL units in mSrcBuffer, validating source and
// destination ranges before each copy.
5298 while (srcOffset < size) {
5299 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5300 size_t nalLength = 0;
5301 if (!isMalFormed) {
5302 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5303 srcOffset += mNALLengthSize;
5304 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
5305 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
5306 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
5307 }
5308
5309 if (isMalFormed) {
5310 ALOGE("Video is malformed; nalLength %zu", nalLength);
5311 mBuffer->release();
5312 mBuffer = NULL;
5313 return ERROR_MALFORMED;
5314 }
5315
// Zero-length NAL: nothing to copy; its length prefix was already skipped.
5316 if (nalLength == 0) {
5317 continue;
5318 }
5319
// Second destination bounds check, kept in addition to the isInRange() tests
// above.
5320 if (dstOffset > SIZE_MAX - 4 ||
5321 dstOffset + 4 > SIZE_MAX - nalLength ||
5322 dstOffset + 4 + nalLength > mBuffer->size()) {
5323 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
5324 android_errorWriteLog(0x534e4554, "26365349");
5325 mBuffer->release();
5326 mBuffer = NULL;
5327 return ERROR_MALFORMED;
5328 }
5329
5330 dstData[dstOffset++] = 0;
5331 dstData[dstOffset++] = 0;
5332 dstData[dstOffset++] = 0;
5333 dstData[dstOffset++] = 1;
5334 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5335 srcOffset += nalLength;
5336 dstOffset += nalLength;
5337 }
5338 CHECK_EQ(srcOffset, size);
5339 CHECK(mBuffer != NULL);
5340 mBuffer->set_range(0, dstOffset);
5341 }
5342
5343 mBuffer->meta_data().setInt64(
5344 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5345 mBuffer->meta_data().setInt64(
5346 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5347
5348 if (targetSampleTimeUs >= 0) {
5349 mBuffer->meta_data().setInt64(
5350 kKeyTargetTime, targetSampleTimeUs);
5351 }
5352
5353 if (isSyncSample) {
5354 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5355 }
5356
5357 ++mCurrentSampleIndex;
5358
// Ownership of the buffer passes to the caller.
5359 *out = mBuffer;
5360 mBuffer = NULL;
5361
5362 return OK;
5363 }
5364 }
5365
findTrackByMimePrefix(const char * mimePrefix)5366 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
5367 const char *mimePrefix) {
5368 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
5369 const char *mime;
5370 if (track->meta.findCString(kKeyMIMEType, &mime)
5371 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
5372 return track;
5373 }
5374 }
5375
5376 return NULL;
5377 }
5378
LegacySniffMPEG4(DataSourceBase * source,float * confidence)5379 static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) {
5380 uint8_t header[8];
5381
5382 ssize_t n = source->readAt(4, header, sizeof(header));
5383 if (n < (ssize_t)sizeof(header)) {
5384 return false;
5385 }
5386
5387 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
5388 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
5389 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
5390 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
5391 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
5392 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
5393 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
5394 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
5395 *confidence = 0.4;
5396
5397 return true;
5398 }
5399
5400 return false;
5401 }
5402
isCompatibleBrand(uint32_t fourcc)5403 static bool isCompatibleBrand(uint32_t fourcc) {
5404 static const uint32_t kCompatibleBrands[] = {
5405 FOURCC('i', 's', 'o', 'm'),
5406 FOURCC('i', 's', 'o', '2'),
5407 FOURCC('a', 'v', 'c', '1'),
5408 FOURCC('h', 'v', 'c', '1'),
5409 FOURCC('h', 'e', 'v', '1'),
5410 FOURCC('3', 'g', 'p', '4'),
5411 FOURCC('m', 'p', '4', '1'),
5412 FOURCC('m', 'p', '4', '2'),
5413 FOURCC('d', 'a', 's', 'h'),
5414
5415 // Won't promise that the following file types can be played.
5416 // Just give these file types a chance.
5417 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime
5418 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP
5419
5420 FOURCC('3', 'g', '2', 'a'), // 3GPP2
5421 FOURCC('3', 'g', '2', 'b'),
5422 FOURCC('m', 'i', 'f', '1'), // HEIF image
5423 FOURCC('h', 'e', 'i', 'c'), // HEIF image
5424 FOURCC('m', 's', 'f', '1'), // HEIF image sequence
5425 FOURCC('h', 'e', 'v', 'c'), // HEIF image sequence
5426 };
5427
5428 for (size_t i = 0;
5429 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
5430 ++i) {
5431 if (kCompatibleBrands[i] == fourcc) {
5432 return true;
5433 }
5434 }
5435
5436 return false;
5437 }
5438
5439 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
5440 // compatible brand is present.
5441 // Also try to identify where this file's metadata ends
5442 // (end of the 'moov' atom) and report it to the caller as part of
5443 // the metadata.
BetterSniffMPEG4(DataSourceBase * source,float * confidence)5444 static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) {
5445 // We scan up to 128 bytes to identify this file as an MP4.
5446 static const off64_t kMaxScanOffset = 128ll;
5447
5448 off64_t offset = 0ll;
5449 bool foundGoodFileType = false;
5450 off64_t moovAtomEndOffset = -1ll;
5451 bool done = false;
5452
5453 while (!done && offset < kMaxScanOffset) {
5454 uint32_t hdr[2];
5455 if (source->readAt(offset, hdr, 8) < 8) {
5456 return false;
5457 }
5458
5459 uint64_t chunkSize = ntohl(hdr[0]);
5460 uint32_t chunkType = ntohl(hdr[1]);
5461 off64_t chunkDataOffset = offset + 8;
5462
5463 if (chunkSize == 1) {
5464 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
5465 return false;
5466 }
5467
5468 chunkSize = ntoh64(chunkSize);
5469 chunkDataOffset += 8;
5470
5471 if (chunkSize < 16) {
5472 // The smallest valid chunk is 16 bytes long in this case.
5473 return false;
5474 }
5475
5476 } else if (chunkSize < 8) {
5477 // The smallest valid chunk is 8 bytes long.
5478 return false;
5479 }
5480
5481 // (data_offset - offset) is either 8 or 16
5482 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
5483 if (chunkDataSize < 0) {
5484 ALOGE("b/23540914");
5485 return false;
5486 }
5487
5488 char chunkstring[5];
5489 MakeFourCCString(chunkType, chunkstring);
5490 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
5491 switch (chunkType) {
5492 case FOURCC('f', 't', 'y', 'p'):
5493 {
5494 if (chunkDataSize < 8) {
5495 return false;
5496 }
5497
5498 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
5499 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
5500 if (i == 1) {
5501 // Skip this index, it refers to the minorVersion,
5502 // not a brand.
5503 continue;
5504 }
5505
5506 uint32_t brand;
5507 if (source->readAt(
5508 chunkDataOffset + 4 * i, &brand, 4) < 4) {
5509 return false;
5510 }
5511
5512 brand = ntohl(brand);
5513
5514 if (isCompatibleBrand(brand)) {
5515 foundGoodFileType = true;
5516 break;
5517 }
5518 }
5519
5520 if (!foundGoodFileType) {
5521 return false;
5522 }
5523
5524 break;
5525 }
5526
5527 case FOURCC('m', 'o', 'o', 'v'):
5528 {
5529 moovAtomEndOffset = offset + chunkSize;
5530
5531 done = true;
5532 break;
5533 }
5534
5535 default:
5536 break;
5537 }
5538
5539 offset += chunkSize;
5540 }
5541
5542 if (!foundGoodFileType) {
5543 return false;
5544 }
5545
5546 *confidence = 0.4f;
5547
5548 return true;
5549 }
5550
CreateExtractor(DataSourceBase * source,void *)5551 static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) {
5552 return new MPEG4Extractor(source);
5553 }
5554
Sniff(DataSourceBase * source,float * confidence,void **,MediaExtractor::FreeMetaFunc *)5555 static MediaExtractor::CreatorFunc Sniff(
5556 DataSourceBase *source, float *confidence, void **,
5557 MediaExtractor::FreeMetaFunc *) {
5558 if (BetterSniffMPEG4(source, confidence)) {
5559 return CreateExtractor;
5560 }
5561
5562 if (LegacySniffMPEG4(source, confidence)) {
5563 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5564 return CreateExtractor;
5565 }
5566
5567 return NULL;
5568 }
5569
5570 extern "C" {
5571 // This is the only symbol that needs to be exported
5572 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()5573 MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
5574 return {
5575 MediaExtractor::EXTRACTORDEF_VERSION,
5576 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
5577 1, // version
5578 "MP4 Extractor",
5579 Sniff
5580 };
5581 }
5582
5583 } // extern "C"
5584
5585 } // namespace android
5586