1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include <utils/Log.h>
27
28 #include "include/MPEG4Extractor.h"
29 #include "include/SampleTable.h"
30 #include "include/ESDS.h"
31
32 #include <media/stagefright/foundation/ABitReader.h>
33 #include <media/stagefright/foundation/ABuffer.h>
34 #include <media/stagefright/foundation/ADebug.h>
35 #include <media/stagefright/foundation/AMessage.h>
36 #include <media/stagefright/foundation/AUtils.h>
37 #include <media/stagefright/foundation/ColorUtils.h>
38 #include <media/stagefright/MediaBuffer.h>
39 #include <media/stagefright/MediaBufferGroup.h>
40 #include <media/stagefright/MediaDefs.h>
41 #include <media/stagefright/MediaSource.h>
42 #include <media/stagefright/MetaData.h>
43 #include <utils/String8.h>
44
45 #include <byteswap.h>
46 #include "include/ID3.h"
47
48 #ifndef UINT32_MAX
49 #define UINT32_MAX (4294967295U)
50 #endif
51
52 namespace android {
53
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the first sample of an MPEG2 video track to
    // this many bytes before storing it under kKeyStreamHeader).
    kMaxTrackHeaderSize = 32,
};
58
// MediaSource implementation declared by the MPEG4 extractor. Only the
// declaration is visible here; the notes below describe what the member
// names/types show and hedge anything that depends on the (unseen)
// implementation.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // NOTE(review): "sidx" is taken by non-const reference and the class
    // stores a Vector<SidxEntry> reference (mSegments), so the referenced
    // vector presumably has to outlive this object -- confirm against the
    // constructor definition.
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // Same signature as read(); presumably the variant used for fragmented
    // (moof-based) files -- TODO confirm in the implementation.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for the sample auxiliary information parsers declared below.
    // The *AllocSize members presumably track the capacity of the two raw
    // arrays -- TODO confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values collected by parseTrackFragmentHeader(); mFlags is a bitmask
    // of the Flags enumerators below (presumably the tfhd box flags --
    // TODO confirm against the parser).
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples: location and size of the
    // sample plus timing fields, an IV buffer and the clear/encrypted
    // size lists.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private and (as far as this file shows) left undefined to
    // make the class non-copyable.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
172
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.

struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    // initCheck(), getSize() and flags() forward directly to the wrapped
    // source; readAt() consults the cache first.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Drops any existing cache, then reads "size" bytes starting at
    // "offset" from the wrapped source into a freshly malloc'ed buffer.
    // Returns OK on success, -ENOMEM if the allocation fails and ERROR_IO
    // (with the cache cleared) if fewer than "size" bytes could be read.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Held by readAt() and setCachedRange() while they touch the cache.
    Mutex mLock;

    sp<DataSource> mSource;
    off64_t mCachedOffset;   // file offset of the first cached byte
    size_t mCachedSize;      // number of cached bytes
    uint8_t *mCache;         // malloc'ed buffer, NULL when nothing is cached

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Declared private and (as far as this file shows) left undefined to
    // make the type non-copyable.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
206
MPEG4DataSource(const sp<DataSource> & source)207 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
208 : mSource(source),
209 mCachedOffset(0),
210 mCachedSize(0),
211 mCache(NULL) {
212 }
213
~MPEG4DataSource()214 MPEG4DataSource::~MPEG4DataSource() {
215 clearCache();
216 }
217
clearCache()218 void MPEG4DataSource::clearCache() {
219 if (mCache) {
220 free(mCache);
221 mCache = NULL;
222 }
223
224 mCachedOffset = 0;
225 mCachedSize = 0;
226 }
227
initCheck() const228 status_t MPEG4DataSource::initCheck() const {
229 return mSource->initCheck();
230 }
231
readAt(off64_t offset,void * data,size_t size)232 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
233 Mutex::Autolock autoLock(mLock);
234
235 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
236 memcpy(data, &mCache[offset - mCachedOffset], size);
237 return size;
238 }
239
240 return mSource->readAt(offset, data, size);
241 }
242
getSize(off64_t * size)243 status_t MPEG4DataSource::getSize(off64_t *size) {
244 return mSource->getSize(size);
245 }
246
flags()247 uint32_t MPEG4DataSource::flags() {
248 return mSource->flags();
249 }
250
setCachedRange(off64_t offset,size_t size)251 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
252 Mutex::Autolock autoLock(mLock);
253
254 clearCache();
255
256 mCache = (uint8_t *)malloc(size);
257
258 if (mCache == NULL) {
259 return -ENOMEM;
260 }
261
262 mCachedOffset = offset;
263 mCachedSize = size;
264
265 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
266
267 if (err < (ssize_t)size) {
268 clearCache();
269
270 return ERROR_IO;
271 }
272
273 return OK;
274 }
275
276 ////////////////////////////////////////////////////////////////////////////////
277
// Debug switch: when true, parseChunk() prints each chunk's type and size and
// hex-dumps up to 256 bytes of it to stdout.
static const bool kUseHexDump = false;
279
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row.
//
// Each row consists of the row's starting offset in hex, the bytes in hex
// (with one extra space between the 8th and 9th byte) and the same bytes as
// ASCII, non-printable bytes being shown as '.'. Nothing is printed when
// size is 0.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;
    size_t offset = 0;
    while (offset < size) {
        printf("0x%04zx ", offset);

        // Number of bytes actually present in this row.
        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (offset + i < size) {
                printf("%02x ", data[offset + i]);
            } else {
                // A missing byte must take exactly as much room as a printed
                // one ("xx "), otherwise the ASCII column of a short final
                // row does not line up with the rows above it.
                printf("   ");
            }
        }

        printf(" ");

        for (size_t i = 0; i < n; ++i) {
            if (isprint(data[offset + i])) {
                printf("%c", data[offset + i]);
            } else {
                printf(".");
            }
        }

        printf("\n");

        offset += 16;
    }
}
318
FourCC2MIME(uint32_t fourcc)319 static const char *FourCC2MIME(uint32_t fourcc) {
320 switch (fourcc) {
321 case FOURCC('m', 'p', '4', 'a'):
322 return MEDIA_MIMETYPE_AUDIO_AAC;
323
324 case FOURCC('s', 'a', 'm', 'r'):
325 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
326
327 case FOURCC('s', 'a', 'w', 'b'):
328 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
329
330 case FOURCC('m', 'p', '4', 'v'):
331 return MEDIA_MIMETYPE_VIDEO_MPEG4;
332
333 case FOURCC('s', '2', '6', '3'):
334 case FOURCC('h', '2', '6', '3'):
335 case FOURCC('H', '2', '6', '3'):
336 return MEDIA_MIMETYPE_VIDEO_H263;
337
338 case FOURCC('a', 'v', 'c', '1'):
339 return MEDIA_MIMETYPE_VIDEO_AVC;
340
341 case FOURCC('h', 'v', 'c', '1'):
342 case FOURCC('h', 'e', 'v', '1'):
343 return MEDIA_MIMETYPE_VIDEO_HEVC;
344 default:
345 CHECK(!"should not be here.");
346 return NULL;
347 }
348 }
349
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)350 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
351 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
352 // AMR NB audio is always mono, 8kHz
353 *channels = 1;
354 *rate = 8000;
355 return true;
356 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
357 // AMR WB audio is always mono, 16kHz
358 *channels = 1;
359 *rate = 16000;
360 return true;
361 }
362 return false;
363 }
364
MPEG4Extractor(const sp<DataSource> & source)365 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
366 : mMoofOffset(0),
367 mMoofFound(false),
368 mMdatFound(false),
369 mDataSource(source),
370 mInitCheck(NO_INIT),
371 mHasVideo(false),
372 mHeaderTimescale(0),
373 mFirstTrack(NULL),
374 mLastTrack(NULL),
375 mFileMetaData(new MetaData),
376 mFirstSINF(NULL),
377 mIsDrm(false) {
378 }
379
~MPEG4Extractor()380 MPEG4Extractor::~MPEG4Extractor() {
381 Track *track = mFirstTrack;
382 while (track) {
383 Track *next = track->next;
384
385 delete track;
386 track = next;
387 }
388 mFirstTrack = mLastTrack = NULL;
389
390 SINF *sinf = mFirstSINF;
391 while (sinf) {
392 SINF *next = sinf->next;
393 delete[] sinf->IPMPData;
394 delete sinf;
395 sinf = next;
396 }
397 mFirstSINF = NULL;
398
399 for (size_t i = 0; i < mPssh.size(); i++) {
400 delete [] mPssh[i].data;
401 }
402 }
403
flags() const404 uint32_t MPEG4Extractor::flags() const {
405 return CAN_PAUSE |
406 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
407 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
408 }
409
getMetaData()410 sp<MetaData> MPEG4Extractor::getMetaData() {
411 status_t err;
412 if ((err = readMetaData()) != OK) {
413 return new MetaData;
414 }
415
416 return mFileMetaData;
417 }
418
countTracks()419 size_t MPEG4Extractor::countTracks() {
420 status_t err;
421 if ((err = readMetaData()) != OK) {
422 ALOGV("MPEG4Extractor::countTracks: no tracks");
423 return 0;
424 }
425
426 size_t n = 0;
427 Track *track = mFirstTrack;
428 while (track) {
429 ++n;
430 track = track->next;
431 }
432
433 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
434 return n;
435 }
436
getTrackMetaData(size_t index,uint32_t flags)437 sp<MetaData> MPEG4Extractor::getTrackMetaData(
438 size_t index, uint32_t flags) {
439 status_t err;
440 if ((err = readMetaData()) != OK) {
441 return NULL;
442 }
443
444 Track *track = mFirstTrack;
445 while (index > 0) {
446 if (track == NULL) {
447 return NULL;
448 }
449
450 track = track->next;
451 --index;
452 }
453
454 if (track == NULL) {
455 return NULL;
456 }
457
458 if ((flags & kIncludeExtensiveMetaData)
459 && !track->includes_expensive_metadata) {
460 track->includes_expensive_metadata = true;
461
462 const char *mime;
463 CHECK(track->meta->findCString(kKeyMIMEType, &mime));
464 if (!strncasecmp("video/", mime, 6)) {
465 if (mMoofOffset > 0) {
466 int64_t duration;
467 if (track->meta->findInt64(kKeyDuration, &duration)) {
468 // nothing fancy, just pick a frame near 1/4th of the duration
469 track->meta->setInt64(
470 kKeyThumbnailTime, duration / 4);
471 }
472 } else {
473 uint32_t sampleIndex;
474 uint32_t sampleTime;
475 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
476 && track->sampleTable->getMetaDataForSample(
477 sampleIndex, NULL /* offset */, NULL /* size */,
478 &sampleTime) == OK) {
479 track->meta->setInt64(
480 kKeyThumbnailTime,
481 ((int64_t)sampleTime * 1000000) / track->timescale);
482 }
483 }
484
485 // MPEG2 tracks do not provide CSD, so read the stream header
486 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
487 off64_t offset;
488 size_t size;
489 if (track->sampleTable->getMetaDataForSample(
490 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
491 if (size > kMaxTrackHeaderSize) {
492 size = kMaxTrackHeaderSize;
493 }
494 uint8_t header[kMaxTrackHeaderSize];
495 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
496 track->meta->setData(kKeyStreamHeader, 'mdat', header, size);
497 }
498 }
499 }
500 }
501 }
502
503 return track->meta;
504 }
505
// Writes the four bytes of "x", most significant first, into s[0..3] and
// NUL-terminates the result. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
513
readMetaData()514 status_t MPEG4Extractor::readMetaData() {
515 if (mInitCheck != NO_INIT) {
516 return mInitCheck;
517 }
518
519 off64_t offset = 0;
520 status_t err;
521 bool sawMoovOrSidx = false;
522
523 while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) {
524 off64_t orig_offset = offset;
525 err = parseChunk(&offset, 0);
526
527 if (err != OK && err != UNKNOWN_ERROR) {
528 break;
529 } else if (offset <= orig_offset) {
530 // only continue parsing if the offset was advanced,
531 // otherwise we might end up in an infinite loop
532 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
533 err = ERROR_MALFORMED;
534 break;
535 } else if (err == UNKNOWN_ERROR) {
536 sawMoovOrSidx = true;
537 }
538 }
539
540 if (mInitCheck == OK) {
541 if (mHasVideo) {
542 mFileMetaData->setCString(
543 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
544 } else {
545 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
546 }
547 } else {
548 mInitCheck = err;
549 }
550
551 CHECK_NE(err, (status_t)NO_INIT);
552
553 // copy pssh data into file metadata
554 uint64_t psshsize = 0;
555 for (size_t i = 0; i < mPssh.size(); i++) {
556 psshsize += 20 + mPssh[i].datalen;
557 }
558 if (psshsize > 0 && psshsize <= UINT32_MAX) {
559 char *buf = (char*)malloc(psshsize);
560 if (!buf) {
561 ALOGE("b/28471206");
562 return NO_MEMORY;
563 }
564 char *ptr = buf;
565 for (size_t i = 0; i < mPssh.size(); i++) {
566 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
567 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
568 ptr += (20 + mPssh[i].datalen);
569 }
570 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
571 free(buf);
572 }
573 return mInitCheck;
574 }
575
getDrmTrackInfo(size_t trackID,int * len)576 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
577 if (mFirstSINF == NULL) {
578 return NULL;
579 }
580
581 SINF *sinf = mFirstSINF;
582 while (sinf && (trackID != sinf->trackID)) {
583 sinf = sinf->next;
584 }
585
586 if (sinf == NULL) {
587 return NULL;
588 }
589
590 *len = sinf->len;
591 return sinf->IPMPData;
592 }
593
594 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
readSize(off64_t offset,const sp<DataSource> DataSource,uint8_t * numOfBytes)595 static int32_t readSize(off64_t offset,
596 const sp<DataSource> DataSource, uint8_t *numOfBytes) {
597 uint32_t size = 0;
598 uint8_t data;
599 bool moreData = true;
600 *numOfBytes = 0;
601
602 while (moreData) {
603 if (DataSource->readAt(offset, &data, 1) < 1) {
604 return -1;
605 }
606 offset ++;
607 moreData = (data >= 128) ? true : false;
608 size = (size << 7) | (data & 0x7f); // Take last 7 bits
609 (*numOfBytes) ++;
610 }
611
612 return size;
613 }
614
parseDrmSINF(off64_t *,off64_t data_offset)615 status_t MPEG4Extractor::parseDrmSINF(
616 off64_t * /* offset */, off64_t data_offset) {
617 uint8_t updateIdTag;
618 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
619 return ERROR_IO;
620 }
621 data_offset ++;
622
623 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
624 return ERROR_MALFORMED;
625 }
626
627 uint8_t numOfBytes;
628 int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
629 if (size < 0) {
630 return ERROR_IO;
631 }
632 data_offset += numOfBytes;
633
634 while(size >= 11 ) {
635 uint8_t descriptorTag;
636 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
637 return ERROR_IO;
638 }
639 data_offset ++;
640
641 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
642 return ERROR_MALFORMED;
643 }
644
645 uint8_t buffer[8];
646 //ObjectDescriptorID and ObjectDescriptor url flag
647 if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
648 return ERROR_IO;
649 }
650 data_offset += 2;
651
652 if ((buffer[1] >> 5) & 0x0001) { //url flag is set
653 return ERROR_MALFORMED;
654 }
655
656 if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
657 return ERROR_IO;
658 }
659 data_offset += 8;
660
661 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
662 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
663 return ERROR_MALFORMED;
664 }
665
666 SINF *sinf = new SINF;
667 sinf->trackID = U16_AT(&buffer[3]);
668 sinf->IPMPDescriptorID = buffer[7];
669 sinf->next = mFirstSINF;
670 mFirstSINF = sinf;
671
672 size -= (8 + 2 + 1);
673 }
674
675 if (size != 0) {
676 return ERROR_MALFORMED;
677 }
678
679 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
680 return ERROR_IO;
681 }
682 data_offset ++;
683
684 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
685 return ERROR_MALFORMED;
686 }
687
688 size = readSize(data_offset, mDataSource, &numOfBytes);
689 if (size < 0) {
690 return ERROR_IO;
691 }
692 data_offset += numOfBytes;
693
694 while (size > 0) {
695 uint8_t tag;
696 int32_t dataLen;
697 if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
698 return ERROR_IO;
699 }
700 data_offset ++;
701
702 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
703 uint8_t id;
704 dataLen = readSize(data_offset, mDataSource, &numOfBytes);
705 if (dataLen < 0) {
706 return ERROR_IO;
707 } else if (dataLen < 4) {
708 return ERROR_MALFORMED;
709 }
710 data_offset += numOfBytes;
711
712 if (mDataSource->readAt(data_offset, &id, 1) < 1) {
713 return ERROR_IO;
714 }
715 data_offset ++;
716
717 SINF *sinf = mFirstSINF;
718 while (sinf && (sinf->IPMPDescriptorID != id)) {
719 sinf = sinf->next;
720 }
721 if (sinf == NULL) {
722 return ERROR_MALFORMED;
723 }
724 sinf->len = dataLen - 3;
725 sinf->IPMPData = new (std::nothrow) char[sinf->len];
726 if (sinf->IPMPData == NULL) {
727 return ERROR_MALFORMED;
728 }
729 data_offset += 2;
730
731 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
732 return ERROR_IO;
733 }
734 data_offset += sinf->len;
735
736 size -= (dataLen + numOfBytes + 1);
737 }
738 }
739
740 if (size != 0) {
741 return ERROR_MALFORMED;
742 }
743
744 return UNKNOWN_ERROR; // Return a dummy error.
745 }
746
747 struct PathAdder {
PathAdderandroid::PathAdder748 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
749 : mPath(path) {
750 mPath->push(chunkType);
751 }
752
~PathAdderandroid::PathAdder753 ~PathAdder() {
754 mPath->pop();
755 }
756
757 private:
758 Vector<uint32_t> *mPath;
759
760 PathAdder(const PathAdder &);
761 PathAdder &operator=(const PathAdder &);
762 };
763
underMetaDataPath(const Vector<uint32_t> & path)764 static bool underMetaDataPath(const Vector<uint32_t> &path) {
765 return path.size() >= 5
766 && path[0] == FOURCC('m', 'o', 'o', 'v')
767 && path[1] == FOURCC('u', 'd', 't', 'a')
768 && path[2] == FOURCC('m', 'e', 't', 'a')
769 && path[3] == FOURCC('i', 'l', 's', 't');
770 }
771
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)772 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
773 return path.size() >= 2
774 && path[0] == FOURCC('m', 'o', 'o', 'v')
775 && path[1] == FOURCC('m', 'e', 't', 'a')
776 && (depth == 2
777 || (depth == 3
778 && (path[2] == FOURCC('h', 'd', 'l', 'r')
779 || path[2] == FOURCC('i', 'l', 's', 't')
780 || path[2] == FOURCC('k', 'e', 'y', 's'))));
781 }
782
783 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)784 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
785 // delta between mpeg4 time and unix epoch time
786 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
787 if (time_1904 < INT64_MIN + delta) {
788 return false;
789 }
790 time_t time_1970 = time_1904 - delta;
791
792 char tmp[32];
793 struct tm* tm = gmtime(&time_1970);
794 if (tm != NULL &&
795 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
796 s->setTo(tmp);
797 return true;
798 }
799 return false;
800 }
801
parseChunk(off64_t * offset,int depth)802 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
803 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
804
805 if (*offset < 0) {
806 ALOGE("b/23540914");
807 return ERROR_MALFORMED;
808 }
809 uint32_t hdr[2];
810 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
811 return ERROR_IO;
812 }
813 uint64_t chunk_size = ntohl(hdr[0]);
814 int32_t chunk_type = ntohl(hdr[1]);
815 off64_t data_offset = *offset + 8;
816
817 if (chunk_size == 1) {
818 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
819 return ERROR_IO;
820 }
821 chunk_size = ntoh64(chunk_size);
822 data_offset += 8;
823
824 if (chunk_size < 16) {
825 // The smallest valid chunk is 16 bytes long in this case.
826 return ERROR_MALFORMED;
827 }
828 } else if (chunk_size == 0) {
829 if (depth == 0) {
830 // atom extends to end of file
831 off64_t sourceSize;
832 if (mDataSource->getSize(&sourceSize) == OK) {
833 chunk_size = (sourceSize - *offset);
834 } else {
835 // XXX could we just pick a "sufficiently large" value here?
836 ALOGE("atom size is 0, and data source has no size");
837 return ERROR_MALFORMED;
838 }
839 } else {
840 // not allowed for non-toplevel atoms, skip it
841 *offset += 4;
842 return OK;
843 }
844 } else if (chunk_size < 8) {
845 // The smallest valid chunk is 8 bytes long.
846 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
847 return ERROR_MALFORMED;
848 }
849
850 char chunk[5];
851 MakeFourCCString(chunk_type, chunk);
852 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
853
854 if (kUseHexDump) {
855 static const char kWhitespace[] = " ";
856 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
857 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
858
859 char buffer[256];
860 size_t n = chunk_size;
861 if (n > sizeof(buffer)) {
862 n = sizeof(buffer);
863 }
864 if (mDataSource->readAt(*offset, buffer, n)
865 < (ssize_t)n) {
866 return ERROR_IO;
867 }
868
869 hexdump(buffer, n);
870 }
871
872 PathAdder autoAdder(&mPath, chunk_type);
873
874 // (data_offset - *offset) is either 8 or 16
875 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
876 if (chunk_data_size < 0) {
877 ALOGE("b/23540914");
878 return ERROR_MALFORMED;
879 }
880
881 if (chunk_type != FOURCC('c', 'p', 'r', 't')
882 && chunk_type != FOURCC('c', 'o', 'v', 'r')
883 && mPath.size() == 5 && underMetaDataPath(mPath)) {
884 off64_t stop_offset = *offset + chunk_size;
885 *offset = data_offset;
886 while (*offset < stop_offset) {
887 status_t err = parseChunk(offset, depth + 1);
888 if (err != OK) {
889 return err;
890 }
891 }
892
893 if (*offset != stop_offset) {
894 return ERROR_MALFORMED;
895 }
896
897 return OK;
898 }
899
900 switch(chunk_type) {
901 case FOURCC('m', 'o', 'o', 'v'):
902 case FOURCC('t', 'r', 'a', 'k'):
903 case FOURCC('m', 'd', 'i', 'a'):
904 case FOURCC('m', 'i', 'n', 'f'):
905 case FOURCC('d', 'i', 'n', 'f'):
906 case FOURCC('s', 't', 'b', 'l'):
907 case FOURCC('m', 'v', 'e', 'x'):
908 case FOURCC('m', 'o', 'o', 'f'):
909 case FOURCC('t', 'r', 'a', 'f'):
910 case FOURCC('m', 'f', 'r', 'a'):
911 case FOURCC('u', 'd', 't', 'a'):
912 case FOURCC('i', 'l', 's', 't'):
913 case FOURCC('s', 'i', 'n', 'f'):
914 case FOURCC('s', 'c', 'h', 'i'):
915 case FOURCC('e', 'd', 't', 's'):
916 {
917 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
918 // store the offset of the first segment
919 mMoofFound = true;
920 mMoofOffset = *offset;
921 }
922
923 if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
924 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
925
926 if (mDataSource->flags()
927 & (DataSource::kWantsPrefetching
928 | DataSource::kIsCachingDataSource)) {
929 sp<MPEG4DataSource> cachedSource =
930 new MPEG4DataSource(mDataSource);
931
932 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
933 mDataSource = cachedSource;
934 }
935 }
936
937 if (mLastTrack == NULL)
938 return ERROR_MALFORMED;
939
940 mLastTrack->sampleTable = new SampleTable(mDataSource);
941 }
942
943 bool isTrack = false;
944 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
945 isTrack = true;
946
947 Track *track = new Track;
948 track->next = NULL;
949 if (mLastTrack) {
950 mLastTrack->next = track;
951 } else {
952 mFirstTrack = track;
953 }
954 mLastTrack = track;
955
956 track->meta = new MetaData;
957 track->includes_expensive_metadata = false;
958 track->skipTrack = false;
959 track->timescale = 0;
960 track->meta->setCString(kKeyMIMEType, "application/octet-stream");
961 }
962
963 off64_t stop_offset = *offset + chunk_size;
964 *offset = data_offset;
965 while (*offset < stop_offset) {
966 status_t err = parseChunk(offset, depth + 1);
967 if (err != OK) {
968 return err;
969 }
970 }
971
972 if (*offset != stop_offset) {
973 return ERROR_MALFORMED;
974 }
975
976 if (isTrack) {
977 int32_t trackId;
978 // There must be exact one track header per track.
979 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
980 mLastTrack->skipTrack = true;
981 }
982 if (mLastTrack->skipTrack) {
983 Track *cur = mFirstTrack;
984
985 if (cur == mLastTrack) {
986 delete cur;
987 mFirstTrack = mLastTrack = NULL;
988 } else {
989 while (cur && cur->next != mLastTrack) {
990 cur = cur->next;
991 }
992 cur->next = NULL;
993 delete mLastTrack;
994 mLastTrack = cur;
995 }
996
997 return OK;
998 }
999
1000 status_t err = verifyTrack(mLastTrack);
1001
1002 if (err != OK) {
1003 return err;
1004 }
1005 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
1006 mInitCheck = OK;
1007
1008 if (!mIsDrm) {
1009 return UNKNOWN_ERROR; // Return a dummy error.
1010 } else {
1011 return OK;
1012 }
1013 }
1014 break;
1015 }
1016
1017 case FOURCC('e', 'l', 's', 't'):
1018 {
1019 *offset += chunk_size;
1020
1021 // See 14496-12 8.6.6
1022 uint8_t version;
1023 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1024 return ERROR_IO;
1025 }
1026
1027 uint32_t entry_count;
1028 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1029 return ERROR_IO;
1030 }
1031
1032 if (entry_count != 1) {
1033 // we only support a single entry at the moment, for gapless playback
1034 ALOGW("ignoring edit list with %d entries", entry_count);
1035 } else if (mHeaderTimescale == 0) {
1036 ALOGW("ignoring edit list because timescale is 0");
1037 } else {
1038 off64_t entriesoffset = data_offset + 8;
1039 uint64_t segment_duration;
1040 int64_t media_time;
1041
1042 if (version == 1) {
1043 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1044 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1045 return ERROR_IO;
1046 }
1047 } else if (version == 0) {
1048 uint32_t sd;
1049 int32_t mt;
1050 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1051 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1052 return ERROR_IO;
1053 }
1054 segment_duration = sd;
1055 media_time = mt;
1056 } else {
1057 return ERROR_IO;
1058 }
1059
1060 uint64_t halfscale = mHeaderTimescale / 2;
1061 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
1062 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
1063
1064 int64_t duration;
1065 int32_t samplerate;
1066 if (!mLastTrack) {
1067 return ERROR_MALFORMED;
1068 }
1069 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1070 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1071
1072 int64_t delay = (media_time * samplerate + 500000) / 1000000;
1073 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1074
1075 int64_t paddingus = duration - (int64_t)(segment_duration + media_time);
1076 if (paddingus < 0) {
1077 // track duration from media header (which is what kKeyDuration is) might
1078 // be slightly shorter than the segment duration, which would make the
1079 // padding negative. Clamp to zero.
1080 paddingus = 0;
1081 }
1082 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1083 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1084 }
1085 }
1086 break;
1087 }
1088
1089 case FOURCC('f', 'r', 'm', 'a'):
1090 {
1091 *offset += chunk_size;
1092
1093 uint32_t original_fourcc;
1094 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1095 return ERROR_IO;
1096 }
1097 original_fourcc = ntohl(original_fourcc);
1098 ALOGV("read original format: %d", original_fourcc);
1099
1100 if (mLastTrack == NULL)
1101 return ERROR_MALFORMED;
1102
1103 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1104 uint32_t num_channels = 0;
1105 uint32_t sample_rate = 0;
1106 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1107 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1108 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1109 }
1110 break;
1111 }
1112
1113 case FOURCC('t', 'e', 'n', 'c'):
1114 {
1115 *offset += chunk_size;
1116
1117 if (chunk_size < 32) {
1118 return ERROR_MALFORMED;
1119 }
1120
1121 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1122 // default IV size, 16 bytes default KeyID
1123 // (ISO 23001-7)
1124 char buf[4];
1125 memset(buf, 0, 4);
1126 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1127 return ERROR_IO;
1128 }
1129 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1130 if (defaultAlgorithmId > 1) {
1131 // only 0 (clear) and 1 (AES-128) are valid
1132 return ERROR_MALFORMED;
1133 }
1134
1135 memset(buf, 0, 4);
1136 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1137 return ERROR_IO;
1138 }
1139 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1140
1141 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1142 (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1143 // only unencrypted data must have 0 IV size
1144 return ERROR_MALFORMED;
1145 } else if (defaultIVSize != 0 &&
1146 defaultIVSize != 8 &&
1147 defaultIVSize != 16) {
1148 // only supported sizes are 0, 8 and 16
1149 return ERROR_MALFORMED;
1150 }
1151
1152 uint8_t defaultKeyId[16];
1153
1154 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1155 return ERROR_IO;
1156 }
1157
1158 if (mLastTrack == NULL)
1159 return ERROR_MALFORMED;
1160
1161 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1162 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1163 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1164 break;
1165 }
1166
1167 case FOURCC('t', 'k', 'h', 'd'):
1168 {
1169 *offset += chunk_size;
1170
1171 status_t err;
1172 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1173 return err;
1174 }
1175
1176 break;
1177 }
1178
1179 case FOURCC('p', 's', 's', 'h'):
1180 {
1181 *offset += chunk_size;
1182
1183 PsshInfo pssh;
1184
1185 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1186 return ERROR_IO;
1187 }
1188
1189 uint32_t psshdatalen = 0;
1190 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1191 return ERROR_IO;
1192 }
1193 pssh.datalen = ntohl(psshdatalen);
1194 ALOGV("pssh data size: %d", pssh.datalen);
1195 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1196 // pssh data length exceeds size of containing box
1197 return ERROR_MALFORMED;
1198 }
1199
1200 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1201 if (pssh.data == NULL) {
1202 return ERROR_MALFORMED;
1203 }
1204 ALOGV("allocated pssh @ %p", pssh.data);
1205 ssize_t requested = (ssize_t) pssh.datalen;
1206 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1207 return ERROR_IO;
1208 }
1209 mPssh.push_back(pssh);
1210
1211 break;
1212 }
1213
1214 case FOURCC('m', 'd', 'h', 'd'):
1215 {
1216 *offset += chunk_size;
1217
1218 if (chunk_data_size < 4 || mLastTrack == NULL) {
1219 return ERROR_MALFORMED;
1220 }
1221
1222 uint8_t version;
1223 if (mDataSource->readAt(
1224 data_offset, &version, sizeof(version))
1225 < (ssize_t)sizeof(version)) {
1226 return ERROR_IO;
1227 }
1228
1229 off64_t timescale_offset;
1230
1231 if (version == 1) {
1232 timescale_offset = data_offset + 4 + 16;
1233 } else if (version == 0) {
1234 timescale_offset = data_offset + 4 + 8;
1235 } else {
1236 return ERROR_IO;
1237 }
1238
1239 uint32_t timescale;
1240 if (mDataSource->readAt(
1241 timescale_offset, ×cale, sizeof(timescale))
1242 < (ssize_t)sizeof(timescale)) {
1243 return ERROR_IO;
1244 }
1245
1246 if (!timescale) {
1247 ALOGE("timescale should not be ZERO.");
1248 return ERROR_MALFORMED;
1249 }
1250
1251 mLastTrack->timescale = ntohl(timescale);
1252
1253 // 14496-12 says all ones means indeterminate, but some files seem to use
1254 // 0 instead. We treat both the same.
1255 int64_t duration = 0;
1256 if (version == 1) {
1257 if (mDataSource->readAt(
1258 timescale_offset + 4, &duration, sizeof(duration))
1259 < (ssize_t)sizeof(duration)) {
1260 return ERROR_IO;
1261 }
1262 if (duration != -1) {
1263 duration = ntoh64(duration);
1264 }
1265 } else {
1266 uint32_t duration32;
1267 if (mDataSource->readAt(
1268 timescale_offset + 4, &duration32, sizeof(duration32))
1269 < (ssize_t)sizeof(duration32)) {
1270 return ERROR_IO;
1271 }
1272 if (duration32 != 0xffffffff) {
1273 duration = ntohl(duration32);
1274 }
1275 }
1276 if (duration != 0 && mLastTrack->timescale != 0) {
1277 mLastTrack->meta->setInt64(
1278 kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1279 }
1280
1281 uint8_t lang[2];
1282 off64_t lang_offset;
1283 if (version == 1) {
1284 lang_offset = timescale_offset + 4 + 8;
1285 } else if (version == 0) {
1286 lang_offset = timescale_offset + 4 + 4;
1287 } else {
1288 return ERROR_IO;
1289 }
1290
1291 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1292 < (ssize_t)sizeof(lang)) {
1293 return ERROR_IO;
1294 }
1295
1296 // To get the ISO-639-2/T three character language code
1297 // 1 bit pad followed by 3 5-bits characters. Each character
1298 // is packed as the difference between its ASCII value and 0x60.
1299 char lang_code[4];
1300 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1301 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1302 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1303 lang_code[3] = '\0';
1304
1305 mLastTrack->meta->setCString(
1306 kKeyMediaLanguage, lang_code);
1307
1308 break;
1309 }
1310
1311 case FOURCC('s', 't', 's', 'd'):
1312 {
1313 if (chunk_data_size < 8) {
1314 return ERROR_MALFORMED;
1315 }
1316
1317 uint8_t buffer[8];
1318 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1319 return ERROR_MALFORMED;
1320 }
1321
1322 if (mDataSource->readAt(
1323 data_offset, buffer, 8) < 8) {
1324 return ERROR_IO;
1325 }
1326
1327 if (U32_AT(buffer) != 0) {
1328 // Should be version 0, flags 0.
1329 return ERROR_MALFORMED;
1330 }
1331
1332 uint32_t entry_count = U32_AT(&buffer[4]);
1333
1334 if (entry_count > 1) {
1335 // For 3GPP timed text, there could be multiple tx3g boxes contain
1336 // multiple text display formats. These formats will be used to
1337 // display the timed text.
1338 // For encrypted files, there may also be more than one entry.
1339 const char *mime;
1340
1341 if (mLastTrack == NULL)
1342 return ERROR_MALFORMED;
1343
1344 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1345 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1346 strcasecmp(mime, "application/octet-stream")) {
1347 // For now we only support a single type of media per track.
1348 mLastTrack->skipTrack = true;
1349 *offset += chunk_size;
1350 break;
1351 }
1352 }
1353 off64_t stop_offset = *offset + chunk_size;
1354 *offset = data_offset + 8;
1355 for (uint32_t i = 0; i < entry_count; ++i) {
1356 status_t err = parseChunk(offset, depth + 1);
1357 if (err != OK) {
1358 return err;
1359 }
1360 }
1361
1362 if (*offset != stop_offset) {
1363 return ERROR_MALFORMED;
1364 }
1365 break;
1366 }
1367
1368 case FOURCC('m', 'p', '4', 'a'):
1369 case FOURCC('e', 'n', 'c', 'a'):
1370 case FOURCC('s', 'a', 'm', 'r'):
1371 case FOURCC('s', 'a', 'w', 'b'):
1372 {
1373 uint8_t buffer[8 + 20];
1374 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1375 // Basic AudioSampleEntry size.
1376 return ERROR_MALFORMED;
1377 }
1378
1379 if (mDataSource->readAt(
1380 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1381 return ERROR_IO;
1382 }
1383
1384 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1385 uint32_t num_channels = U16_AT(&buffer[16]);
1386
1387 uint16_t sample_size = U16_AT(&buffer[18]);
1388 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1389
1390 if (mLastTrack == NULL)
1391 return ERROR_MALFORMED;
1392
1393 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1394 // if the chunk type is enca, we'll get the type from the sinf/frma box later
1395 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1396 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1397 }
1398 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1399 chunk, num_channels, sample_size, sample_rate);
1400 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1401 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1402
1403 off64_t stop_offset = *offset + chunk_size;
1404 *offset = data_offset + sizeof(buffer);
1405 while (*offset < stop_offset) {
1406 status_t err = parseChunk(offset, depth + 1);
1407 if (err != OK) {
1408 return err;
1409 }
1410 }
1411
1412 if (*offset != stop_offset) {
1413 return ERROR_MALFORMED;
1414 }
1415 break;
1416 }
1417
1418 case FOURCC('m', 'p', '4', 'v'):
1419 case FOURCC('e', 'n', 'c', 'v'):
1420 case FOURCC('s', '2', '6', '3'):
1421 case FOURCC('H', '2', '6', '3'):
1422 case FOURCC('h', '2', '6', '3'):
1423 case FOURCC('a', 'v', 'c', '1'):
1424 case FOURCC('h', 'v', 'c', '1'):
1425 case FOURCC('h', 'e', 'v', '1'):
1426 {
1427 mHasVideo = true;
1428
1429 uint8_t buffer[78];
1430 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1431 // Basic VideoSampleEntry size.
1432 return ERROR_MALFORMED;
1433 }
1434
1435 if (mDataSource->readAt(
1436 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1437 return ERROR_IO;
1438 }
1439
1440 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1441 uint16_t width = U16_AT(&buffer[6 + 18]);
1442 uint16_t height = U16_AT(&buffer[6 + 20]);
1443
1444 // The video sample is not standard-compliant if it has invalid dimension.
1445 // Use some default width and height value, and
1446 // let the decoder figure out the actual width and height (and thus
1447 // be prepared for INFO_FOMRAT_CHANGED event).
1448 if (width == 0) width = 352;
1449 if (height == 0) height = 288;
1450
1451 // printf("*** coding='%s' width=%d height=%d\n",
1452 // chunk, width, height);
1453
1454 if (mLastTrack == NULL)
1455 return ERROR_MALFORMED;
1456
1457 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1458 // if the chunk type is encv, we'll get the type from the sinf/frma box later
1459 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1460 }
1461 mLastTrack->meta->setInt32(kKeyWidth, width);
1462 mLastTrack->meta->setInt32(kKeyHeight, height);
1463
1464 off64_t stop_offset = *offset + chunk_size;
1465 *offset = data_offset + sizeof(buffer);
1466 while (*offset < stop_offset) {
1467 status_t err = parseChunk(offset, depth + 1);
1468 if (err != OK) {
1469 return err;
1470 }
1471 }
1472
1473 if (*offset != stop_offset) {
1474 return ERROR_MALFORMED;
1475 }
1476 break;
1477 }
1478
1479 case FOURCC('s', 't', 'c', 'o'):
1480 case FOURCC('c', 'o', '6', '4'):
1481 {
1482 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1483 return ERROR_MALFORMED;
1484
1485 status_t err =
1486 mLastTrack->sampleTable->setChunkOffsetParams(
1487 chunk_type, data_offset, chunk_data_size);
1488
1489 *offset += chunk_size;
1490
1491 if (err != OK) {
1492 return err;
1493 }
1494
1495 break;
1496 }
1497
1498 case FOURCC('s', 't', 's', 'c'):
1499 {
1500 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1501 return ERROR_MALFORMED;
1502
1503 status_t err =
1504 mLastTrack->sampleTable->setSampleToChunkParams(
1505 data_offset, chunk_data_size);
1506
1507 *offset += chunk_size;
1508
1509 if (err != OK) {
1510 return err;
1511 }
1512
1513 break;
1514 }
1515
1516 case FOURCC('s', 't', 's', 'z'):
1517 case FOURCC('s', 't', 'z', '2'):
1518 {
1519 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1520 return ERROR_MALFORMED;
1521
1522 status_t err =
1523 mLastTrack->sampleTable->setSampleSizeParams(
1524 chunk_type, data_offset, chunk_data_size);
1525
1526 *offset += chunk_size;
1527
1528 if (err != OK) {
1529 return err;
1530 }
1531
1532 size_t max_size;
1533 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1534
1535 if (err != OK) {
1536 return err;
1537 }
1538
1539 if (max_size != 0) {
1540 // Assume that a given buffer only contains at most 10 chunks,
1541 // each chunk originally prefixed with a 2 byte length will
1542 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1543 // and thus will grow by 2 bytes per chunk.
1544 if (max_size > SIZE_MAX - 10 * 2) {
1545 ALOGE("max sample size too big: %zu", max_size);
1546 return ERROR_MALFORMED;
1547 }
1548 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1549 } else {
1550 // No size was specified. Pick a conservatively large size.
1551 uint32_t width, height;
1552 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1553 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1554 ALOGE("No width or height, assuming worst case 1080p");
1555 width = 1920;
1556 height = 1080;
1557 } else {
1558 // A resolution was specified, check that it's not too big. The values below
1559 // were chosen so that the calculations below don't cause overflows, they're
1560 // not indicating that resolutions up to 32kx32k are actually supported.
1561 if (width > 32768 || height > 32768) {
1562 ALOGE("can't support %u x %u video", width, height);
1563 return ERROR_MALFORMED;
1564 }
1565 }
1566
1567 const char *mime;
1568 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1569 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1570 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1571 // AVC & HEVC requires compression ratio of at least 2, and uses
1572 // macroblocks
1573 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1574 } else {
1575 // For all other formats there is no minimum compression
1576 // ratio. Use compression ratio of 1.
1577 max_size = width * height * 3 / 2;
1578 }
1579 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1580 }
1581
1582 // NOTE: setting another piece of metadata invalidates any pointers (such as the
1583 // mimetype) previously obtained, so don't cache them.
1584 const char *mime;
1585 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1586 // Calculate average frame rate.
1587 if (!strncasecmp("video/", mime, 6)) {
1588 size_t nSamples = mLastTrack->sampleTable->countSamples();
1589 if (nSamples == 0) {
1590 int32_t trackId;
1591 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
1592 for (size_t i = 0; i < mTrex.size(); i++) {
1593 Trex *t = &mTrex.editItemAt(i);
1594 if (t->track_ID == (uint32_t) trackId) {
1595 if (t->default_sample_duration > 0) {
1596 int32_t frameRate =
1597 mLastTrack->timescale / t->default_sample_duration;
1598 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1599 }
1600 break;
1601 }
1602 }
1603 }
1604 } else {
1605 int64_t durationUs;
1606 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1607 if (durationUs > 0) {
1608 int32_t frameRate = (nSamples * 1000000LL +
1609 (durationUs >> 1)) / durationUs;
1610 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1611 }
1612 }
1613 }
1614 }
1615
1616 break;
1617 }
1618
1619 case FOURCC('s', 't', 't', 's'):
1620 {
1621 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1622 return ERROR_MALFORMED;
1623
1624 *offset += chunk_size;
1625
1626 status_t err =
1627 mLastTrack->sampleTable->setTimeToSampleParams(
1628 data_offset, chunk_data_size);
1629
1630 if (err != OK) {
1631 return err;
1632 }
1633
1634 break;
1635 }
1636
1637 case FOURCC('c', 't', 't', 's'):
1638 {
1639 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1640 return ERROR_MALFORMED;
1641
1642 *offset += chunk_size;
1643
1644 status_t err =
1645 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1646 data_offset, chunk_data_size);
1647
1648 if (err != OK) {
1649 return err;
1650 }
1651
1652 break;
1653 }
1654
1655 case FOURCC('s', 't', 's', 's'):
1656 {
1657 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1658 return ERROR_MALFORMED;
1659
1660 *offset += chunk_size;
1661
1662 status_t err =
1663 mLastTrack->sampleTable->setSyncSampleParams(
1664 data_offset, chunk_data_size);
1665
1666 if (err != OK) {
1667 return err;
1668 }
1669
1670 break;
1671 }
1672
1673 // \xA9xyz
1674 case FOURCC(0xA9, 'x', 'y', 'z'):
1675 {
1676 *offset += chunk_size;
1677
1678 // Best case the total data length inside "\xA9xyz" box
1679 // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/",
1680 // where "\x00\x04" is the text string length with value = 4,
1681 // "\0x15\xc7" is the language code = en, and "0+0" is a
1682 // location (string) value with longitude = 0 and latitude = 0.
1683 if (chunk_data_size < 8) {
1684 return ERROR_MALFORMED;
1685 }
1686
1687 // Worst case the location string length would be 18,
1688 // for instance +90.0000-180.0000, without the trailing "/" and
1689 // the string length + language code.
1690 char buffer[18];
1691
1692 // Substracting 5 from the data size is because the text string length +
1693 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1694 off64_t location_length = chunk_data_size - 5;
1695 if (location_length >= (off64_t) sizeof(buffer)) {
1696 return ERROR_MALFORMED;
1697 }
1698
1699 if (mDataSource->readAt(
1700 data_offset + 4, buffer, location_length) < location_length) {
1701 return ERROR_IO;
1702 }
1703
1704 buffer[location_length] = '\0';
1705 mFileMetaData->setCString(kKeyLocation, buffer);
1706 break;
1707 }
1708
1709 case FOURCC('e', 's', 'd', 's'):
1710 {
1711 *offset += chunk_size;
1712
1713 if (chunk_data_size < 4) {
1714 return ERROR_MALFORMED;
1715 }
1716
1717 uint8_t buffer[256];
1718 if (chunk_data_size > (off64_t)sizeof(buffer)) {
1719 return ERROR_BUFFER_TOO_SMALL;
1720 }
1721
1722 if (mDataSource->readAt(
1723 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1724 return ERROR_IO;
1725 }
1726
1727 if (U32_AT(buffer) != 0) {
1728 // Should be version 0, flags 0.
1729 return ERROR_MALFORMED;
1730 }
1731
1732 if (mLastTrack == NULL)
1733 return ERROR_MALFORMED;
1734
1735 mLastTrack->meta->setData(
1736 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1737
1738 if (mPath.size() >= 2
1739 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1740 // Information from the ESDS must be relied on for proper
1741 // setup of sample rate and channel count for MPEG4 Audio.
1742 // The generic header appears to only contain generic
1743 // information...
1744
1745 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1746 &buffer[4], chunk_data_size - 4);
1747
1748 if (err != OK) {
1749 return err;
1750 }
1751 }
1752 if (mPath.size() >= 2
1753 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1754 // Check if the video is MPEG2
1755 ESDS esds(&buffer[4], chunk_data_size - 4);
1756
1757 uint8_t objectTypeIndication;
1758 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1759 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1760 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1761 }
1762 }
1763 }
1764 break;
1765 }
1766
1767 case FOURCC('b', 't', 'r', 't'):
1768 {
1769 *offset += chunk_size;
1770
1771 uint8_t buffer[12];
1772 if (chunk_data_size != sizeof(buffer)) {
1773 return ERROR_MALFORMED;
1774 }
1775
1776 if (mDataSource->readAt(
1777 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1778 return ERROR_IO;
1779 }
1780
1781 uint32_t maxBitrate = U32_AT(&buffer[4]);
1782 uint32_t avgBitrate = U32_AT(&buffer[8]);
1783 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1784 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1785 }
1786 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1787 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate);
1788 }
1789 break;
1790 }
1791
1792 case FOURCC('a', 'v', 'c', 'C'):
1793 {
1794 *offset += chunk_size;
1795
1796 sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1797
1798 if (buffer->data() == NULL) {
1799 ALOGE("b/28471206");
1800 return NO_MEMORY;
1801 }
1802
1803 if (mDataSource->readAt(
1804 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1805 return ERROR_IO;
1806 }
1807
1808 if (mLastTrack == NULL)
1809 return ERROR_MALFORMED;
1810
1811 mLastTrack->meta->setData(
1812 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1813
1814 break;
1815 }
1816 case FOURCC('h', 'v', 'c', 'C'):
1817 {
1818 sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1819
1820 if (buffer->data() == NULL) {
1821 ALOGE("b/28471206");
1822 return NO_MEMORY;
1823 }
1824
1825 if (mDataSource->readAt(
1826 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1827 return ERROR_IO;
1828 }
1829
1830 if (mLastTrack == NULL)
1831 return ERROR_MALFORMED;
1832
1833 mLastTrack->meta->setData(
1834 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1835
1836 *offset += chunk_size;
1837 break;
1838 }
1839
1840 case FOURCC('d', '2', '6', '3'):
1841 {
1842 *offset += chunk_size;
1843 /*
1844 * d263 contains a fixed 7 bytes part:
1845 * vendor - 4 bytes
1846 * version - 1 byte
1847 * level - 1 byte
1848 * profile - 1 byte
1849 * optionally, "d263" box itself may contain a 16-byte
1850 * bit rate box (bitr)
1851 * average bit rate - 4 bytes
1852 * max bit rate - 4 bytes
1853 */
1854 char buffer[23];
1855 if (chunk_data_size != 7 &&
1856 chunk_data_size != 23) {
1857 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
1858 return ERROR_MALFORMED;
1859 }
1860
1861 if (mDataSource->readAt(
1862 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1863 return ERROR_IO;
1864 }
1865
1866 if (mLastTrack == NULL)
1867 return ERROR_MALFORMED;
1868
1869 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1870
1871 break;
1872 }
1873
1874 case FOURCC('m', 'e', 't', 'a'):
1875 {
1876 off64_t stop_offset = *offset + chunk_size;
1877 *offset = data_offset;
1878 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
1879 if (!isParsingMetaKeys) {
1880 uint8_t buffer[4];
1881 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1882 *offset = stop_offset;
1883 return ERROR_MALFORMED;
1884 }
1885
1886 if (mDataSource->readAt(
1887 data_offset, buffer, 4) < 4) {
1888 *offset = stop_offset;
1889 return ERROR_IO;
1890 }
1891
1892 if (U32_AT(buffer) != 0) {
1893 // Should be version 0, flags 0.
1894
1895 // If it's not, let's assume this is one of those
1896 // apparently malformed chunks that don't have flags
1897 // and completely different semantics than what's
1898 // in the MPEG4 specs and skip it.
1899 *offset = stop_offset;
1900 return OK;
1901 }
1902 *offset += sizeof(buffer);
1903 }
1904
1905 while (*offset < stop_offset) {
1906 status_t err = parseChunk(offset, depth + 1);
1907 if (err != OK) {
1908 return err;
1909 }
1910 }
1911
1912 if (*offset != stop_offset) {
1913 return ERROR_MALFORMED;
1914 }
1915 break;
1916 }
1917
1918 case FOURCC('m', 'e', 'a', 'n'):
1919 case FOURCC('n', 'a', 'm', 'e'):
1920 case FOURCC('d', 'a', 't', 'a'):
1921 {
1922 *offset += chunk_size;
1923
1924 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1925 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1926
1927 if (err != OK) {
1928 return err;
1929 }
1930 }
1931
1932 break;
1933 }
1934
1935 case FOURCC('m', 'v', 'h', 'd'):
1936 {
1937 *offset += chunk_size;
1938
1939 if (chunk_data_size < 32) {
1940 return ERROR_MALFORMED;
1941 }
1942
1943 uint8_t header[32];
1944 if (mDataSource->readAt(
1945 data_offset, header, sizeof(header))
1946 < (ssize_t)sizeof(header)) {
1947 return ERROR_IO;
1948 }
1949
1950 uint64_t creationTime;
1951 uint64_t duration = 0;
1952 if (header[0] == 1) {
1953 creationTime = U64_AT(&header[4]);
1954 mHeaderTimescale = U32_AT(&header[20]);
1955 duration = U64_AT(&header[24]);
1956 if (duration == 0xffffffffffffffff) {
1957 duration = 0;
1958 }
1959 } else if (header[0] != 0) {
1960 return ERROR_MALFORMED;
1961 } else {
1962 creationTime = U32_AT(&header[4]);
1963 mHeaderTimescale = U32_AT(&header[12]);
1964 uint32_t d32 = U32_AT(&header[16]);
1965 if (d32 == 0xffffffff) {
1966 d32 = 0;
1967 }
1968 duration = d32;
1969 }
1970 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
1971 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1972 }
1973
1974 String8 s;
1975 if (convertTimeToDate(creationTime, &s)) {
1976 mFileMetaData->setCString(kKeyDate, s.string());
1977 }
1978
1979
1980 break;
1981 }
1982
1983 case FOURCC('m', 'e', 'h', 'd'):
1984 {
1985 *offset += chunk_size;
1986
1987 if (chunk_data_size < 8) {
1988 return ERROR_MALFORMED;
1989 }
1990
1991 uint8_t flags[4];
1992 if (mDataSource->readAt(
1993 data_offset, flags, sizeof(flags))
1994 < (ssize_t)sizeof(flags)) {
1995 return ERROR_IO;
1996 }
1997
1998 uint64_t duration = 0;
1999 if (flags[0] == 1) {
2000 // 64 bit
2001 if (chunk_data_size < 12) {
2002 return ERROR_MALFORMED;
2003 }
2004 mDataSource->getUInt64(data_offset + 4, &duration);
2005 if (duration == 0xffffffffffffffff) {
2006 duration = 0;
2007 }
2008 } else if (flags[0] == 0) {
2009 // 32 bit
2010 uint32_t d32;
2011 mDataSource->getUInt32(data_offset + 4, &d32);
2012 if (d32 == 0xffffffff) {
2013 d32 = 0;
2014 }
2015 duration = d32;
2016 } else {
2017 return ERROR_MALFORMED;
2018 }
2019
2020 if (duration != 0 && mHeaderTimescale != 0) {
2021 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2022 }
2023
2024 break;
2025 }
2026
2027 case FOURCC('m', 'd', 'a', 't'):
2028 {
2029 ALOGV("mdat chunk, drm: %d", mIsDrm);
2030
2031 mMdatFound = true;
2032
2033 if (!mIsDrm) {
2034 *offset += chunk_size;
2035 break;
2036 }
2037
2038 if (chunk_size < 8) {
2039 return ERROR_MALFORMED;
2040 }
2041
2042 return parseDrmSINF(offset, data_offset);
2043 }
2044
2045 case FOURCC('h', 'd', 'l', 'r'):
2046 {
2047 *offset += chunk_size;
2048
2049 if (underQTMetaPath(mPath, 3)) {
2050 break;
2051 }
2052
2053 uint32_t buffer;
2054 if (mDataSource->readAt(
2055 data_offset + 8, &buffer, 4) < 4) {
2056 return ERROR_IO;
2057 }
2058
2059 uint32_t type = ntohl(buffer);
2060 // For the 3GPP file format, the handler-type within the 'hdlr' box
2061 // shall be 'text'. We also want to support 'sbtl' handler type
2062 // for a practical reason as various MPEG4 containers use it.
2063 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2064 if (mLastTrack != NULL) {
2065 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2066 }
2067 }
2068
2069 break;
2070 }
2071
2072 case FOURCC('k', 'e', 'y', 's'):
2073 {
2074 *offset += chunk_size;
2075
2076 if (underQTMetaPath(mPath, 3)) {
2077 parseQTMetaKey(data_offset, chunk_data_size);
2078 }
2079 break;
2080 }
2081
2082 case FOURCC('t', 'r', 'e', 'x'):
2083 {
2084 *offset += chunk_size;
2085
2086 if (chunk_data_size < 24) {
2087 return ERROR_IO;
2088 }
2089 Trex trex;
2090 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2091 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2092 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2093 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2094 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2095 return ERROR_IO;
2096 }
2097 mTrex.add(trex);
2098 break;
2099 }
2100
2101 case FOURCC('t', 'x', '3', 'g'):
2102 {
2103 if (mLastTrack == NULL)
2104 return ERROR_MALFORMED;
2105
2106 uint32_t type;
2107 const void *data;
2108 size_t size = 0;
2109 if (!mLastTrack->meta->findData(
2110 kKeyTextFormatData, &type, &data, &size)) {
2111 size = 0;
2112 }
2113
2114 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2115 return ERROR_MALFORMED;
2116 }
2117
2118 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2119 if (buffer == NULL) {
2120 return ERROR_MALFORMED;
2121 }
2122
2123 if (size > 0) {
2124 memcpy(buffer, data, size);
2125 }
2126
2127 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2128 < chunk_size) {
2129 delete[] buffer;
2130 buffer = NULL;
2131
2132 // advance read pointer so we don't end up reading this again
2133 *offset += chunk_size;
2134 return ERROR_IO;
2135 }
2136
2137 mLastTrack->meta->setData(
2138 kKeyTextFormatData, 0, buffer, size + chunk_size);
2139
2140 delete[] buffer;
2141
2142 *offset += chunk_size;
2143 break;
2144 }
2145
2146 case FOURCC('c', 'o', 'v', 'r'):
2147 {
2148 *offset += chunk_size;
2149
2150 if (mFileMetaData != NULL) {
2151 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2152 chunk_data_size, data_offset);
2153
2154 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2155 return ERROR_MALFORMED;
2156 }
2157 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
2158 if (buffer->data() == NULL) {
2159 ALOGE("b/28471206");
2160 return NO_MEMORY;
2161 }
2162 if (mDataSource->readAt(
2163 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
2164 return ERROR_IO;
2165 }
2166 const int kSkipBytesOfDataBox = 16;
2167 if (chunk_data_size <= kSkipBytesOfDataBox) {
2168 return ERROR_MALFORMED;
2169 }
2170
2171 mFileMetaData->setData(
2172 kKeyAlbumArt, MetaData::TYPE_NONE,
2173 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2174 }
2175
2176 break;
2177 }
2178
2179 case FOURCC('c', 'o', 'l', 'r'):
2180 {
2181 *offset += chunk_size;
2182 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2183 // ignore otherwise
2184 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2185 status_t err = parseColorInfo(data_offset, chunk_data_size);
2186 if (err != OK) {
2187 return err;
2188 }
2189 }
2190
2191 break;
2192 }
2193
2194 case FOURCC('t', 'i', 't', 'l'):
2195 case FOURCC('p', 'e', 'r', 'f'):
2196 case FOURCC('a', 'u', 't', 'h'):
2197 case FOURCC('g', 'n', 'r', 'e'):
2198 case FOURCC('a', 'l', 'b', 'm'):
2199 case FOURCC('y', 'r', 'r', 'c'):
2200 {
2201 *offset += chunk_size;
2202
2203 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2204
2205 if (err != OK) {
2206 return err;
2207 }
2208
2209 break;
2210 }
2211
2212 case FOURCC('I', 'D', '3', '2'):
2213 {
2214 *offset += chunk_size;
2215
2216 if (chunk_data_size < 6) {
2217 return ERROR_MALFORMED;
2218 }
2219
2220 parseID3v2MetaData(data_offset + 6);
2221
2222 break;
2223 }
2224
2225 case FOURCC('-', '-', '-', '-'):
2226 {
2227 mLastCommentMean.clear();
2228 mLastCommentName.clear();
2229 mLastCommentData.clear();
2230 *offset += chunk_size;
2231 break;
2232 }
2233
2234 case FOURCC('s', 'i', 'd', 'x'):
2235 {
2236 parseSegmentIndex(data_offset, chunk_data_size);
2237 *offset += chunk_size;
2238 return UNKNOWN_ERROR; // stop parsing after sidx
2239 }
2240
2241 default:
2242 {
2243 // check if we're parsing 'ilst' for meta keys
2244 // if so, treat type as a number (key-id).
2245 if (underQTMetaPath(mPath, 3)) {
2246 parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2247 }
2248
2249 *offset += chunk_size;
2250 break;
2251 }
2252 }
2253
2254 return OK;
2255 }
2256
parseSegmentIndex(off64_t offset,size_t size)2257 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2258 ALOGV("MPEG4Extractor::parseSegmentIndex");
2259
2260 if (size < 12) {
2261 return -EINVAL;
2262 }
2263
2264 uint32_t flags;
2265 if (!mDataSource->getUInt32(offset, &flags)) {
2266 return ERROR_MALFORMED;
2267 }
2268
2269 uint32_t version = flags >> 24;
2270 flags &= 0xffffff;
2271
2272 ALOGV("sidx version %d", version);
2273
2274 uint32_t referenceId;
2275 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2276 return ERROR_MALFORMED;
2277 }
2278
2279 uint32_t timeScale;
2280 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2281 return ERROR_MALFORMED;
2282 }
2283 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2284 if (timeScale == 0)
2285 return ERROR_MALFORMED;
2286
2287 uint64_t earliestPresentationTime;
2288 uint64_t firstOffset;
2289
2290 offset += 12;
2291 size -= 12;
2292
2293 if (version == 0) {
2294 if (size < 8) {
2295 return -EINVAL;
2296 }
2297 uint32_t tmp;
2298 if (!mDataSource->getUInt32(offset, &tmp)) {
2299 return ERROR_MALFORMED;
2300 }
2301 earliestPresentationTime = tmp;
2302 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2303 return ERROR_MALFORMED;
2304 }
2305 firstOffset = tmp;
2306 offset += 8;
2307 size -= 8;
2308 } else {
2309 if (size < 16) {
2310 return -EINVAL;
2311 }
2312 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2313 return ERROR_MALFORMED;
2314 }
2315 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2316 return ERROR_MALFORMED;
2317 }
2318 offset += 16;
2319 size -= 16;
2320 }
2321 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2322
2323 if (size < 4) {
2324 return -EINVAL;
2325 }
2326
2327 uint16_t referenceCount;
2328 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2329 return ERROR_MALFORMED;
2330 }
2331 offset += 4;
2332 size -= 4;
2333 ALOGV("refcount: %d", referenceCount);
2334
2335 if (size < referenceCount * 12) {
2336 return -EINVAL;
2337 }
2338
2339 uint64_t total_duration = 0;
2340 for (unsigned int i = 0; i < referenceCount; i++) {
2341 uint32_t d1, d2, d3;
2342
2343 if (!mDataSource->getUInt32(offset, &d1) || // size
2344 !mDataSource->getUInt32(offset + 4, &d2) || // duration
2345 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2346 return ERROR_MALFORMED;
2347 }
2348
2349 if (d1 & 0x80000000) {
2350 ALOGW("sub-sidx boxes not supported yet");
2351 }
2352 bool sap = d3 & 0x80000000;
2353 uint32_t saptype = (d3 >> 28) & 7;
2354 if (!sap || (saptype != 1 && saptype != 2)) {
2355 // type 1 and 2 are sync samples
2356 ALOGW("not a stream access point, or unsupported type: %08x", d3);
2357 }
2358 total_duration += d2;
2359 offset += 12;
2360 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2361 SidxEntry se;
2362 se.mSize = d1 & 0x7fffffff;
2363 se.mDurationUs = 1000000LL * d2 / timeScale;
2364 mSidxEntries.add(se);
2365 }
2366
2367 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2368
2369 if (mLastTrack == NULL)
2370 return ERROR_MALFORMED;
2371
2372 int64_t metaDuration;
2373 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2374 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2375 }
2376 return OK;
2377 }
2378
parseQTMetaKey(off64_t offset,size_t size)2379 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2380 if (size < 8) {
2381 return ERROR_MALFORMED;
2382 }
2383
2384 uint32_t count;
2385 if (!mDataSource->getUInt32(offset + 4, &count)) {
2386 return ERROR_MALFORMED;
2387 }
2388
2389 if (mMetaKeyMap.size() > 0) {
2390 ALOGW("'keys' atom seen again, discarding existing entries");
2391 mMetaKeyMap.clear();
2392 }
2393
2394 off64_t keyOffset = offset + 8;
2395 off64_t stopOffset = offset + size;
2396 for (size_t i = 1; i <= count; i++) {
2397 if (keyOffset + 8 > stopOffset) {
2398 return ERROR_MALFORMED;
2399 }
2400
2401 uint32_t keySize;
2402 if (!mDataSource->getUInt32(keyOffset, &keySize)
2403 || keySize < 8
2404 || keyOffset + keySize > stopOffset) {
2405 return ERROR_MALFORMED;
2406 }
2407
2408 uint32_t type;
2409 if (!mDataSource->getUInt32(keyOffset + 4, &type)
2410 || type != FOURCC('m', 'd', 't', 'a')) {
2411 return ERROR_MALFORMED;
2412 }
2413
2414 keySize -= 8;
2415 keyOffset += 8;
2416
2417 sp<ABuffer> keyData = new ABuffer(keySize);
2418 if (keyData->data() == NULL) {
2419 return ERROR_MALFORMED;
2420 }
2421 if (mDataSource->readAt(
2422 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) {
2423 return ERROR_MALFORMED;
2424 }
2425
2426 AString key((const char *)keyData->data(), keySize);
2427 mMetaKeyMap.add(i, key);
2428
2429 keyOffset += keySize;
2430 }
2431 return OK;
2432 }
2433
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)2434 status_t MPEG4Extractor::parseQTMetaVal(
2435 int32_t keyId, off64_t offset, size_t size) {
2436 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2437 if (index < 0) {
2438 // corresponding key is not present, ignore
2439 return ERROR_MALFORMED;
2440 }
2441
2442 if (size <= 16) {
2443 return ERROR_MALFORMED;
2444 }
2445 uint32_t dataSize;
2446 if (!mDataSource->getUInt32(offset, &dataSize)
2447 || dataSize > size || dataSize <= 16) {
2448 return ERROR_MALFORMED;
2449 }
2450 uint32_t atomFourCC;
2451 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2452 || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2453 return ERROR_MALFORMED;
2454 }
2455 uint32_t dataType;
2456 if (!mDataSource->getUInt32(offset + 8, &dataType)
2457 || ((dataType & 0xff000000) != 0)) {
2458 // not well-known type
2459 return ERROR_MALFORMED;
2460 }
2461
2462 dataSize -= 16;
2463 offset += 16;
2464
2465 if (dataType == 23 && dataSize >= 4) {
2466 // BE Float32
2467 uint32_t val;
2468 if (!mDataSource->getUInt32(offset, &val)) {
2469 return ERROR_MALFORMED;
2470 }
2471 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2472 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val);
2473 }
2474 } else {
2475 // add more keys if needed
2476 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2477 }
2478
2479 return OK;
2480 }
2481
parseTrackHeader(off64_t data_offset,off64_t data_size)2482 status_t MPEG4Extractor::parseTrackHeader(
2483 off64_t data_offset, off64_t data_size) {
2484 if (data_size < 4) {
2485 return ERROR_MALFORMED;
2486 }
2487
2488 uint8_t version;
2489 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2490 return ERROR_IO;
2491 }
2492
2493 size_t dynSize = (version == 1) ? 36 : 24;
2494
2495 uint8_t buffer[36 + 60];
2496
2497 if (data_size != (off64_t)dynSize + 60) {
2498 return ERROR_MALFORMED;
2499 }
2500
2501 if (mDataSource->readAt(
2502 data_offset, buffer, data_size) < (ssize_t)data_size) {
2503 return ERROR_IO;
2504 }
2505
2506 uint64_t ctime __unused, mtime __unused, duration __unused;
2507 int32_t id;
2508
2509 if (version == 1) {
2510 ctime = U64_AT(&buffer[4]);
2511 mtime = U64_AT(&buffer[12]);
2512 id = U32_AT(&buffer[20]);
2513 duration = U64_AT(&buffer[28]);
2514 } else if (version == 0) {
2515 ctime = U32_AT(&buffer[4]);
2516 mtime = U32_AT(&buffer[8]);
2517 id = U32_AT(&buffer[12]);
2518 duration = U32_AT(&buffer[20]);
2519 } else {
2520 return ERROR_UNSUPPORTED;
2521 }
2522
2523 if (mLastTrack == NULL)
2524 return ERROR_MALFORMED;
2525
2526 mLastTrack->meta->setInt32(kKeyTrackID, id);
2527
2528 size_t matrixOffset = dynSize + 16;
2529 int32_t a00 = U32_AT(&buffer[matrixOffset]);
2530 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2531 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2532 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2533
2534 #if 0
2535 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2536 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2537
2538 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2539 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2540 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2541 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2542 #endif
2543
2544 uint32_t rotationDegrees;
2545
2546 static const int32_t kFixedOne = 0x10000;
2547 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2548 // Identity, no rotation
2549 rotationDegrees = 0;
2550 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2551 rotationDegrees = 90;
2552 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2553 rotationDegrees = 270;
2554 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2555 rotationDegrees = 180;
2556 } else {
2557 ALOGW("We only support 0,90,180,270 degree rotation matrices");
2558 rotationDegrees = 0;
2559 }
2560
2561 if (rotationDegrees != 0) {
2562 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2563 }
2564
2565 // Handle presentation display size, which could be different
2566 // from the image size indicated by kKeyWidth and kKeyHeight.
2567 uint32_t width = U32_AT(&buffer[dynSize + 52]);
2568 uint32_t height = U32_AT(&buffer[dynSize + 56]);
2569 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2570 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2571
2572 return OK;
2573 }
2574
parseITunesMetaData(off64_t offset,size_t size)2575 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2576 if (size < 4 || size == SIZE_MAX) {
2577 return ERROR_MALFORMED;
2578 }
2579
2580 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2581 if (buffer == NULL) {
2582 return ERROR_MALFORMED;
2583 }
2584 if (mDataSource->readAt(
2585 offset, buffer, size) != (ssize_t)size) {
2586 delete[] buffer;
2587 buffer = NULL;
2588
2589 return ERROR_IO;
2590 }
2591
2592 uint32_t flags = U32_AT(buffer);
2593
2594 uint32_t metadataKey = 0;
2595 char chunk[5];
2596 MakeFourCCString(mPath[4], chunk);
2597 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2598 switch ((int32_t)mPath[4]) {
2599 case FOURCC(0xa9, 'a', 'l', 'b'):
2600 {
2601 metadataKey = kKeyAlbum;
2602 break;
2603 }
2604 case FOURCC(0xa9, 'A', 'R', 'T'):
2605 {
2606 metadataKey = kKeyArtist;
2607 break;
2608 }
2609 case FOURCC('a', 'A', 'R', 'T'):
2610 {
2611 metadataKey = kKeyAlbumArtist;
2612 break;
2613 }
2614 case FOURCC(0xa9, 'd', 'a', 'y'):
2615 {
2616 metadataKey = kKeyYear;
2617 break;
2618 }
2619 case FOURCC(0xa9, 'n', 'a', 'm'):
2620 {
2621 metadataKey = kKeyTitle;
2622 break;
2623 }
2624 case FOURCC(0xa9, 'w', 'r', 't'):
2625 {
2626 metadataKey = kKeyWriter;
2627 break;
2628 }
2629 case FOURCC('c', 'o', 'v', 'r'):
2630 {
2631 metadataKey = kKeyAlbumArt;
2632 break;
2633 }
2634 case FOURCC('g', 'n', 'r', 'e'):
2635 {
2636 metadataKey = kKeyGenre;
2637 break;
2638 }
2639 case FOURCC(0xa9, 'g', 'e', 'n'):
2640 {
2641 metadataKey = kKeyGenre;
2642 break;
2643 }
2644 case FOURCC('c', 'p', 'i', 'l'):
2645 {
2646 if (size == 9 && flags == 21) {
2647 char tmp[16];
2648 sprintf(tmp, "%d",
2649 (int)buffer[size - 1]);
2650
2651 mFileMetaData->setCString(kKeyCompilation, tmp);
2652 }
2653 break;
2654 }
2655 case FOURCC('t', 'r', 'k', 'n'):
2656 {
2657 if (size == 16 && flags == 0) {
2658 char tmp[16];
2659 uint16_t* pTrack = (uint16_t*)&buffer[10];
2660 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2661 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2662
2663 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2664 }
2665 break;
2666 }
2667 case FOURCC('d', 'i', 's', 'k'):
2668 {
2669 if ((size == 14 || size == 16) && flags == 0) {
2670 char tmp[16];
2671 uint16_t* pDisc = (uint16_t*)&buffer[10];
2672 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2673 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2674
2675 mFileMetaData->setCString(kKeyDiscNumber, tmp);
2676 }
2677 break;
2678 }
2679 case FOURCC('-', '-', '-', '-'):
2680 {
2681 buffer[size] = '\0';
2682 switch (mPath[5]) {
2683 case FOURCC('m', 'e', 'a', 'n'):
2684 mLastCommentMean.setTo((const char *)buffer + 4);
2685 break;
2686 case FOURCC('n', 'a', 'm', 'e'):
2687 mLastCommentName.setTo((const char *)buffer + 4);
2688 break;
2689 case FOURCC('d', 'a', 't', 'a'):
2690 if (size < 8) {
2691 delete[] buffer;
2692 buffer = NULL;
2693 ALOGE("b/24346430");
2694 return ERROR_MALFORMED;
2695 }
2696 mLastCommentData.setTo((const char *)buffer + 8);
2697 break;
2698 }
2699
2700 // Once we have a set of mean/name/data info, go ahead and process
2701 // it to see if its something we are interested in. Whether or not
2702 // were are interested in the specific tag, make sure to clear out
2703 // the set so we can be ready to process another tuple should one
2704 // show up later in the file.
2705 if ((mLastCommentMean.length() != 0) &&
2706 (mLastCommentName.length() != 0) &&
2707 (mLastCommentData.length() != 0)) {
2708
2709 if (mLastCommentMean == "com.apple.iTunes"
2710 && mLastCommentName == "iTunSMPB") {
2711 int32_t delay, padding;
2712 if (sscanf(mLastCommentData,
2713 " %*x %x %x %*x", &delay, &padding) == 2) {
2714 if (mLastTrack == NULL)
2715 return ERROR_MALFORMED;
2716
2717 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2718 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2719 }
2720 }
2721
2722 mLastCommentMean.clear();
2723 mLastCommentName.clear();
2724 mLastCommentData.clear();
2725 }
2726 break;
2727 }
2728
2729 default:
2730 break;
2731 }
2732
2733 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2734 if (metadataKey == kKeyAlbumArt) {
2735 mFileMetaData->setData(
2736 kKeyAlbumArt, MetaData::TYPE_NONE,
2737 buffer + 8, size - 8);
2738 } else if (metadataKey == kKeyGenre) {
2739 if (flags == 0) {
2740 // uint8_t genre code, iTunes genre codes are
2741 // the standard id3 codes, except they start
2742 // at 1 instead of 0 (e.g. Pop is 14, not 13)
2743 // We use standard id3 numbering, so subtract 1.
2744 int genrecode = (int)buffer[size - 1];
2745 genrecode--;
2746 if (genrecode < 0) {
2747 genrecode = 255; // reserved for 'unknown genre'
2748 }
2749 char genre[10];
2750 sprintf(genre, "%d", genrecode);
2751
2752 mFileMetaData->setCString(metadataKey, genre);
2753 } else if (flags == 1) {
2754 // custom genre string
2755 buffer[size] = '\0';
2756
2757 mFileMetaData->setCString(
2758 metadataKey, (const char *)buffer + 8);
2759 }
2760 } else {
2761 buffer[size] = '\0';
2762
2763 mFileMetaData->setCString(
2764 metadataKey, (const char *)buffer + 8);
2765 }
2766 }
2767
2768 delete[] buffer;
2769 buffer = NULL;
2770
2771 return OK;
2772 }
2773
parseColorInfo(off64_t offset,size_t size)2774 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
2775 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
2776 return ERROR_MALFORMED;
2777 }
2778
2779 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2780 if (buffer == NULL) {
2781 return ERROR_MALFORMED;
2782 }
2783 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
2784 delete[] buffer;
2785 buffer = NULL;
2786
2787 return ERROR_IO;
2788 }
2789
2790 int32_t type = U32_AT(&buffer[0]);
2791 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
2792 || (type == FOURCC('n', 'c', 'l', 'c' && size >= 10))) {
2793 int32_t primaries = U16_AT(&buffer[4]);
2794 int32_t transfer = U16_AT(&buffer[6]);
2795 int32_t coeffs = U16_AT(&buffer[8]);
2796 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
2797
2798 ColorAspects aspects;
2799 ColorUtils::convertIsoColorAspectsToCodecAspects(
2800 primaries, transfer, coeffs, fullRange, aspects);
2801
2802 // only store the first color specification
2803 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) {
2804 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries);
2805 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer);
2806 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
2807 mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange);
2808 }
2809 }
2810
2811 delete[] buffer;
2812 buffer = NULL;
2813
2814 return OK;
2815 }
2816
parse3GPPMetaData(off64_t offset,size_t size,int depth)2817 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2818 if (size < 4 || size == SIZE_MAX) {
2819 return ERROR_MALFORMED;
2820 }
2821
2822 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2823 if (buffer == NULL) {
2824 return ERROR_MALFORMED;
2825 }
2826 if (mDataSource->readAt(
2827 offset, buffer, size) != (ssize_t)size) {
2828 delete[] buffer;
2829 buffer = NULL;
2830
2831 return ERROR_IO;
2832 }
2833
2834 uint32_t metadataKey = 0;
2835 switch (mPath[depth]) {
2836 case FOURCC('t', 'i', 't', 'l'):
2837 {
2838 metadataKey = kKeyTitle;
2839 break;
2840 }
2841 case FOURCC('p', 'e', 'r', 'f'):
2842 {
2843 metadataKey = kKeyArtist;
2844 break;
2845 }
2846 case FOURCC('a', 'u', 't', 'h'):
2847 {
2848 metadataKey = kKeyWriter;
2849 break;
2850 }
2851 case FOURCC('g', 'n', 'r', 'e'):
2852 {
2853 metadataKey = kKeyGenre;
2854 break;
2855 }
2856 case FOURCC('a', 'l', 'b', 'm'):
2857 {
2858 if (buffer[size - 1] != '\0') {
2859 char tmp[4];
2860 sprintf(tmp, "%u", buffer[size - 1]);
2861
2862 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2863 }
2864
2865 metadataKey = kKeyAlbum;
2866 break;
2867 }
2868 case FOURCC('y', 'r', 'r', 'c'):
2869 {
2870 char tmp[5];
2871 uint16_t year = U16_AT(&buffer[4]);
2872
2873 if (year < 10000) {
2874 sprintf(tmp, "%u", year);
2875
2876 mFileMetaData->setCString(kKeyYear, tmp);
2877 }
2878 break;
2879 }
2880
2881 default:
2882 break;
2883 }
2884
2885 if (metadataKey > 0) {
2886 bool isUTF8 = true; // Common case
2887 char16_t *framedata = NULL;
2888 int len16 = 0; // Number of UTF-16 characters
2889
2890 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2891 if (size < 6) {
2892 return ERROR_MALFORMED;
2893 }
2894
2895 if (size - 6 >= 4) {
2896 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2897 framedata = (char16_t *)(buffer + 6);
2898 if (0xfffe == *framedata) {
2899 // endianness marker (BOM) doesn't match host endianness
2900 for (int i = 0; i < len16; i++) {
2901 framedata[i] = bswap_16(framedata[i]);
2902 }
2903 // BOM is now swapped to 0xfeff, we will execute next block too
2904 }
2905
2906 if (0xfeff == *framedata) {
2907 // Remove the BOM
2908 framedata++;
2909 len16--;
2910 isUTF8 = false;
2911 }
2912 // else normal non-zero-length UTF-8 string
2913 // we can't handle UTF-16 without BOM as there is no other
2914 // indication of encoding.
2915 }
2916
2917 if (isUTF8) {
2918 buffer[size] = 0;
2919 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2920 } else {
2921 // Convert from UTF-16 string to UTF-8 string.
2922 String8 tmpUTF8str(framedata, len16);
2923 mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2924 }
2925 }
2926
2927 delete[] buffer;
2928 buffer = NULL;
2929
2930 return OK;
2931 }
2932
parseID3v2MetaData(off64_t offset)2933 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2934 ID3 id3(mDataSource, true /* ignorev1 */, offset);
2935
2936 if (id3.isValid()) {
2937 struct Map {
2938 int key;
2939 const char *tag1;
2940 const char *tag2;
2941 };
2942 static const Map kMap[] = {
2943 { kKeyAlbum, "TALB", "TAL" },
2944 { kKeyArtist, "TPE1", "TP1" },
2945 { kKeyAlbumArtist, "TPE2", "TP2" },
2946 { kKeyComposer, "TCOM", "TCM" },
2947 { kKeyGenre, "TCON", "TCO" },
2948 { kKeyTitle, "TIT2", "TT2" },
2949 { kKeyYear, "TYE", "TYER" },
2950 { kKeyAuthor, "TXT", "TEXT" },
2951 { kKeyCDTrackNumber, "TRK", "TRCK" },
2952 { kKeyDiscNumber, "TPA", "TPOS" },
2953 { kKeyCompilation, "TCP", "TCMP" },
2954 };
2955 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2956
2957 for (size_t i = 0; i < kNumMapEntries; ++i) {
2958 if (!mFileMetaData->hasData(kMap[i].key)) {
2959 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2960 if (it->done()) {
2961 delete it;
2962 it = new ID3::Iterator(id3, kMap[i].tag2);
2963 }
2964
2965 if (it->done()) {
2966 delete it;
2967 continue;
2968 }
2969
2970 String8 s;
2971 it->getString(&s);
2972 delete it;
2973
2974 mFileMetaData->setCString(kMap[i].key, s);
2975 }
2976 }
2977
2978 size_t dataSize;
2979 String8 mime;
2980 const void *data = id3.getAlbumArt(&dataSize, &mime);
2981
2982 if (data) {
2983 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2984 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2985 }
2986 }
2987 }
2988
getTrack(size_t index)2989 sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) {
2990 status_t err;
2991 if ((err = readMetaData()) != OK) {
2992 return NULL;
2993 }
2994
2995 Track *track = mFirstTrack;
2996 while (index > 0) {
2997 if (track == NULL) {
2998 return NULL;
2999 }
3000
3001 track = track->next;
3002 --index;
3003 }
3004
3005 if (track == NULL) {
3006 return NULL;
3007 }
3008
3009
3010 Trex *trex = NULL;
3011 int32_t trackId;
3012 if (track->meta->findInt32(kKeyTrackID, &trackId)) {
3013 for (size_t i = 0; i < mTrex.size(); i++) {
3014 Trex *t = &mTrex.editItemAt(i);
3015 if (t->track_ID == (uint32_t) trackId) {
3016 trex = t;
3017 break;
3018 }
3019 }
3020 } else {
3021 ALOGE("b/21657957");
3022 return NULL;
3023 }
3024
3025 ALOGV("getTrack called, pssh: %zu", mPssh.size());
3026
3027 const char *mime;
3028 if (!track->meta->findCString(kKeyMIMEType, &mime)) {
3029 return NULL;
3030 }
3031
3032 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3033 uint32_t type;
3034 const void *data;
3035 size_t size;
3036 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) {
3037 return NULL;
3038 }
3039
3040 const uint8_t *ptr = (const uint8_t *)data;
3041
3042 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
3043 return NULL;
3044 }
3045 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3046 uint32_t type;
3047 const void *data;
3048 size_t size;
3049 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) {
3050 return NULL;
3051 }
3052
3053 const uint8_t *ptr = (const uint8_t *)data;
3054
3055 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
3056 return NULL;
3057 }
3058 }
3059
3060 return new MPEG4Source(this,
3061 track->meta, mDataSource, track->timescale, track->sampleTable,
3062 mSidxEntries, trex, mMoofOffset);
3063 }
3064
3065 // static
verifyTrack(Track * track)3066 status_t MPEG4Extractor::verifyTrack(Track *track) {
3067 const char *mime;
3068 CHECK(track->meta->findCString(kKeyMIMEType, &mime));
3069
3070 uint32_t type;
3071 const void *data;
3072 size_t size;
3073 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3074 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
3075 || type != kTypeAVCC) {
3076 return ERROR_MALFORMED;
3077 }
3078 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3079 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
3080 || type != kTypeHVCC) {
3081 return ERROR_MALFORMED;
3082 }
3083 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3084 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3085 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3086 if (!track->meta->findData(kKeyESDS, &type, &data, &size)
3087 || type != kTypeESDS) {
3088 return ERROR_MALFORMED;
3089 }
3090 }
3091
3092 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3093 // Make sure we have all the metadata we need.
3094 ALOGE("stbl atom missing/invalid.");
3095 return ERROR_MALFORMED;
3096 }
3097
3098 if (track->timescale == 0) {
3099 ALOGE("timescale invalid.");
3100 return ERROR_MALFORMED;
3101 }
3102
3103 return OK;
3104 }
3105
// MPEG-4 audio object type (AOT) codes as they appear in the codec specific
// data parsed by updateAudioTrackInfoFromESDS_MPEG4Audio(). Only the codes
// that are given a value here are enabled; the remaining entries are kept
// commented out as a reference table of the other known codes.
typedef enum {
    //AOT_NONE             = -1,
    //AOT_NULL_OBJECT      = 0,
    //AOT_AAC_MAIN         = 1, /**< Main profile                              */
    AOT_AAC_LC           = 2,   /**< Low Complexity object                     */
    //AOT_AAC_SSR          = 3,
    //AOT_AAC_LTP          = 4,
    AOT_SBR              = 5,
    //AOT_AAC_SCAL         = 6,
    //AOT_TWIN_VQ          = 7,
    //AOT_CELP             = 8,
    //AOT_HVXC             = 9,
    //AOT_RSVD_10          = 10, /**< (reserved)                                */
    //AOT_RSVD_11          = 11, /**< (reserved)                                */
    //AOT_TTSI             = 12, /**< TTSI Object                               */
    //AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object                     */
    //AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object                */
    //AOT_GEN_MIDI         = 15, /**< General MIDI object                       */
    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC        = 17,   /**< Error Resilient(ER) AAC Low Complexity    */
    //AOT_RSVD_18          = 18, /**< (reserved)                                */
    //AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object        */
    AOT_ER_AAC_SCAL      = 20,   /**< Error Resilient(ER) AAC Scalable object   */
    //AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object         */
    AOT_ER_BSAC          = 22,   /**< Error Resilient(ER) BSAC object           */
    AOT_ER_AAC_LD        = 23,   /**< Error Resilient(ER) AAC LowDelay object   */
    //AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object           */
    //AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object           */
    //AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object           */
    //AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object     */
    //AOT_RSVD_28          = 28, /**< might become SSC                          */
    AOT_PS               = 29,   /**< PS, Parametric Stereo (includes SBR)      */
    //AOT_MPEGS            = 30, /**< MPEG Surround                             */

    AOT_ESCAPE           = 31,   /**< Signal AOT uses more than 5 bits          */

    //AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4                        */
    //AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4                        */
    //AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4                        */
    //AOT_RSVD_35          = 35, /**< might become DST                          */
    //AOT_RSVD_36          = 36, /**< might become ALS                          */
    //AOT_AAC_SLS          = 37, /**< AAC + SLS                                 */
    //AOT_SLS              = 38, /**< SLS                                       */
    //AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay                    */

    //AOT_USAC             = 42, /**< USAC                                      */
    //AOT_SAOC             = 43, /**< SAOC                                      */
    //AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround                   */

    //AOT_RSVD50           = 50,  /**< Interim AOT for Rsvd50                   */
} AUDIO_OBJECT_TYPE;
3157
// Refines the audio format of mLastTrack from an ESDS blob.
//
// esds_data/esds_size describe the raw ESDS bytes. The object type indication
// is examined first: 0xe1 re-labels the track as QCELP and 0x6b (MP3) is
// rejected. Otherwise the codec specific data is parsed bit by bit to obtain
// the audio object type, the sampling rate and the channel count, which are
// written to kKeyAACAOT, kKeySampleRate and kKeyChannelCount on the track.
// The track must already carry a sample rate and channel count; their absence
// trips a CHECK.
//
// Returns OK on success (including when there is no codec specific data at
// all), ERROR_MALFORMED when the ESDS or the bit stream is truncated or
// inconsistent or there is no current track, and ERROR_UNSUPPORTED for MP3,
// for channel configurations not handled below, or when the channel count
// ends up as zero.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication  == 0x6b) {
        // The media subtype is MP3 audio
        // Our software MP3 audio decoder may not be able to handle
        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
        ALOGE("MP3 track in MP4/3GPP file is not supported");
        return ERROR_UNSUPPORTED;
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // The unconditional reads below (object type plus frequency index) need
    // up to 15 bits, so at least two bytes must be present.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    // Sampling rates selected by frequency index 0..12. Indices 13 and 14 are
    // rejected below and 15 means an explicit 24-bit rate follows.
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == 31) {  // AAC-ELD => additional 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        // Explicit 24-bit sampling rate, then the 4-bit channel configuration.
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
        // The extension sampling rate is consumed so the reader stays in
        // step with the stream; the value itself is not used yet.
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        int32_t extSampleRate __unused;
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            extSampleRate = br.getBits(24);
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
    }

    // Translate the channel configuration code into a channel count.
    // 0 is passed through and resolved further down.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL FR, SL SR, RL RR, LFE
        case 14:// FC, FL FR, SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            // For SBR/PS a second object type follows; it replaces the local
            // objectType for the checks below (kKeyAACAOT was already set
            // from the first one).
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            const int32_t frameLengthFlag __unused = br.getBits(1);

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                const int32_t coreCoderDelay __unused = br.getBits(14);
            }

            // The extension flag is tolerated as missing: when the stream
            // ends here a value is assumed from the object type instead.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                // 14496-3 4.5.1.1 extensionFlag
                case AOT_AAC_LC:
                    extensionFlag = 0;
                    break;
                case AOT_ER_AAC_LC:
                case AOT_ER_AAC_SCAL:
                case AOT_ER_BSAC:
                case AOT_ER_AAC_LD:
                    extensionFlag = 1;
                    break;
                default:
                    return ERROR_MALFORMED;
                    break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                        extensionFlag, objectType);
            }

            if (numChannels == 0) {
                // Channel configuration 0: the layout is described by the
                // element list that follows, so count the channels from it.
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed-size fields read below add up to exactly 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                const int32_t ElementInstanceTag __unused = br.getBits(4);
                const int32_t Profile __unused = br.getBits(2);
                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                const int32_t NumAssocDataElements __unused = br.getBits(3);
                const int32_t NumValidCcElements __unused = br.getBits(4);

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
                }

                // Front, side and back elements each contribute two channels
                // when their IsCpe bit is set and one otherwise.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    const int32_t SideElementTagSelect __unused = br.getBits(4);
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    const int32_t BackElementTagSelect __unused = br.getBits(4);
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Channel count before the LFE elements are added; only
                // logged below.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still zero: either the element list described no channels or it was
    // never parsed for this object type.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The values parsed from the codec specific data replace whatever was
    // set on the track before; a difference is only logged.
    int32_t prevSampleRate;
    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);

    int32_t prevChannelCount;
    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);

    return OK;
}
3436
3437 ////////////////////////////////////////////////////////////////////////////////
3438
MPEG4Source(const sp<MPEG4Extractor> & owner,const sp<MetaData> & format,const sp<DataSource> & dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset)3439 MPEG4Source::MPEG4Source(
3440 const sp<MPEG4Extractor> &owner,
3441 const sp<MetaData> &format,
3442 const sp<DataSource> &dataSource,
3443 int32_t timeScale,
3444 const sp<SampleTable> &sampleTable,
3445 Vector<SidxEntry> &sidx,
3446 const Trex *trex,
3447 off64_t firstMoofOffset)
3448 : mOwner(owner),
3449 mFormat(format),
3450 mDataSource(dataSource),
3451 mTimescale(timeScale),
3452 mSampleTable(sampleTable),
3453 mCurrentSampleIndex(0),
3454 mCurrentFragmentIndex(0),
3455 mSegments(sidx),
3456 mTrex(trex),
3457 mFirstMoofOffset(firstMoofOffset),
3458 mCurrentMoofOffset(firstMoofOffset),
3459 mCurrentTime(0),
3460 mCurrentSampleInfoAllocSize(0),
3461 mCurrentSampleInfoSizes(NULL),
3462 mCurrentSampleInfoOffsetsAllocSize(0),
3463 mCurrentSampleInfoOffsets(NULL),
3464 mIsAVC(false),
3465 mIsHEVC(false),
3466 mNALLengthSize(0),
3467 mStarted(false),
3468 mGroup(NULL),
3469 mBuffer(NULL),
3470 mWantsNALFragments(false),
3471 mSrcBuffer(NULL) {
3472
3473 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3474
3475 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
3476 mDefaultIVSize = 0;
3477 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3478 uint32_t keytype;
3479 const void *key;
3480 size_t keysize;
3481 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3482 CHECK(keysize <= 16);
3483 memset(mCryptoKey, 0, 16);
3484 memcpy(mCryptoKey, key, keysize);
3485 }
3486
3487 const char *mime;
3488 bool success = mFormat->findCString(kKeyMIMEType, &mime);
3489 CHECK(success);
3490
3491 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3492 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3493
3494 if (mIsAVC) {
3495 uint32_t type;
3496 const void *data;
3497 size_t size;
3498 CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3499
3500 const uint8_t *ptr = (const uint8_t *)data;
3501
3502 CHECK(size >= 7);
3503 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3504
3505 // The number of bytes used to encode the length of a NAL unit.
3506 mNALLengthSize = 1 + (ptr[4] & 3);
3507 } else if (mIsHEVC) {
3508 uint32_t type;
3509 const void *data;
3510 size_t size;
3511 CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3512
3513 const uint8_t *ptr = (const uint8_t *)data;
3514
3515 CHECK(size >= 22);
3516 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3517
3518 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3519 }
3520
3521 CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3522
3523 if (mFirstMoofOffset != 0) {
3524 off64_t offset = mFirstMoofOffset;
3525 parseChunk(&offset);
3526 }
3527 }
3528
~MPEG4Source()3529 MPEG4Source::~MPEG4Source() {
3530 if (mStarted) {
3531 stop();
3532 }
3533 free(mCurrentSampleInfoSizes);
3534 free(mCurrentSampleInfoOffsets);
3535 }
3536
start(MetaData * params)3537 status_t MPEG4Source::start(MetaData *params) {
3538 Mutex::Autolock autoLock(mLock);
3539
3540 CHECK(!mStarted);
3541
3542 int32_t val;
3543 if (params && params->findInt32(kKeyWantsNALFragments, &val)
3544 && val != 0) {
3545 mWantsNALFragments = true;
3546 } else {
3547 mWantsNALFragments = false;
3548 }
3549
3550 int32_t tmp;
3551 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp));
3552 size_t max_size = tmp;
3553
3554 // A somewhat arbitrary limit that should be sufficient for 8k video frames
3555 // If you see the message below for a valid input stream: increase the limit
3556 if (max_size > 64 * 1024 * 1024) {
3557 ALOGE("bogus max input size: %zu", max_size);
3558 return ERROR_MALFORMED;
3559 }
3560 mGroup = new MediaBufferGroup;
3561 mGroup->add_buffer(new MediaBuffer(max_size));
3562
3563 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3564 if (mSrcBuffer == NULL) {
3565 // file probably specified a bad max size
3566 delete mGroup;
3567 mGroup = NULL;
3568 return ERROR_MALFORMED;
3569 }
3570
3571 mStarted = true;
3572
3573 return OK;
3574 }
3575
stop()3576 status_t MPEG4Source::stop() {
3577 Mutex::Autolock autoLock(mLock);
3578
3579 CHECK(mStarted);
3580
3581 if (mBuffer != NULL) {
3582 mBuffer->release();
3583 mBuffer = NULL;
3584 }
3585
3586 delete[] mSrcBuffer;
3587 mSrcBuffer = NULL;
3588
3589 delete mGroup;
3590 mGroup = NULL;
3591
3592 mStarted = false;
3593 mCurrentSampleIndex = 0;
3594
3595 return OK;
3596 }
3597
parseChunk(off64_t * offset)3598 status_t MPEG4Source::parseChunk(off64_t *offset) {
3599 uint32_t hdr[2];
3600 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3601 return ERROR_IO;
3602 }
3603 uint64_t chunk_size = ntohl(hdr[0]);
3604 uint32_t chunk_type = ntohl(hdr[1]);
3605 off64_t data_offset = *offset + 8;
3606
3607 if (chunk_size == 1) {
3608 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3609 return ERROR_IO;
3610 }
3611 chunk_size = ntoh64(chunk_size);
3612 data_offset += 8;
3613
3614 if (chunk_size < 16) {
3615 // The smallest valid chunk is 16 bytes long in this case.
3616 return ERROR_MALFORMED;
3617 }
3618 } else if (chunk_size < 8) {
3619 // The smallest valid chunk is 8 bytes long.
3620 return ERROR_MALFORMED;
3621 }
3622
3623 char chunk[5];
3624 MakeFourCCString(chunk_type, chunk);
3625 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
3626
3627 off64_t chunk_data_size = *offset + chunk_size - data_offset;
3628
3629 switch(chunk_type) {
3630
3631 case FOURCC('t', 'r', 'a', 'f'):
3632 case FOURCC('m', 'o', 'o', 'f'): {
3633 off64_t stop_offset = *offset + chunk_size;
3634 *offset = data_offset;
3635 while (*offset < stop_offset) {
3636 status_t err = parseChunk(offset);
3637 if (err != OK) {
3638 return err;
3639 }
3640 }
3641 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3642 // *offset points to the box following this moof. Find the next moof from there.
3643
3644 while (true) {
3645 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3646 return ERROR_END_OF_STREAM;
3647 }
3648 chunk_size = ntohl(hdr[0]);
3649 chunk_type = ntohl(hdr[1]);
3650 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3651 mNextMoofOffset = *offset;
3652 break;
3653 }
3654 *offset += chunk_size;
3655 }
3656 }
3657 break;
3658 }
3659
3660 case FOURCC('t', 'f', 'h', 'd'): {
3661 status_t err;
3662 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3663 return err;
3664 }
3665 *offset += chunk_size;
3666 break;
3667 }
3668
3669 case FOURCC('t', 'r', 'u', 'n'): {
3670 status_t err;
3671 if (mLastParsedTrackId == mTrackId) {
3672 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3673 return err;
3674 }
3675 }
3676
3677 *offset += chunk_size;
3678 break;
3679 }
3680
3681 case FOURCC('s', 'a', 'i', 'z'): {
3682 status_t err;
3683 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3684 return err;
3685 }
3686 *offset += chunk_size;
3687 break;
3688 }
3689 case FOURCC('s', 'a', 'i', 'o'): {
3690 status_t err;
3691 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3692 return err;
3693 }
3694 *offset += chunk_size;
3695 break;
3696 }
3697
3698 case FOURCC('m', 'd', 'a', 't'): {
3699 // parse DRM info if present
3700 ALOGV("MPEG4Source::parseChunk mdat");
3701 // if saiz/saoi was previously observed, do something with the sampleinfos
3702 *offset += chunk_size;
3703 break;
3704 }
3705
3706 default: {
3707 *offset += chunk_size;
3708 break;
3709 }
3710 }
3711 return OK;
3712 }
3713
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)3714 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3715 off64_t offset, off64_t /* size */) {
3716 ALOGV("parseSampleAuxiliaryInformationSizes");
3717 // 14496-12 8.7.12
3718 uint8_t version;
3719 if (mDataSource->readAt(
3720 offset, &version, sizeof(version))
3721 < (ssize_t)sizeof(version)) {
3722 return ERROR_IO;
3723 }
3724
3725 if (version != 0) {
3726 return ERROR_UNSUPPORTED;
3727 }
3728 offset++;
3729
3730 uint32_t flags;
3731 if (!mDataSource->getUInt24(offset, &flags)) {
3732 return ERROR_IO;
3733 }
3734 offset += 3;
3735
3736 if (flags & 1) {
3737 uint32_t tmp;
3738 if (!mDataSource->getUInt32(offset, &tmp)) {
3739 return ERROR_MALFORMED;
3740 }
3741 mCurrentAuxInfoType = tmp;
3742 offset += 4;
3743 if (!mDataSource->getUInt32(offset, &tmp)) {
3744 return ERROR_MALFORMED;
3745 }
3746 mCurrentAuxInfoTypeParameter = tmp;
3747 offset += 4;
3748 }
3749
3750 uint8_t defsize;
3751 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3752 return ERROR_MALFORMED;
3753 }
3754 mCurrentDefaultSampleInfoSize = defsize;
3755 offset++;
3756
3757 uint32_t smplcnt;
3758 if (!mDataSource->getUInt32(offset, &smplcnt)) {
3759 return ERROR_MALFORMED;
3760 }
3761 mCurrentSampleInfoCount = smplcnt;
3762 offset += 4;
3763
3764 if (mCurrentDefaultSampleInfoSize != 0) {
3765 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3766 return OK;
3767 }
3768 if (smplcnt > mCurrentSampleInfoAllocSize) {
3769 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3770 mCurrentSampleInfoAllocSize = smplcnt;
3771 }
3772
3773 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3774 return OK;
3775 }
3776
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)3777 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3778 off64_t offset, off64_t /* size */) {
3779 ALOGV("parseSampleAuxiliaryInformationOffsets");
3780 // 14496-12 8.7.13
3781 uint8_t version;
3782 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3783 return ERROR_IO;
3784 }
3785 offset++;
3786
3787 uint32_t flags;
3788 if (!mDataSource->getUInt24(offset, &flags)) {
3789 return ERROR_IO;
3790 }
3791 offset += 3;
3792
3793 uint32_t entrycount;
3794 if (!mDataSource->getUInt32(offset, &entrycount)) {
3795 return ERROR_IO;
3796 }
3797 offset += 4;
3798 if (entrycount == 0) {
3799 return OK;
3800 }
3801 if (entrycount > UINT32_MAX / 8) {
3802 return ERROR_MALFORMED;
3803 }
3804
3805 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3806 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3807 if (newPtr == NULL) {
3808 return NO_MEMORY;
3809 }
3810 mCurrentSampleInfoOffsets = newPtr;
3811 mCurrentSampleInfoOffsetsAllocSize = entrycount;
3812 }
3813 mCurrentSampleInfoOffsetCount = entrycount;
3814
3815 if (mCurrentSampleInfoOffsets == NULL) {
3816 return OK;
3817 }
3818
3819 for (size_t i = 0; i < entrycount; i++) {
3820 if (version == 0) {
3821 uint32_t tmp;
3822 if (!mDataSource->getUInt32(offset, &tmp)) {
3823 return ERROR_IO;
3824 }
3825 mCurrentSampleInfoOffsets[i] = tmp;
3826 offset += 4;
3827 } else {
3828 uint64_t tmp;
3829 if (!mDataSource->getUInt64(offset, &tmp)) {
3830 return ERROR_IO;
3831 }
3832 mCurrentSampleInfoOffsets[i] = tmp;
3833 offset += 8;
3834 }
3835 }
3836
3837 // parse clear/encrypted data
3838
3839 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3840
3841 drmoffset += mCurrentMoofOffset;
3842 int ivlength;
3843 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3844
3845 // only 0, 8 and 16 byte initialization vectors are supported
3846 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
3847 ALOGW("unsupported IV length: %d", ivlength);
3848 return ERROR_MALFORMED;
3849 }
3850 // read CencSampleAuxiliaryDataFormats
3851 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3852 if (i >= mCurrentSamples.size()) {
3853 ALOGW("too few samples");
3854 break;
3855 }
3856 Sample *smpl = &mCurrentSamples.editItemAt(i);
3857
3858 memset(smpl->iv, 0, 16);
3859 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3860 return ERROR_IO;
3861 }
3862
3863 drmoffset += ivlength;
3864
3865 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3866 if (smplinfosize == 0) {
3867 smplinfosize = mCurrentSampleInfoSizes[i];
3868 }
3869 if (smplinfosize > ivlength) {
3870 uint16_t numsubsamples;
3871 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3872 return ERROR_IO;
3873 }
3874 drmoffset += 2;
3875 for (size_t j = 0; j < numsubsamples; j++) {
3876 uint16_t numclear;
3877 uint32_t numencrypted;
3878 if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3879 return ERROR_IO;
3880 }
3881 drmoffset += 2;
3882 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3883 return ERROR_IO;
3884 }
3885 drmoffset += 4;
3886 smpl->clearsizes.add(numclear);
3887 smpl->encryptedsizes.add(numencrypted);
3888 }
3889 } else {
3890 smpl->clearsizes.add(0);
3891 smpl->encryptedsizes.add(smpl->size);
3892 }
3893 }
3894
3895
3896 return OK;
3897 }
3898
parseTrackFragmentHeader(off64_t offset,off64_t size)3899 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3900
3901 if (size < 8) {
3902 return -EINVAL;
3903 }
3904
3905 uint32_t flags;
3906 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3907 return ERROR_MALFORMED;
3908 }
3909
3910 if (flags & 0xff000000) {
3911 return -EINVAL;
3912 }
3913
3914 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3915 return ERROR_MALFORMED;
3916 }
3917
3918 if (mLastParsedTrackId != mTrackId) {
3919 // this is not the right track, skip it
3920 return OK;
3921 }
3922
3923 mTrackFragmentHeaderInfo.mFlags = flags;
3924 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3925 offset += 8;
3926 size -= 8;
3927
3928 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3929
3930 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3931 if (size < 8) {
3932 return -EINVAL;
3933 }
3934
3935 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3936 return ERROR_MALFORMED;
3937 }
3938 offset += 8;
3939 size -= 8;
3940 }
3941
3942 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3943 if (size < 4) {
3944 return -EINVAL;
3945 }
3946
3947 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3948 return ERROR_MALFORMED;
3949 }
3950 offset += 4;
3951 size -= 4;
3952 }
3953
3954 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3955 if (size < 4) {
3956 return -EINVAL;
3957 }
3958
3959 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3960 return ERROR_MALFORMED;
3961 }
3962 offset += 4;
3963 size -= 4;
3964 }
3965
3966 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3967 if (size < 4) {
3968 return -EINVAL;
3969 }
3970
3971 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3972 return ERROR_MALFORMED;
3973 }
3974 offset += 4;
3975 size -= 4;
3976 }
3977
3978 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3979 if (size < 4) {
3980 return -EINVAL;
3981 }
3982
3983 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3984 return ERROR_MALFORMED;
3985 }
3986 offset += 4;
3987 size -= 4;
3988 }
3989
3990 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3991 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3992 }
3993
3994 mTrackFragmentHeaderInfo.mDataOffset = 0;
3995 return OK;
3996 }
3997
parseTrackFragmentRun(off64_t offset,off64_t size)3998 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3999
4000 ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4001 if (size < 8) {
4002 return -EINVAL;
4003 }
4004
4005 enum {
4006 kDataOffsetPresent = 0x01,
4007 kFirstSampleFlagsPresent = 0x04,
4008 kSampleDurationPresent = 0x100,
4009 kSampleSizePresent = 0x200,
4010 kSampleFlagsPresent = 0x400,
4011 kSampleCompositionTimeOffsetPresent = 0x800,
4012 };
4013
4014 uint32_t flags;
4015 if (!mDataSource->getUInt32(offset, &flags)) {
4016 return ERROR_MALFORMED;
4017 }
4018 ALOGV("fragment run flags: %08x", flags);
4019
4020 if (flags & 0xff000000) {
4021 return -EINVAL;
4022 }
4023
4024 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4025 // These two shall not be used together.
4026 return -EINVAL;
4027 }
4028
4029 uint32_t sampleCount;
4030 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4031 return ERROR_MALFORMED;
4032 }
4033 offset += 8;
4034 size -= 8;
4035
4036 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4037
4038 uint32_t firstSampleFlags = 0;
4039
4040 if (flags & kDataOffsetPresent) {
4041 if (size < 4) {
4042 return -EINVAL;
4043 }
4044
4045 int32_t dataOffsetDelta;
4046 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4047 return ERROR_MALFORMED;
4048 }
4049
4050 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4051
4052 offset += 4;
4053 size -= 4;
4054 }
4055
4056 if (flags & kFirstSampleFlagsPresent) {
4057 if (size < 4) {
4058 return -EINVAL;
4059 }
4060
4061 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4062 return ERROR_MALFORMED;
4063 }
4064 offset += 4;
4065 size -= 4;
4066 }
4067
4068 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4069 sampleCtsOffset = 0;
4070
4071 size_t bytesPerSample = 0;
4072 if (flags & kSampleDurationPresent) {
4073 bytesPerSample += 4;
4074 } else if (mTrackFragmentHeaderInfo.mFlags
4075 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4076 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4077 } else if (mTrex) {
4078 sampleDuration = mTrex->default_sample_duration;
4079 }
4080
4081 if (flags & kSampleSizePresent) {
4082 bytesPerSample += 4;
4083 } else if (mTrackFragmentHeaderInfo.mFlags
4084 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4085 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4086 } else {
4087 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4088 }
4089
4090 if (flags & kSampleFlagsPresent) {
4091 bytesPerSample += 4;
4092 } else if (mTrackFragmentHeaderInfo.mFlags
4093 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4094 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4095 } else {
4096 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4097 }
4098
4099 if (flags & kSampleCompositionTimeOffsetPresent) {
4100 bytesPerSample += 4;
4101 } else {
4102 sampleCtsOffset = 0;
4103 }
4104
4105 if (size < (off64_t)(sampleCount * bytesPerSample)) {
4106 return -EINVAL;
4107 }
4108
4109 Sample tmp;
4110 for (uint32_t i = 0; i < sampleCount; ++i) {
4111 if (flags & kSampleDurationPresent) {
4112 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4113 return ERROR_MALFORMED;
4114 }
4115 offset += 4;
4116 }
4117
4118 if (flags & kSampleSizePresent) {
4119 if (!mDataSource->getUInt32(offset, &sampleSize)) {
4120 return ERROR_MALFORMED;
4121 }
4122 offset += 4;
4123 }
4124
4125 if (flags & kSampleFlagsPresent) {
4126 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4127 return ERROR_MALFORMED;
4128 }
4129 offset += 4;
4130 }
4131
4132 if (flags & kSampleCompositionTimeOffsetPresent) {
4133 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4134 return ERROR_MALFORMED;
4135 }
4136 offset += 4;
4137 }
4138
4139 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4140 " flags 0x%08x", i + 1,
4141 dataOffset, sampleSize, sampleDuration,
4142 (flags & kFirstSampleFlagsPresent) && i == 0
4143 ? firstSampleFlags : sampleFlags);
4144 tmp.offset = dataOffset;
4145 tmp.size = sampleSize;
4146 tmp.duration = sampleDuration;
4147 tmp.compositionOffset = sampleCtsOffset;
4148 mCurrentSamples.add(tmp);
4149
4150 dataOffset += sampleSize;
4151 }
4152
4153 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4154
4155 return OK;
4156 }
4157
getFormat()4158 sp<MetaData> MPEG4Source::getFormat() {
4159 Mutex::Autolock autoLock(mLock);
4160
4161 return mFormat;
4162 }
4163
parseNALSize(const uint8_t * data) const4164 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4165 switch (mNALLengthSize) {
4166 case 1:
4167 return *data;
4168 case 2:
4169 return U16_AT(data);
4170 case 3:
4171 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4172 case 4:
4173 return U32_AT(data);
4174 }
4175
4176 // This cannot happen, mNALLengthSize springs to life by adding 1 to
4177 // a 2-bit integer.
4178 CHECK(!"Should not be here.");
4179
4180 return 0;
4181 }
4182
read(MediaBuffer ** out,const ReadOptions * options)4183 status_t MPEG4Source::read(
4184 MediaBuffer **out, const ReadOptions *options) {
4185 Mutex::Autolock autoLock(mLock);
4186
4187 CHECK(mStarted);
4188
4189 if (mFirstMoofOffset > 0) {
4190 return fragmentedRead(out, options);
4191 }
4192
4193 *out = NULL;
4194
4195 int64_t targetSampleTimeUs = -1;
4196
4197 int64_t seekTimeUs;
4198 ReadOptions::SeekMode mode;
4199 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4200 uint32_t findFlags = 0;
4201 switch (mode) {
4202 case ReadOptions::SEEK_PREVIOUS_SYNC:
4203 findFlags = SampleTable::kFlagBefore;
4204 break;
4205 case ReadOptions::SEEK_NEXT_SYNC:
4206 findFlags = SampleTable::kFlagAfter;
4207 break;
4208 case ReadOptions::SEEK_CLOSEST_SYNC:
4209 case ReadOptions::SEEK_CLOSEST:
4210 findFlags = SampleTable::kFlagClosest;
4211 break;
4212 default:
4213 CHECK(!"Should not be here.");
4214 break;
4215 }
4216
4217 uint32_t sampleIndex;
4218 status_t err = mSampleTable->findSampleAtTime(
4219 seekTimeUs, 1000000, mTimescale,
4220 &sampleIndex, findFlags);
4221
4222 if (mode == ReadOptions::SEEK_CLOSEST) {
4223 // We found the closest sample already, now we want the sync
4224 // sample preceding it (or the sample itself of course), even
4225 // if the subsequent sync sample is closer.
4226 findFlags = SampleTable::kFlagBefore;
4227 }
4228
4229 uint32_t syncSampleIndex;
4230 if (err == OK) {
4231 err = mSampleTable->findSyncSampleNear(
4232 sampleIndex, &syncSampleIndex, findFlags);
4233 }
4234
4235 uint32_t sampleTime;
4236 if (err == OK) {
4237 err = mSampleTable->getMetaDataForSample(
4238 sampleIndex, NULL, NULL, &sampleTime);
4239 }
4240
4241 if (err != OK) {
4242 if (err == ERROR_OUT_OF_RANGE) {
4243 // An attempt to seek past the end of the stream would
4244 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
4245 // this all the way to the MediaPlayer would cause abnormal
4246 // termination. Legacy behaviour appears to be to behave as if
4247 // we had seeked to the end of stream, ending normally.
4248 err = ERROR_END_OF_STREAM;
4249 }
4250 ALOGV("end of stream");
4251 return err;
4252 }
4253
4254 if (mode == ReadOptions::SEEK_CLOSEST) {
4255 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
4256 }
4257
4258 #if 0
4259 uint32_t syncSampleTime;
4260 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
4261 syncSampleIndex, NULL, NULL, &syncSampleTime));
4262
4263 ALOGI("seek to time %lld us => sample at time %lld us, "
4264 "sync sample at time %lld us",
4265 seekTimeUs,
4266 sampleTime * 1000000ll / mTimescale,
4267 syncSampleTime * 1000000ll / mTimescale);
4268 #endif
4269
4270 mCurrentSampleIndex = syncSampleIndex;
4271 if (mBuffer != NULL) {
4272 mBuffer->release();
4273 mBuffer = NULL;
4274 }
4275
4276 // fall through
4277 }
4278
4279 off64_t offset;
4280 size_t size;
4281 uint32_t cts, stts;
4282 bool isSyncSample;
4283 bool newBuffer = false;
4284 if (mBuffer == NULL) {
4285 newBuffer = true;
4286
4287 status_t err =
4288 mSampleTable->getMetaDataForSample(
4289 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
4290
4291 if (err != OK) {
4292 return err;
4293 }
4294
4295 err = mGroup->acquire_buffer(&mBuffer);
4296
4297 if (err != OK) {
4298 CHECK(mBuffer == NULL);
4299 return err;
4300 }
4301 if (size > mBuffer->size()) {
4302 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4303 return ERROR_BUFFER_TOO_SMALL;
4304 }
4305 }
4306
4307 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
4308 if (newBuffer) {
4309 ssize_t num_bytes_read =
4310 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4311
4312 if (num_bytes_read < (ssize_t)size) {
4313 mBuffer->release();
4314 mBuffer = NULL;
4315
4316 return ERROR_IO;
4317 }
4318
4319 CHECK(mBuffer != NULL);
4320 mBuffer->set_range(0, size);
4321 mBuffer->meta_data()->clear();
4322 mBuffer->meta_data()->setInt64(
4323 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4324 mBuffer->meta_data()->setInt64(
4325 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4326
4327 if (targetSampleTimeUs >= 0) {
4328 mBuffer->meta_data()->setInt64(
4329 kKeyTargetTime, targetSampleTimeUs);
4330 }
4331
4332 if (isSyncSample) {
4333 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4334 }
4335
4336 ++mCurrentSampleIndex;
4337 }
4338
4339 if (!mIsAVC && !mIsHEVC) {
4340 *out = mBuffer;
4341 mBuffer = NULL;
4342
4343 return OK;
4344 }
4345
4346 // Each NAL unit is split up into its constituent fragments and
4347 // each one of them returned in its own buffer.
4348
4349 CHECK(mBuffer->range_length() >= mNALLengthSize);
4350
4351 const uint8_t *src =
4352 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4353
4354 size_t nal_size = parseNALSize(src);
4355 if (mNALLengthSize > SIZE_MAX - nal_size) {
4356 ALOGE("b/24441553, b/24445122");
4357 }
4358 if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4359 ALOGE("incomplete NAL unit.");
4360
4361 mBuffer->release();
4362 mBuffer = NULL;
4363
4364 return ERROR_MALFORMED;
4365 }
4366
4367 MediaBuffer *clone = mBuffer->clone();
4368 CHECK(clone != NULL);
4369 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4370
4371 CHECK(mBuffer != NULL);
4372 mBuffer->set_range(
4373 mBuffer->range_offset() + mNALLengthSize + nal_size,
4374 mBuffer->range_length() - mNALLengthSize - nal_size);
4375
4376 if (mBuffer->range_length() == 0) {
4377 mBuffer->release();
4378 mBuffer = NULL;
4379 }
4380
4381 *out = clone;
4382
4383 return OK;
4384 } else {
4385 // Whole NAL units are returned but each fragment is prefixed by
4386 // the start code (0x00 00 00 01).
4387 ssize_t num_bytes_read = 0;
4388 int32_t drm = 0;
4389 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4390 if (usesDRM) {
4391 num_bytes_read =
4392 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4393 } else {
4394 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4395 }
4396
4397 if (num_bytes_read < (ssize_t)size) {
4398 mBuffer->release();
4399 mBuffer = NULL;
4400
4401 return ERROR_IO;
4402 }
4403
4404 if (usesDRM) {
4405 CHECK(mBuffer != NULL);
4406 mBuffer->set_range(0, size);
4407
4408 } else {
4409 uint8_t *dstData = (uint8_t *)mBuffer->data();
4410 size_t srcOffset = 0;
4411 size_t dstOffset = 0;
4412
4413 while (srcOffset < size) {
4414 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4415 size_t nalLength = 0;
4416 if (!isMalFormed) {
4417 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4418 srcOffset += mNALLengthSize;
4419 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4420 }
4421
4422 if (isMalFormed) {
4423 ALOGE("Video is malformed");
4424 mBuffer->release();
4425 mBuffer = NULL;
4426 return ERROR_MALFORMED;
4427 }
4428
4429 if (nalLength == 0) {
4430 continue;
4431 }
4432
4433 if (dstOffset > SIZE_MAX - 4 ||
4434 dstOffset + 4 > SIZE_MAX - nalLength ||
4435 dstOffset + 4 + nalLength > mBuffer->size()) {
4436 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
4437 android_errorWriteLog(0x534e4554, "27208621");
4438 mBuffer->release();
4439 mBuffer = NULL;
4440 return ERROR_MALFORMED;
4441 }
4442
4443 dstData[dstOffset++] = 0;
4444 dstData[dstOffset++] = 0;
4445 dstData[dstOffset++] = 0;
4446 dstData[dstOffset++] = 1;
4447 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4448 srcOffset += nalLength;
4449 dstOffset += nalLength;
4450 }
4451 CHECK_EQ(srcOffset, size);
4452 CHECK(mBuffer != NULL);
4453 mBuffer->set_range(0, dstOffset);
4454 }
4455
4456 mBuffer->meta_data()->clear();
4457 mBuffer->meta_data()->setInt64(
4458 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4459 mBuffer->meta_data()->setInt64(
4460 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4461
4462 if (targetSampleTimeUs >= 0) {
4463 mBuffer->meta_data()->setInt64(
4464 kKeyTargetTime, targetSampleTimeUs);
4465 }
4466
4467 if (isSyncSample) {
4468 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4469 }
4470
4471 ++mCurrentSampleIndex;
4472
4473 *out = mBuffer;
4474 mBuffer = NULL;
4475
4476 return OK;
4477 }
4478 }
4479
fragmentedRead(MediaBuffer ** out,const ReadOptions * options)4480 status_t MPEG4Source::fragmentedRead(
4481 MediaBuffer **out, const ReadOptions *options) {
4482
4483 ALOGV("MPEG4Source::fragmentedRead");
4484
4485 CHECK(mStarted);
4486
4487 *out = NULL;
4488
4489 int64_t targetSampleTimeUs = -1;
4490
4491 int64_t seekTimeUs;
4492 ReadOptions::SeekMode mode;
4493 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4494
4495 int numSidxEntries = mSegments.size();
4496 if (numSidxEntries != 0) {
4497 int64_t totalTime = 0;
4498 off64_t totalOffset = mFirstMoofOffset;
4499 for (int i = 0; i < numSidxEntries; i++) {
4500 const SidxEntry *se = &mSegments[i];
4501 if (totalTime + se->mDurationUs > seekTimeUs) {
4502 // The requested time is somewhere in this segment
4503 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
4504 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
4505 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
4506 // requested next sync, or closest sync and it was closer to the end of
4507 // this segment
4508 totalTime += se->mDurationUs;
4509 totalOffset += se->mSize;
4510 }
4511 break;
4512 }
4513 totalTime += se->mDurationUs;
4514 totalOffset += se->mSize;
4515 }
4516 mCurrentMoofOffset = totalOffset;
4517 mCurrentSamples.clear();
4518 mCurrentSampleIndex = 0;
4519 parseChunk(&totalOffset);
4520 mCurrentTime = totalTime * mTimescale / 1000000ll;
4521 } else {
4522 // without sidx boxes, we can only seek to 0
4523 mCurrentMoofOffset = mFirstMoofOffset;
4524 mCurrentSamples.clear();
4525 mCurrentSampleIndex = 0;
4526 off64_t tmp = mCurrentMoofOffset;
4527 parseChunk(&tmp);
4528 mCurrentTime = 0;
4529 }
4530
4531 if (mBuffer != NULL) {
4532 mBuffer->release();
4533 mBuffer = NULL;
4534 }
4535
4536 // fall through
4537 }
4538
4539 off64_t offset = 0;
4540 size_t size = 0;
4541 uint32_t cts = 0;
4542 bool isSyncSample = false;
4543 bool newBuffer = false;
4544 if (mBuffer == NULL) {
4545 newBuffer = true;
4546
4547 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4548 // move to next fragment if there is one
4549 if (mNextMoofOffset <= mCurrentMoofOffset) {
4550 return ERROR_END_OF_STREAM;
4551 }
4552 off64_t nextMoof = mNextMoofOffset;
4553 mCurrentMoofOffset = nextMoof;
4554 mCurrentSamples.clear();
4555 mCurrentSampleIndex = 0;
4556 parseChunk(&nextMoof);
4557 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4558 return ERROR_END_OF_STREAM;
4559 }
4560 }
4561
4562 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4563 offset = smpl->offset;
4564 size = smpl->size;
4565 cts = mCurrentTime + smpl->compositionOffset;
4566 mCurrentTime += smpl->duration;
4567 isSyncSample = (mCurrentSampleIndex == 0); // XXX
4568
4569 status_t err = mGroup->acquire_buffer(&mBuffer);
4570
4571 if (err != OK) {
4572 CHECK(mBuffer == NULL);
4573 ALOGV("acquire_buffer returned %d", err);
4574 return err;
4575 }
4576 if (size > mBuffer->size()) {
4577 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4578 return ERROR_BUFFER_TOO_SMALL;
4579 }
4580 }
4581
4582 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4583 const sp<MetaData> bufmeta = mBuffer->meta_data();
4584 bufmeta->clear();
4585 if (smpl->encryptedsizes.size()) {
4586 // store clear/encrypted lengths in metadata
4587 bufmeta->setData(kKeyPlainSizes, 0,
4588 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4589 bufmeta->setData(kKeyEncryptedSizes, 0,
4590 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4591 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4592 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4593 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4594 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4595 }
4596
4597 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4598 if (newBuffer) {
4599 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
4600 mBuffer->release();
4601 mBuffer = NULL;
4602
4603 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
4604 return ERROR_MALFORMED;
4605 }
4606
4607 ssize_t num_bytes_read =
4608 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4609
4610 if (num_bytes_read < (ssize_t)size) {
4611 mBuffer->release();
4612 mBuffer = NULL;
4613
4614 ALOGE("i/o error");
4615 return ERROR_IO;
4616 }
4617
4618 CHECK(mBuffer != NULL);
4619 mBuffer->set_range(0, size);
4620 mBuffer->meta_data()->setInt64(
4621 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4622 mBuffer->meta_data()->setInt64(
4623 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4624
4625 if (targetSampleTimeUs >= 0) {
4626 mBuffer->meta_data()->setInt64(
4627 kKeyTargetTime, targetSampleTimeUs);
4628 }
4629
4630 if (isSyncSample) {
4631 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4632 }
4633
4634 ++mCurrentSampleIndex;
4635 }
4636
4637 if (!mIsAVC && !mIsHEVC) {
4638 *out = mBuffer;
4639 mBuffer = NULL;
4640
4641 return OK;
4642 }
4643
4644 // Each NAL unit is split up into its constituent fragments and
4645 // each one of them returned in its own buffer.
4646
4647 CHECK(mBuffer->range_length() >= mNALLengthSize);
4648
4649 const uint8_t *src =
4650 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4651
4652 size_t nal_size = parseNALSize(src);
4653 if (mNALLengthSize > SIZE_MAX - nal_size) {
4654 ALOGE("b/24441553, b/24445122");
4655 }
4656
4657 if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4658 ALOGE("incomplete NAL unit.");
4659
4660 mBuffer->release();
4661 mBuffer = NULL;
4662
4663 return ERROR_MALFORMED;
4664 }
4665
4666 MediaBuffer *clone = mBuffer->clone();
4667 CHECK(clone != NULL);
4668 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4669
4670 CHECK(mBuffer != NULL);
4671 mBuffer->set_range(
4672 mBuffer->range_offset() + mNALLengthSize + nal_size,
4673 mBuffer->range_length() - mNALLengthSize - nal_size);
4674
4675 if (mBuffer->range_length() == 0) {
4676 mBuffer->release();
4677 mBuffer = NULL;
4678 }
4679
4680 *out = clone;
4681
4682 return OK;
4683 } else {
4684 ALOGV("whole NAL");
4685 // Whole NAL units are returned but each fragment is prefixed by
4686 // the start code (0x00 00 00 01).
4687 ssize_t num_bytes_read = 0;
4688 int32_t drm = 0;
4689 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4690 void *data = NULL;
4691 bool isMalFormed = false;
4692 if (usesDRM) {
4693 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
4694 isMalFormed = true;
4695 } else {
4696 data = mBuffer->data();
4697 }
4698 } else {
4699 int32_t max_size;
4700 if (mFormat == NULL
4701 || !mFormat->findInt32(kKeyMaxInputSize, &max_size)
4702 || !isInRange((size_t)0u, (size_t)max_size, size)) {
4703 isMalFormed = true;
4704 } else {
4705 data = mSrcBuffer;
4706 }
4707 }
4708
4709 if (isMalFormed || data == NULL) {
4710 ALOGE("isMalFormed size %zu", size);
4711 if (mBuffer != NULL) {
4712 mBuffer->release();
4713 mBuffer = NULL;
4714 }
4715 return ERROR_MALFORMED;
4716 }
4717 num_bytes_read = mDataSource->readAt(offset, data, size);
4718
4719 if (num_bytes_read < (ssize_t)size) {
4720 mBuffer->release();
4721 mBuffer = NULL;
4722
4723 ALOGE("i/o error");
4724 return ERROR_IO;
4725 }
4726
4727 if (usesDRM) {
4728 CHECK(mBuffer != NULL);
4729 mBuffer->set_range(0, size);
4730
4731 } else {
4732 uint8_t *dstData = (uint8_t *)mBuffer->data();
4733 size_t srcOffset = 0;
4734 size_t dstOffset = 0;
4735
4736 while (srcOffset < size) {
4737 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4738 size_t nalLength = 0;
4739 if (!isMalFormed) {
4740 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4741 srcOffset += mNALLengthSize;
4742 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
4743 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
4744 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
4745 }
4746
4747 if (isMalFormed) {
4748 ALOGE("Video is malformed; nalLength %zu", nalLength);
4749 mBuffer->release();
4750 mBuffer = NULL;
4751 return ERROR_MALFORMED;
4752 }
4753
4754 if (nalLength == 0) {
4755 continue;
4756 }
4757
4758 if (dstOffset > SIZE_MAX - 4 ||
4759 dstOffset + 4 > SIZE_MAX - nalLength ||
4760 dstOffset + 4 + nalLength > mBuffer->size()) {
4761 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
4762 android_errorWriteLog(0x534e4554, "26365349");
4763 mBuffer->release();
4764 mBuffer = NULL;
4765 return ERROR_MALFORMED;
4766 }
4767
4768 dstData[dstOffset++] = 0;
4769 dstData[dstOffset++] = 0;
4770 dstData[dstOffset++] = 0;
4771 dstData[dstOffset++] = 1;
4772 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4773 srcOffset += nalLength;
4774 dstOffset += nalLength;
4775 }
4776 CHECK_EQ(srcOffset, size);
4777 CHECK(mBuffer != NULL);
4778 mBuffer->set_range(0, dstOffset);
4779 }
4780
4781 mBuffer->meta_data()->setInt64(
4782 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4783 mBuffer->meta_data()->setInt64(
4784 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4785
4786 if (targetSampleTimeUs >= 0) {
4787 mBuffer->meta_data()->setInt64(
4788 kKeyTargetTime, targetSampleTimeUs);
4789 }
4790
4791 if (isSyncSample) {
4792 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4793 }
4794
4795 ++mCurrentSampleIndex;
4796
4797 *out = mBuffer;
4798 mBuffer = NULL;
4799
4800 return OK;
4801 }
4802 }
4803
findTrackByMimePrefix(const char * mimePrefix)4804 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4805 const char *mimePrefix) {
4806 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4807 const char *mime;
4808 if (track->meta != NULL
4809 && track->meta->findCString(kKeyMIMEType, &mime)
4810 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4811 return track;
4812 }
4813 }
4814
4815 return NULL;
4816 }
4817
LegacySniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence)4818 static bool LegacySniffMPEG4(
4819 const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4820 uint8_t header[8];
4821
4822 ssize_t n = source->readAt(4, header, sizeof(header));
4823 if (n < (ssize_t)sizeof(header)) {
4824 return false;
4825 }
4826
4827 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4828 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4829 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4830 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4831 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4832 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4833 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4834 *confidence = 0.4;
4835
4836 return true;
4837 }
4838
4839 return false;
4840 }
4841
isCompatibleBrand(uint32_t fourcc)4842 static bool isCompatibleBrand(uint32_t fourcc) {
4843 static const uint32_t kCompatibleBrands[] = {
4844 FOURCC('i', 's', 'o', 'm'),
4845 FOURCC('i', 's', 'o', '2'),
4846 FOURCC('a', 'v', 'c', '1'),
4847 FOURCC('h', 'v', 'c', '1'),
4848 FOURCC('h', 'e', 'v', '1'),
4849 FOURCC('3', 'g', 'p', '4'),
4850 FOURCC('m', 'p', '4', '1'),
4851 FOURCC('m', 'p', '4', '2'),
4852
4853 // Won't promise that the following file types can be played.
4854 // Just give these file types a chance.
4855 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime
4856 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP
4857
4858 FOURCC('3', 'g', '2', 'a'), // 3GPP2
4859 FOURCC('3', 'g', '2', 'b'),
4860 };
4861
4862 for (size_t i = 0;
4863 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4864 ++i) {
4865 if (kCompatibleBrands[i] == fourcc) {
4866 return true;
4867 }
4868 }
4869
4870 return false;
4871 }
4872
4873 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
4874 // compatible brand is present.
4875 // Also try to identify where this file's metadata ends
4876 // (end of the 'moov' atom) and report it to the caller as part of
4877 // the metadata.
BetterSniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4878 static bool BetterSniffMPEG4(
4879 const sp<DataSource> &source, String8 *mimeType, float *confidence,
4880 sp<AMessage> *meta) {
4881 // We scan up to 128 bytes to identify this file as an MP4.
4882 static const off64_t kMaxScanOffset = 128ll;
4883
4884 off64_t offset = 0ll;
4885 bool foundGoodFileType = false;
4886 off64_t moovAtomEndOffset = -1ll;
4887 bool done = false;
4888
4889 while (!done && offset < kMaxScanOffset) {
4890 uint32_t hdr[2];
4891 if (source->readAt(offset, hdr, 8) < 8) {
4892 return false;
4893 }
4894
4895 uint64_t chunkSize = ntohl(hdr[0]);
4896 uint32_t chunkType = ntohl(hdr[1]);
4897 off64_t chunkDataOffset = offset + 8;
4898
4899 if (chunkSize == 1) {
4900 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4901 return false;
4902 }
4903
4904 chunkSize = ntoh64(chunkSize);
4905 chunkDataOffset += 8;
4906
4907 if (chunkSize < 16) {
4908 // The smallest valid chunk is 16 bytes long in this case.
4909 return false;
4910 }
4911
4912 } else if (chunkSize < 8) {
4913 // The smallest valid chunk is 8 bytes long.
4914 return false;
4915 }
4916
4917 // (data_offset - offset) is either 8 or 16
4918 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
4919 if (chunkDataSize < 0) {
4920 ALOGE("b/23540914");
4921 return ERROR_MALFORMED;
4922 }
4923
4924 char chunkstring[5];
4925 MakeFourCCString(chunkType, chunkstring);
4926 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
4927 switch (chunkType) {
4928 case FOURCC('f', 't', 'y', 'p'):
4929 {
4930 if (chunkDataSize < 8) {
4931 return false;
4932 }
4933
4934 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4935 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4936 if (i == 1) {
4937 // Skip this index, it refers to the minorVersion,
4938 // not a brand.
4939 continue;
4940 }
4941
4942 uint32_t brand;
4943 if (source->readAt(
4944 chunkDataOffset + 4 * i, &brand, 4) < 4) {
4945 return false;
4946 }
4947
4948 brand = ntohl(brand);
4949
4950 if (isCompatibleBrand(brand)) {
4951 foundGoodFileType = true;
4952 break;
4953 }
4954 }
4955
4956 if (!foundGoodFileType) {
4957 return false;
4958 }
4959
4960 break;
4961 }
4962
4963 case FOURCC('m', 'o', 'o', 'v'):
4964 {
4965 moovAtomEndOffset = offset + chunkSize;
4966
4967 done = true;
4968 break;
4969 }
4970
4971 default:
4972 break;
4973 }
4974
4975 offset += chunkSize;
4976 }
4977
4978 if (!foundGoodFileType) {
4979 return false;
4980 }
4981
4982 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4983 *confidence = 0.4f;
4984
4985 if (moovAtomEndOffset >= 0) {
4986 *meta = new AMessage;
4987 (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4988
4989 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset);
4990 }
4991
4992 return true;
4993 }
4994
SniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4995 bool SniffMPEG4(
4996 const sp<DataSource> &source, String8 *mimeType, float *confidence,
4997 sp<AMessage> *meta) {
4998 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4999 return true;
5000 }
5001
5002 if (LegacySniffMPEG4(source, mimeType, confidence)) {
5003 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5004 return true;
5005 }
5006
5007 return false;
5008 }
5009
5010 } // namespace android
5011