1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "ESQueue"
19 #include <media/stagefright/foundation/ADebug.h>
20
21 #include "ESQueue.h"
22
23 #include <media/stagefright/foundation/hexdump.h>
24 #include <media/stagefright/foundation/ABitReader.h>
25 #include <media/stagefright/foundation/ABuffer.h>
26 #include <media/stagefright/foundation/AMessage.h>
27 #include <media/stagefright/foundation/ByteUtils.h>
28 #include <media/stagefright/foundation/avc_utils.h>
29 #include <media/stagefright/MediaErrors.h>
30 #include <media/stagefright/MediaDefs.h>
31 #include <media/stagefright/MetaData.h>
32 #include <media/stagefright/MetaDataUtils.h>
33 #include <media/cas/DescramblerAPI.h>
34 #include <media/hardware/CryptoAPI.h>
35
36 #include <inttypes.h>
37 #include <netinet/in.h>
38
39 #ifndef __ANDROID_APEX__
40 #include "HlsSampleDecryptor.h"
41 #endif
42
43 namespace android {
44
ElementaryStreamQueue(Mode mode,uint32_t flags)45 ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
46 : mMode(mode),
47 mFlags(flags),
48 mEOSReached(false),
49 mCASystemId(0),
50 mAUIndex(0) {
51
52 ALOGV("ElementaryStreamQueue(%p) mode %x flags %x isScrambled %d isSampleEncrypted %d",
53 this, mode, flags, isScrambled(), isSampleEncrypted());
54
55 // Create the decryptor anyway since we don't know the use-case unless key is provided
56 // Won't decrypt if key info not available (e.g., scanner/extractor just parsing ts files)
57 mSampleDecryptor = isSampleEncrypted() ?
58 #ifdef __ANDROID_APEX__
59 new SampleDecryptor
60 #else
61 new HlsSampleDecryptor
62 #endif
63 : NULL;
64 }
65
getFormat()66 sp<MetaData> ElementaryStreamQueue::getFormat() {
67 return mFormat;
68 }
69
clear(bool clearFormat)70 void ElementaryStreamQueue::clear(bool clearFormat) {
71 if (mBuffer != NULL) {
72 mBuffer->setRange(0, 0);
73 }
74
75 mRangeInfos.clear();
76
77 if (mScrambledBuffer != NULL) {
78 mScrambledBuffer->setRange(0, 0);
79 }
80 mScrambledRangeInfos.clear();
81
82 if (clearFormat) {
83 mFormat.clear();
84 }
85
86 mEOSReached = false;
87 }
88
isScrambled() const89 bool ElementaryStreamQueue::isScrambled() const {
90 return (mFlags & kFlag_ScrambledData) != 0;
91 }
92
setCasInfo(int32_t systemId,const std::vector<uint8_t> & sessionId)93 void ElementaryStreamQueue::setCasInfo(
94 int32_t systemId, const std::vector<uint8_t> &sessionId) {
95 mCASystemId = systemId;
96 mCasSessionId = sessionId;
97 }
98
readVariableBits(ABitReader & bits,int32_t nbits)99 static int32_t readVariableBits(ABitReader &bits, int32_t nbits) {
100 int32_t value = 0;
101 int32_t more_bits = 1;
102
103 while (more_bits) {
104 value += bits.getBits(nbits);
105 more_bits = bits.getBits(1);
106 if (!more_bits)
107 break;
108 value++;
109 value <<= nbits;
110 }
111 return value;
112 }
113
114 // Parse AC3 header assuming the current ptr is start position of syncframe,
115 // update metadata only applicable, and return the payload size
parseAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)116 static unsigned parseAC3SyncFrame(
117 const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
118 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
119 static const unsigned samplingRateTable[] = {48000, 44100, 32000};
120
121 static const unsigned frameSizeTable[19][3] = {
122 { 64, 69, 96 },
123 { 80, 87, 120 },
124 { 96, 104, 144 },
125 { 112, 121, 168 },
126 { 128, 139, 192 },
127 { 160, 174, 240 },
128 { 192, 208, 288 },
129 { 224, 243, 336 },
130 { 256, 278, 384 },
131 { 320, 348, 480 },
132 { 384, 417, 576 },
133 { 448, 487, 672 },
134 { 512, 557, 768 },
135 { 640, 696, 960 },
136 { 768, 835, 1152 },
137 { 896, 975, 1344 },
138 { 1024, 1114, 1536 },
139 { 1152, 1253, 1728 },
140 { 1280, 1393, 1920 },
141 };
142
143 ABitReader bits(ptr, size);
144 if (bits.numBitsLeft() < 16) {
145 return 0;
146 }
147 if (bits.getBits(16) != 0x0B77) {
148 return 0;
149 }
150
151 if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
152 ALOGV("Not enough bits left for further parsing");
153 return 0;
154 }
155 bits.skipBits(16); // crc1
156
157 unsigned fscod = bits.getBits(2);
158 if (fscod == 3) {
159 ALOGW("Incorrect fscod in AC3 header");
160 return 0;
161 }
162
163 unsigned frmsizecod = bits.getBits(6);
164 if (frmsizecod > 37) {
165 ALOGW("Incorrect frmsizecod in AC3 header");
166 return 0;
167 }
168
169 unsigned bsid = bits.getBits(5);
170 if (bsid > 8) {
171 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
172 return 0;
173 }
174
175 unsigned bsmod __unused = bits.getBits(3);
176 unsigned acmod = bits.getBits(3);
177 unsigned cmixlev __unused = 0;
178 unsigned surmixlev __unused = 0;
179 unsigned dsurmod __unused = 0;
180
181 if ((acmod & 1) > 0 && acmod != 1) {
182 if (bits.numBitsLeft() < 2) {
183 return 0;
184 }
185 cmixlev = bits.getBits(2);
186 }
187 if ((acmod & 4) > 0) {
188 if (bits.numBitsLeft() < 2) {
189 return 0;
190 }
191 surmixlev = bits.getBits(2);
192 }
193 if (acmod == 2) {
194 if (bits.numBitsLeft() < 2) {
195 return 0;
196 }
197 dsurmod = bits.getBits(2);
198 }
199
200 if (bits.numBitsLeft() < 1) {
201 return 0;
202 }
203 unsigned lfeon = bits.getBits(1);
204
205 unsigned samplingRate = samplingRateTable[fscod];
206 unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
207 if (fscod == 1) {
208 payloadSize += frmsizecod & 1;
209 }
210 payloadSize <<= 1; // convert from 16-bit words to bytes
211
212 unsigned channelCount = channelCountTable[acmod] + lfeon;
213
214 if (metaData != NULL) {
215 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
216 (*metaData)->setInt32(kKeyChannelCount, channelCount);
217 (*metaData)->setInt32(kKeySampleRate, samplingRate);
218 }
219
220 return payloadSize;
221 }
222
223 // Parse EAC3 header assuming the current ptr is start position of syncframe,
224 // update metadata only applicable, and return the payload size
225 // ATSC A/52:2012 E2.3.1
parseEAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)226 static unsigned parseEAC3SyncFrame(
227 const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
228 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
229 static const unsigned samplingRateTable[] = {48000, 44100, 32000};
230 static const unsigned samplingRateTable2[] = {24000, 22050, 16000};
231
232 ABitReader bits(ptr, size);
233 if (bits.numBitsLeft() < 16) {
234 ALOGE("Not enough bits left for further parsing");
235 return 0;
236 }
237 if (bits.getBits(16) != 0x0B77) {
238 ALOGE("No valid sync word in EAC3 header");
239 return 0;
240 }
241
242 // we parse up to bsid so there needs to be at least that many bits
243 if (bits.numBitsLeft() < 2 + 3 + 11 + 2 + 2 + 3 + 1 + 5) {
244 ALOGE("Not enough bits left for further parsing");
245 return 0;
246 }
247
248 unsigned strmtyp = bits.getBits(2);
249 if (strmtyp == 3) {
250 ALOGE("Incorrect strmtyp in EAC3 header");
251 return 0;
252 }
253
254 unsigned substreamid = bits.getBits(3);
255 // only the first independent stream is supported
256 if ((strmtyp == 0 || strmtyp == 2) && substreamid != 0)
257 return 0;
258
259 unsigned frmsiz = bits.getBits(11);
260 unsigned fscod = bits.getBits(2);
261
262 unsigned samplingRate = 0;
263 if (fscod == 0x3) {
264 unsigned fscod2 = bits.getBits(2);
265 if (fscod2 == 3) {
266 ALOGW("Incorrect fscod2 in EAC3 header");
267 return 0;
268 }
269 samplingRate = samplingRateTable2[fscod2];
270 } else {
271 samplingRate = samplingRateTable[fscod];
272 unsigned numblkscod __unused = bits.getBits(2);
273 }
274
275 unsigned acmod = bits.getBits(3);
276 unsigned lfeon = bits.getBits(1);
277 unsigned bsid = bits.getBits(5);
278 if (bsid < 11 || bsid > 16) {
279 ALOGW("Incorrect bsid in EAC3 header. Could be AC-3 or some unknown EAC3 format");
280 return 0;
281 }
282
283 // we currently only support the first independant stream
284 if (metaData != NULL && (strmtyp == 0 || strmtyp == 2)) {
285 unsigned channelCount = channelCountTable[acmod] + lfeon;
286 ALOGV("EAC3 channelCount = %d", channelCount);
287 ALOGV("EAC3 samplingRate = %d", samplingRate);
288 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_EAC3);
289 (*metaData)->setInt32(kKeyChannelCount, channelCount);
290 (*metaData)->setInt32(kKeySampleRate, samplingRate);
291 (*metaData)->setInt32(kKeyIsSyncFrame, 1);
292 }
293
294 unsigned payloadSize = frmsiz + 1;
295 payloadSize <<= 1; // convert from 16-bit words to bytes
296
297 return payloadSize;
298 }
299
300 // Parse AC4 header assuming the current ptr is start position of syncframe
301 // and update frameSize and metadata.
parseAC4SyncFrame(const uint8_t * ptr,size_t size,unsigned & frameSize,sp<MetaData> * metaData)302 static status_t parseAC4SyncFrame(
303 const uint8_t *ptr, size_t size, unsigned &frameSize, sp<MetaData> *metaData) {
304 // ETSI TS 103 190-2 V1.1.1 (2015-09), Annex C
305 // The sync_word can be either 0xAC40 or 0xAC41.
306 static const int kSyncWordAC40 = 0xAC40;
307 static const int kSyncWordAC41 = 0xAC41;
308
309 size_t headerSize = 0;
310 ABitReader bits(ptr, size);
311 int32_t syncWord = bits.getBits(16);
312 if ((syncWord != kSyncWordAC40) && (syncWord != kSyncWordAC41)) {
313 ALOGE("Invalid syncword in AC4 header");
314 return ERROR_MALFORMED;
315 }
316 headerSize += 2;
317
318 frameSize = bits.getBits(16);
319 headerSize += 2;
320 if (frameSize == 0xFFFF) {
321 frameSize = bits.getBits(24);
322 headerSize += 3;
323 }
324
325 if (frameSize == 0) {
326 ALOGE("Invalid frame size in AC4 header");
327 return ERROR_MALFORMED;
328 }
329 frameSize += headerSize;
330 // If the sync_word is 0xAC41, a crc_word is also transmitted.
331 if (syncWord == kSyncWordAC41) {
332 frameSize += 2; // crc_word
333 }
334 ALOGV("AC4 frameSize = %u", frameSize);
335
336 // ETSI TS 103 190-2 V1.1.1 6.2.1.1
337 uint32_t bitstreamVersion = bits.getBits(2);
338 if (bitstreamVersion == 3) {
339 bitstreamVersion += readVariableBits(bits, 2);
340 }
341
342 bits.skipBits(10); // Sequence Counter
343
344 uint32_t bWaitFrames = bits.getBits(1);
345 if (bWaitFrames) {
346 uint32_t waitFrames = bits.getBits(3);
347 if (waitFrames > 0) {
348 bits.skipBits(2); // br_code;
349 }
350 }
351
352 // ETSI TS 103 190 V1.1.1 Table 82
353 bool fsIndex = bits.getBits(1);
354 uint32_t samplingRate = fsIndex ? 48000 : 44100;
355
356 if (metaData != NULL) {
357 ALOGV("dequeueAccessUnitAC4 Setting mFormat");
358 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC4);
359 (*metaData)->setInt32(kKeyIsSyncFrame, 1);
360 // [FIXME] AC4 channel count is defined per presentation. Provide a default channel count
361 // as stereo for the entire stream.
362 (*metaData)->setInt32(kKeyChannelCount, 2);
363 (*metaData)->setInt32(kKeySampleRate, samplingRate);
364 }
365 return OK;
366 }
367
IsSeeminglyValidAC4Header(const uint8_t * ptr,size_t size,unsigned & frameSize)368 static status_t IsSeeminglyValidAC4Header(const uint8_t *ptr, size_t size, unsigned &frameSize) {
369 return parseAC4SyncFrame(ptr, size, frameSize, NULL);
370 }
371
// Heuristically checks whether |ptr| points at an ADTS (AAC) frame header.
//
// Returns true only if at least 7 bytes are available, the 12-bit syncword is
// present, the layer field is 0, the profile is not the reserved MPEG-2
// profile 3, and the frame length stored in the header is plausible: it must
// cover at least the 7 header bytes examined here and must not exceed |size|.
// On success the header's frame length (header included) is stored in
// *frameLength when |frameLength| is non-NULL; on failure *frameLength is
// left untouched.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    // Number of header bytes inspected below.
    static const size_t kMinHeaderSize = 7;

    if (size < kMinHeaderSize) {
        // Not enough data to verify header.
        return false;
    }

    if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
        return false;
    }

    unsigned layer = (ptr[1] >> 1) & 3;

    if (layer != 0) {
        return false;
    }

    unsigned ID = (ptr[1] >> 3) & 1;
    unsigned profile_ObjectType = ptr[2] >> 6;

    if (ID == 1 && profile_ObjectType == 3) {
        // MPEG-2 profile 3 is reserved.
        return false;
    }

    // 13-bit frame length, spread over bytes 3..5.
    size_t frameLengthInHeader =
            ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);
    if (frameLengthInHeader < kMinHeaderSize) {
        // The length includes the header itself, so anything shorter
        // (including 0) cannot be a real frame; treat it as a false sync.
        return false;
    }
    if (frameLengthInHeader > size) {
        return false;
    }

    if (frameLength != NULL) {
        *frameLength = frameLengthInHeader;
    }
    return true;
}
406
// Heuristically checks whether |ptr| points at an MPEG audio frame header.
// Needs at least 3 bytes; requires the 11-bit frame sync and rejects the
// reserved values of the version ID, layer, bitrate index and sampling rate
// index fields.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Not enough data to verify header.
        return false;
    }

    const unsigned byte1 = ptr[1];
    const unsigned byte2 = ptr[2];

    const bool hasFrameSync = (ptr[0] == 0xff) && ((byte1 >> 5) == 0x07);
    if (!hasFrameSync) {
        return false;
    }

    const unsigned versionId = (byte1 >> 3) & 3;
    const unsigned layer = (byte1 >> 1) & 3;
    const unsigned bitrateIndex = byte2 >> 4;
    const unsigned samplingRateIndex = (byte2 >> 2) & 3;

    // Each of these values is reserved in its field.
    return versionId != 1
            && layer != 0
            && bitrateIndex != 0x0f
            && samplingRateIndex != 3;
}
443
appendData(const void * data,size_t size,int64_t timeUs,int32_t payloadOffset,uint32_t pesScramblingControl)444 status_t ElementaryStreamQueue::appendData(
445 const void *data, size_t size, int64_t timeUs,
446 int32_t payloadOffset, uint32_t pesScramblingControl) {
447
448 if (mEOSReached) {
449 ALOGE("appending data after EOS");
450 return ERROR_MALFORMED;
451 }
452
453 if (!isScrambled() && (mBuffer == NULL || mBuffer->size() == 0)) {
454 switch (mMode) {
455 case H264:
456 case MPEG_VIDEO:
457 {
458 #if 0
459 if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
460 return ERROR_MALFORMED;
461 }
462 #else
463 uint8_t *ptr = (uint8_t *)data;
464
465 ssize_t startOffset = -1;
466 for (size_t i = 0; i + 2 < size; ++i) {
467 if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
468 startOffset = i;
469 break;
470 }
471 }
472
473 if (startOffset < 0) {
474 return ERROR_MALFORMED;
475 }
476
477 if (mFormat == NULL && startOffset > 0) {
478 ALOGI("found something resembling an H.264/MPEG syncword "
479 "at offset %zd",
480 startOffset);
481 }
482
483 data = &ptr[startOffset];
484 size -= startOffset;
485 #endif
486 break;
487 }
488
489 case MPEG4_VIDEO:
490 {
491 #if 0
492 if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
493 return ERROR_MALFORMED;
494 }
495 #else
496 uint8_t *ptr = (uint8_t *)data;
497
498 ssize_t startOffset = -1;
499 for (size_t i = 0; i + 2 < size; ++i) {
500 if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
501 startOffset = i;
502 break;
503 }
504 }
505
506 if (startOffset < 0) {
507 return ERROR_MALFORMED;
508 }
509
510 if (startOffset > 0) {
511 ALOGI("found something resembling an H.264/MPEG syncword "
512 "at offset %zd",
513 startOffset);
514 }
515
516 data = &ptr[startOffset];
517 size -= startOffset;
518 #endif
519 break;
520 }
521
522 case AAC:
523 {
524 uint8_t *ptr = (uint8_t *)data;
525
526 #if 0
527 if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
528 return ERROR_MALFORMED;
529 }
530 #else
531 ssize_t startOffset = -1;
532 size_t frameLength;
533 for (size_t i = 0; i < size; ++i) {
534 if (IsSeeminglyValidADTSHeader(
535 &ptr[i], size - i, &frameLength)) {
536 startOffset = i;
537 break;
538 }
539 }
540
541 if (startOffset < 0) {
542 return ERROR_MALFORMED;
543 }
544
545 if (startOffset > 0) {
546 ALOGI("found something resembling an AAC syncword at "
547 "offset %zd",
548 startOffset);
549 }
550
551 if (frameLength != size - startOffset) {
552 ALOGV("First ADTS AAC frame length is %zd bytes, "
553 "while the buffer size is %zd bytes.",
554 frameLength, size - startOffset);
555 }
556
557 data = &ptr[startOffset];
558 size -= startOffset;
559 #endif
560 break;
561 }
562
563 case AC3:
564 case EAC3:
565 {
566 uint8_t *ptr = (uint8_t *)data;
567
568 ssize_t startOffset = -1;
569 for (size_t i = 0; i < size; ++i) {
570 unsigned payloadSize = 0;
571 if (mMode == AC3) {
572 payloadSize = parseAC3SyncFrame(&ptr[i], size - i, NULL);
573 } else if (mMode == EAC3) {
574 payloadSize = parseEAC3SyncFrame(&ptr[i], size - i, NULL);
575 }
576 if (payloadSize > 0) {
577 startOffset = i;
578 break;
579 }
580 }
581
582 if (startOffset < 0) {
583 return ERROR_MALFORMED;
584 }
585
586 if (startOffset > 0) {
587 ALOGI("found something resembling an (E)AC3 syncword at "
588 "offset %zd",
589 startOffset);
590 }
591
592 data = &ptr[startOffset];
593 size -= startOffset;
594 break;
595 }
596
597 case AC4:
598 {
599 uint8_t *ptr = (uint8_t *)data;
600 unsigned frameSize = 0;
601 ssize_t startOffset = -1;
602
603 // A valid AC4 stream should have minimum of 7 bytes in its buffer.
604 // (Sync header 4 bytes + AC4 toc 3 bytes)
605 if (size < 7) {
606 return ERROR_MALFORMED;
607 }
608 for (size_t i = 0; i < size; ++i) {
609 if (IsSeeminglyValidAC4Header(&ptr[i], size - i, frameSize) == OK) {
610 startOffset = i;
611 break;
612 }
613 }
614
615 if (startOffset < 0) {
616 return ERROR_MALFORMED;
617 }
618
619 if (startOffset > 0) {
620 ALOGI("found something resembling an AC4 syncword at "
621 "offset %zd",
622 startOffset);
623 }
624 if (frameSize != size - startOffset) {
625 ALOGV("AC4 frame size is %u bytes, while the buffer size is %zd bytes.",
626 frameSize, size - startOffset);
627 }
628
629 data = &ptr[startOffset];
630 size -= startOffset;
631 break;
632 }
633
634 case MPEG_AUDIO:
635 {
636 uint8_t *ptr = (uint8_t *)data;
637
638 ssize_t startOffset = -1;
639 for (size_t i = 0; i < size; ++i) {
640 if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
641 startOffset = i;
642 break;
643 }
644 }
645
646 if (startOffset < 0) {
647 return ERROR_MALFORMED;
648 }
649
650 if (startOffset > 0) {
651 ALOGI("found something resembling an MPEG audio "
652 "syncword at offset %zd",
653 startOffset);
654 }
655
656 data = &ptr[startOffset];
657 size -= startOffset;
658 break;
659 }
660
661 case PCM_AUDIO:
662 case METADATA:
663 {
664 break;
665 }
666
667 default:
668 ALOGE("Unknown mode: %d", mMode);
669 return ERROR_MALFORMED;
670 }
671 }
672
673 size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
674 if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
675 neededSize = (neededSize + 65535) & ~65535;
676
677 ALOGV("resizing buffer to size %zu", neededSize);
678
679 sp<ABuffer> buffer = new ABuffer(neededSize);
680 if (mBuffer != NULL) {
681 memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
682 buffer->setRange(0, mBuffer->size());
683 } else {
684 buffer->setRange(0, 0);
685 }
686
687 mBuffer = buffer;
688 }
689
690 memcpy(mBuffer->data() + mBuffer->size(), data, size);
691 mBuffer->setRange(0, mBuffer->size() + size);
692
693 RangeInfo info;
694 info.mLength = size;
695 info.mTimestampUs = timeUs;
696 info.mPesOffset = payloadOffset;
697 info.mPesScramblingControl = pesScramblingControl;
698 mRangeInfos.push_back(info);
699
700 #if 0
701 if (mMode == AAC) {
702 ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
703 hexdump(data, size);
704 }
705 #endif
706
707 return OK;
708 }
709
appendScrambledData(const void * data,size_t size,size_t leadingClearBytes,int32_t keyId,bool isSync,sp<ABuffer> clearSizes,sp<ABuffer> encSizes)710 void ElementaryStreamQueue::appendScrambledData(
711 const void *data, size_t size,
712 size_t leadingClearBytes,
713 int32_t keyId, bool isSync,
714 sp<ABuffer> clearSizes, sp<ABuffer> encSizes) {
715 if (!isScrambled()) {
716 return;
717 }
718
719 size_t neededSize = (mScrambledBuffer == NULL ? 0 : mScrambledBuffer->size()) + size;
720 if (mScrambledBuffer == NULL || neededSize > mScrambledBuffer->capacity()) {
721 neededSize = (neededSize + 65535) & ~65535;
722
723 ALOGI("resizing scrambled buffer to size %zu", neededSize);
724
725 sp<ABuffer> buffer = new ABuffer(neededSize);
726 if (mScrambledBuffer != NULL) {
727 memcpy(buffer->data(), mScrambledBuffer->data(), mScrambledBuffer->size());
728 buffer->setRange(0, mScrambledBuffer->size());
729 } else {
730 buffer->setRange(0, 0);
731 }
732
733 mScrambledBuffer = buffer;
734 }
735 memcpy(mScrambledBuffer->data() + mScrambledBuffer->size(), data, size);
736 mScrambledBuffer->setRange(0, mScrambledBuffer->size() + size);
737
738 ScrambledRangeInfo scrambledInfo;
739 scrambledInfo.mLength = size;
740 scrambledInfo.mLeadingClearBytes = leadingClearBytes;
741 scrambledInfo.mKeyId = keyId;
742 scrambledInfo.mIsSync = isSync;
743 scrambledInfo.mClearSizes = clearSizes;
744 scrambledInfo.mEncSizes = encSizes;
745
746 ALOGV("[stream %d] appending scrambled range: size=%zu", mMode, size);
747
748 mScrambledRangeInfos.push_back(scrambledInfo);
749 }
750
dequeueScrambledAccessUnit()751 sp<ABuffer> ElementaryStreamQueue::dequeueScrambledAccessUnit() {
752 size_t nextScan = mBuffer->size();
753 int32_t pesOffset = 0, pesScramblingControl = 0;
754 int64_t timeUs = fetchTimestamp(nextScan, &pesOffset, &pesScramblingControl);
755 if (timeUs < 0ll) {
756 ALOGE("Negative timeUs");
757 return NULL;
758 }
759
760 // return scrambled unit
761 int32_t keyId = pesScramblingControl, isSync = 0, scrambledLength = 0;
762 sp<ABuffer> clearSizes, encSizes;
763 size_t leadingClearBytes;
764 while (mScrambledRangeInfos.size() > mRangeInfos.size()) {
765 auto it = mScrambledRangeInfos.begin();
766 ALOGV("[stream %d] fetching scrambled range: size=%zu", mMode, it->mLength);
767
768 if (scrambledLength > 0) {
769 // This shouldn't happen since we always dequeue the entire PES.
770 ALOGW("Discarding srambled length %d", scrambledLength);
771 }
772 scrambledLength = it->mLength;
773
774 // TODO: handle key id change, use first non-zero keyId for now
775 if (keyId == 0) {
776 keyId = it->mKeyId;
777 }
778 clearSizes = it->mClearSizes;
779 encSizes = it->mEncSizes;
780 isSync = it->mIsSync;
781 leadingClearBytes = it->mLeadingClearBytes;
782 mScrambledRangeInfos.erase(it);
783 }
784 if (scrambledLength == 0) {
785 ALOGE("[stream %d] empty scrambled unit!", mMode);
786 return NULL;
787 }
788
789 // Retrieve the leading clear bytes info, and use it to set the clear
790 // range on mBuffer. Note that the leading clear bytes includes the
791 // PES header portion, while mBuffer doesn't.
792 if ((int32_t)leadingClearBytes > pesOffset) {
793 mBuffer->setRange(0, leadingClearBytes - pesOffset);
794 } else {
795 mBuffer->setRange(0, 0);
796 }
797
798 // Try to parse formats, and if unavailable set up a dummy format.
799 // Only support the following modes for scrambled content for now.
800 // (will be expanded later).
801 if (mFormat == NULL) {
802 mFormat = new MetaData;
803 switch (mMode) {
804 case H264:
805 {
806 if (!MakeAVCCodecSpecificData(
807 *mFormat, mBuffer->data(), mBuffer->size())) {
808 ALOGI("Creating dummy AVC format for scrambled content");
809
810 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
811 mFormat->setInt32(kKeyWidth, 1280);
812 mFormat->setInt32(kKeyHeight, 720);
813 }
814 break;
815 }
816 case AAC:
817 {
818 if (!MakeAACCodecSpecificData(
819 *mFormat, mBuffer->data(), mBuffer->size())) {
820 ALOGI("Creating dummy AAC format for scrambled content");
821
822 MakeAACCodecSpecificData(*mFormat,
823 1 /*profile*/, 7 /*sampling_freq_index*/, 1 /*channel_config*/);
824 mFormat->setInt32(kKeyIsADTS, true);
825 }
826
827 break;
828 }
829 case MPEG_VIDEO:
830 {
831 ALOGI("Creating dummy MPEG format for scrambled content");
832
833 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
834 mFormat->setInt32(kKeyWidth, 1280);
835 mFormat->setInt32(kKeyHeight, 720);
836 break;
837 }
838 default:
839 {
840 ALOGE("Unknown mode for scrambled content");
841 return NULL;
842 }
843 }
844
845 // for MediaExtractor.CasInfo
846 mFormat->setInt32(kKeyCASystemID, mCASystemId);
847 mFormat->setData(kKeyCASessionID,
848 0, mCasSessionId.data(), mCasSessionId.size());
849 }
850
851 mBuffer->setRange(0, 0);
852
853 // copy into scrambled access unit
854 sp<ABuffer> scrambledAccessUnit = ABuffer::CreateAsCopy(
855 mScrambledBuffer->data(), scrambledLength);
856
857 scrambledAccessUnit->meta()->setInt64("timeUs", timeUs);
858 if (isSync) {
859 scrambledAccessUnit->meta()->setInt32("isSync", 1);
860 }
861
862 // fill in CryptoInfo fields for AnotherPacketSource::read()
863 // MediaCas doesn't use cryptoMode, but set to non-zero value here.
864 scrambledAccessUnit->meta()->setInt32(
865 "cryptoMode", CryptoPlugin::kMode_AES_CTR);
866 scrambledAccessUnit->meta()->setInt32("cryptoKey", keyId);
867 scrambledAccessUnit->meta()->setBuffer("clearBytes", clearSizes);
868 scrambledAccessUnit->meta()->setBuffer("encBytes", encSizes);
869 scrambledAccessUnit->meta()->setInt32("pesOffset", pesOffset);
870
871 memmove(mScrambledBuffer->data(),
872 mScrambledBuffer->data() + scrambledLength,
873 mScrambledBuffer->size() - scrambledLength);
874
875 mScrambledBuffer->setRange(0, mScrambledBuffer->size() - scrambledLength);
876
877 ALOGV("[stream %d] dequeued scrambled AU: timeUs=%lld, size=%zu",
878 mMode, (long long)timeUs, scrambledAccessUnit->size());
879
880 return scrambledAccessUnit;
881 }
882
dequeueAccessUnit()883 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
884 if (isScrambled()) {
885 return dequeueScrambledAccessUnit();
886 }
887
888 if ((mFlags & kFlag_AlignedData) && mMode == H264) {
889 if (mRangeInfos.empty()) {
890 return NULL;
891 }
892
893 RangeInfo info = *mRangeInfos.begin();
894 mRangeInfos.erase(mRangeInfos.begin());
895
896 sp<ABuffer> accessUnit = new ABuffer(info.mLength);
897 memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
898 accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
899
900 memmove(mBuffer->data(),
901 mBuffer->data() + info.mLength,
902 mBuffer->size() - info.mLength);
903
904 mBuffer->setRange(0, mBuffer->size() - info.mLength);
905
906 if (mFormat == NULL) {
907 mFormat = new MetaData;
908 if (!MakeAVCCodecSpecificData(*mFormat, accessUnit->data(), accessUnit->size())) {
909 mFormat.clear();
910 }
911 }
912
913 return accessUnit;
914 }
915
916 switch (mMode) {
917 case H264:
918 return dequeueAccessUnitH264();
919 case AAC:
920 return dequeueAccessUnitAAC();
921 case AC3:
922 case EAC3:
923 return dequeueAccessUnitEAC3();
924 case AC4:
925 return dequeueAccessUnitAC4();
926 case MPEG_VIDEO:
927 return dequeueAccessUnitMPEGVideo();
928 case MPEG4_VIDEO:
929 return dequeueAccessUnitMPEG4Video();
930 case PCM_AUDIO:
931 return dequeueAccessUnitPCMAudio();
932 case METADATA:
933 return dequeueAccessUnitMetadata();
934 default:
935 if (mMode != MPEG_AUDIO) {
936 ALOGE("Unknown mode");
937 return NULL;
938 }
939 return dequeueAccessUnitMPEGAudio();
940 }
941 }
942
dequeueAccessUnitEAC3()943 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitEAC3() {
944 unsigned syncStartPos = 0; // in bytes
945 unsigned payloadSize = 0;
946 sp<MetaData> format = new MetaData;
947
948 ALOGV("dequeueAccessUnitEAC3[%d]: mBuffer %p(%zu)", mAUIndex,
949 mBuffer->data(), mBuffer->size());
950
951 while (true) {
952 if (syncStartPos + 2 >= mBuffer->size()) {
953 return NULL;
954 }
955
956 uint8_t *ptr = mBuffer->data() + syncStartPos;
957 size_t size = mBuffer->size() - syncStartPos;
958 if (mMode == AC3) {
959 payloadSize = parseAC3SyncFrame(ptr, size, &format);
960 } else if (mMode == EAC3) {
961 payloadSize = parseEAC3SyncFrame(ptr, size, &format);
962 }
963 if (payloadSize > 0) {
964 break;
965 }
966
967 ALOGV("dequeueAccessUnitEAC3[%d]: syncStartPos %u payloadSize %u",
968 mAUIndex, syncStartPos, payloadSize);
969
970 ++syncStartPos;
971 }
972
973 if (mBuffer->size() < syncStartPos + payloadSize) {
974 ALOGV("Not enough buffer size for E/AC3");
975 return NULL;
976 }
977
978 if (mFormat == NULL) {
979 mFormat = format;
980 }
981
982 int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
983 if (timeUs < 0ll) {
984 ALOGE("negative timeUs");
985 return NULL;
986 }
987
988 // Not decrypting if key info not available (e.g., scanner/extractor parsing ts files)
989 if (mSampleDecryptor != NULL) {
990 if (mMode == AC3) {
991 mSampleDecryptor->processAC3(mBuffer->data() + syncStartPos, payloadSize);
992 } else if (mMode == EAC3) {
993 ALOGE("EAC3 AU is encrypted and decryption is not supported");
994 return NULL;
995 }
996 }
997 mAUIndex++;
998
999 sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
1000 memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
1001
1002 accessUnit->meta()->setInt64("timeUs", timeUs);
1003 accessUnit->meta()->setInt32("isSync", 1);
1004
1005 memmove(
1006 mBuffer->data(),
1007 mBuffer->data() + syncStartPos + payloadSize,
1008 mBuffer->size() - syncStartPos - payloadSize);
1009
1010 mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
1011
1012 return accessUnit;
1013 }
1014
dequeueAccessUnitAC4()1015 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC4() {
1016 unsigned syncStartPos = 0;
1017 unsigned payloadSize = 0;
1018 sp<MetaData> format = new MetaData;
1019 ALOGV("dequeueAccessUnit_AC4[%d]: mBuffer %p(%zu)", mAUIndex, mBuffer->data(), mBuffer->size());
1020
1021 // A valid AC4 stream should have minimum of 7 bytes in its buffer.
1022 // (Sync header 4 bytes + AC4 toc 3 bytes)
1023 if (mBuffer->size() < 7) {
1024 return NULL;
1025 }
1026
1027 while (true) {
1028 if (syncStartPos + 2 >= mBuffer->size()) {
1029 return NULL;
1030 }
1031
1032 status_t status = parseAC4SyncFrame(
1033 mBuffer->data() + syncStartPos,
1034 mBuffer->size() - syncStartPos,
1035 payloadSize,
1036 &format);
1037 if (status == OK) {
1038 break;
1039 }
1040
1041 ALOGV("dequeueAccessUnit_AC4[%d]: syncStartPos %u payloadSize %u",
1042 mAUIndex, syncStartPos, payloadSize);
1043
1044 ++syncStartPos;
1045 }
1046
1047 if (mBuffer->size() < syncStartPos + payloadSize) {
1048 ALOGV("Not enough buffer size for AC4");
1049 return NULL;
1050 }
1051
1052 if (mFormat == NULL) {
1053 mFormat = format;
1054 }
1055
1056 int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
1057 if (timeUs < 0ll) {
1058 ALOGE("negative timeUs");
1059 return NULL;
1060 }
1061 mAUIndex++;
1062
1063 sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
1064 memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
1065
1066 accessUnit->meta()->setInt64("timeUs", timeUs);
1067 accessUnit->meta()->setInt32("isSync", 1);
1068
1069 memmove(
1070 mBuffer->data(),
1071 mBuffer->data() + syncStartPos + payloadSize,
1072 mBuffer->size() - syncStartPos - payloadSize);
1073
1074 mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
1075 return accessUnit;
1076 }
1077
dequeueAccessUnitPCMAudio()1078 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
1079 if (mBuffer->size() < 4) {
1080 return NULL;
1081 }
1082
1083 ABitReader bits(mBuffer->data(), 4);
1084 if (bits.getBits(8) != 0xa0) {
1085 ALOGE("Unexpected bit values");
1086 return NULL;
1087 }
1088 unsigned numAUs = bits.getBits(8);
1089 bits.skipBits(8);
1090 unsigned quantization_word_length __unused = bits.getBits(2);
1091 unsigned audio_sampling_frequency = bits.getBits(3);
1092 unsigned num_channels = bits.getBits(3);
1093
1094 if (audio_sampling_frequency != 2) {
1095 ALOGE("Wrong sampling freq");
1096 return NULL;
1097 }
1098 if (num_channels != 1u) {
1099 ALOGE("Wrong channel #");
1100 return NULL;
1101 }
1102
1103 if (mFormat == NULL) {
1104 mFormat = new MetaData;
1105 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
1106 mFormat->setInt32(kKeyChannelCount, 2);
1107 mFormat->setInt32(kKeySampleRate, 48000);
1108 mFormat->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
1109 }
1110
1111 static const size_t kFramesPerAU = 80;
1112 size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
1113
1114 size_t payloadSize = numAUs * frameSize * kFramesPerAU;
1115
1116 if (mBuffer->size() < 4 + payloadSize) {
1117 return NULL;
1118 }
1119
1120 sp<ABuffer> accessUnit = new ABuffer(payloadSize);
1121 memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
1122
1123 int64_t timeUs = fetchTimestamp(payloadSize + 4);
1124 if (timeUs < 0LL) {
1125 ALOGE("Negative timeUs");
1126 return NULL;
1127 }
1128 accessUnit->meta()->setInt64("timeUs", timeUs);
1129 accessUnit->meta()->setInt32("isSync", 1);
1130
1131 int16_t *ptr = (int16_t *)accessUnit->data();
1132 for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
1133 ptr[i] = ntohs(ptr[i]);
1134 }
1135
1136 memmove(
1137 mBuffer->data(),
1138 mBuffer->data() + 4 + payloadSize,
1139 mBuffer->size() - 4 - payloadSize);
1140
1141 mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
1142
1143 return accessUnit;
1144 }
1145
dequeueAccessUnitAAC()1146 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
1147 if (mBuffer->size() == 0) {
1148 return NULL;
1149 }
1150
1151 if (mRangeInfos.empty()) {
1152 return NULL;
1153 }
1154
1155 const RangeInfo &info = *mRangeInfos.begin();
1156 if (info.mLength == 0 || mBuffer->size() < info.mLength) {
1157 return NULL;
1158 }
1159
1160 if (info.mTimestampUs < 0LL) {
1161 ALOGE("Negative info.mTimestampUs");
1162 return NULL;
1163 }
1164
1165 ALOGV("dequeueAccessUnit_AAC[%d]: mBuffer %zu info.mLength %zu",
1166 mAUIndex, mBuffer->size(), info.mLength);
1167
1168 struct ADTSPosition {
1169 size_t offset;
1170 size_t headerSize;
1171 size_t length;
1172 };
1173
1174 Vector<ADTSPosition> frames;
1175
1176 // The idea here is consume all AAC frames starting at offsets before
1177 // info.mLength so we can assign a meaningful timestamp without
1178 // having to interpolate.
1179 // The final AAC frame may well extend into the next RangeInfo but
1180 // that's ok.
1181 size_t offset = 0;
1182 while (offset < info.mLength) {
1183 if (offset + 7 > mBuffer->size()) {
1184 return NULL;
1185 }
1186
1187 ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
1188
1189 // adts_fixed_header
1190
1191 if (bits.getBits(12) != 0xfffu) {
1192 ALOGE("Wrong atds_fixed_header");
1193 return NULL;
1194 }
1195 bits.skipBits(3); // ID, layer
1196 bool protection_absent = bits.getBits(1) != 0;
1197
1198 if (mFormat == NULL) {
1199 mFormat = new MetaData;
1200 if (!MakeAACCodecSpecificData(
1201 *mFormat, mBuffer->data() + offset, mBuffer->size() - offset)) {
1202 return NULL;
1203 }
1204
1205 int32_t sampleRate;
1206 int32_t numChannels;
1207 if (!mFormat->findInt32(kKeySampleRate, &sampleRate)) {
1208 ALOGE("SampleRate not found");
1209 return NULL;
1210 }
1211 if (!mFormat->findInt32(kKeyChannelCount, &numChannels)) {
1212 ALOGE("ChannelCount not found");
1213 return NULL;
1214 }
1215
1216 ALOGI("found AAC codec config (%d Hz, %d channels)",
1217 sampleRate, numChannels);
1218 }
1219
1220 // profile_ObjectType, sampling_frequency_index, private_bits,
1221 // channel_configuration, original_copy, home
1222 bits.skipBits(12);
1223
1224 // adts_variable_header
1225
1226 // copyright_identification_bit, copyright_identification_start
1227 bits.skipBits(2);
1228
1229 unsigned aac_frame_length = bits.getBits(13);
1230 if (aac_frame_length == 0){
1231 ALOGE("b/62673179, Invalid AAC frame length!");
1232 android_errorWriteLog(0x534e4554, "62673179");
1233 return NULL;
1234 }
1235
1236 bits.skipBits(11); // adts_buffer_fullness
1237
1238 unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
1239
1240 if (number_of_raw_data_blocks_in_frame != 0) {
1241 // To be implemented.
1242 ALOGE("Should not reach here.");
1243 return NULL;
1244 }
1245
1246 if (offset + aac_frame_length > mBuffer->size()) {
1247 return NULL;
1248 }
1249
1250 size_t headerSize = protection_absent ? 7 : 9;
1251
1252 // tracking the frame positions first then decrypt only if an accessUnit to be generated
1253 if (mSampleDecryptor != NULL) {
1254 ADTSPosition frame = {
1255 .offset = offset,
1256 .headerSize = headerSize,
1257 .length = aac_frame_length
1258 };
1259
1260 frames.push(frame);
1261 }
1262
1263 offset += aac_frame_length;
1264 }
1265
1266 // Decrypting only if the loop didn't exit early and an accessUnit is about to be generated
1267 // Not decrypting if key info not available (e.g., scanner/extractor parsing ts files)
1268 if (mSampleDecryptor != NULL) {
1269 for (size_t frameId = 0; frameId < frames.size(); frameId++) {
1270 const ADTSPosition &frame = frames.itemAt(frameId);
1271
1272 mSampleDecryptor->processAAC(frame.headerSize,
1273 mBuffer->data() + frame.offset, frame.length);
1274 // ALOGV("dequeueAccessUnitAAC[%zu]: while offset %zu headerSize %zu frame_len %zu",
1275 // frameId, frame.offset, frame.headerSize, frame.length);
1276 }
1277 }
1278 mAUIndex++;
1279
1280 int64_t timeUs = fetchTimestamp(offset);
1281
1282 sp<ABuffer> accessUnit = new ABuffer(offset);
1283 memcpy(accessUnit->data(), mBuffer->data(), offset);
1284
1285 memmove(mBuffer->data(), mBuffer->data() + offset,
1286 mBuffer->size() - offset);
1287 mBuffer->setRange(0, mBuffer->size() - offset);
1288
1289 accessUnit->meta()->setInt64("timeUs", timeUs);
1290 accessUnit->meta()->setInt32("isSync", 1);
1291
1292 return accessUnit;
1293 }
1294
fetchTimestamp(size_t size,int32_t * pesOffset,int32_t * pesScramblingControl)1295 int64_t ElementaryStreamQueue::fetchTimestamp(
1296 size_t size, int32_t *pesOffset, int32_t *pesScramblingControl) {
1297 int64_t timeUs = -1;
1298 bool first = true;
1299
1300 while (size > 0) {
1301 if (mRangeInfos.empty()) {
1302 return timeUs;
1303 }
1304
1305 RangeInfo *info = &*mRangeInfos.begin();
1306
1307 if (first) {
1308 timeUs = info->mTimestampUs;
1309 if (pesOffset != NULL) {
1310 *pesOffset = info->mPesOffset;
1311 }
1312 if (pesScramblingControl != NULL) {
1313 *pesScramblingControl = info->mPesScramblingControl;
1314 }
1315 first = false;
1316 }
1317
1318 if (info->mLength > size) {
1319 info->mLength -= size;
1320 size = 0;
1321 } else {
1322 size -= info->mLength;
1323
1324 mRangeInfos.erase(mRangeInfos.begin());
1325 info = NULL;
1326 }
1327
1328 }
1329
1330 if (timeUs == 0LL) {
1331 ALOGV("Returning 0 timestamp");
1332 }
1333
1334 return timeUs;
1335 }
1336
dequeueAccessUnitH264()1337 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
1338 const uint8_t *data = mBuffer->data();
1339
1340 size_t size = mBuffer->size();
1341 Vector<NALPosition> nals;
1342
1343 size_t totalSize = 0;
1344 size_t seiCount = 0;
1345
1346 status_t err;
1347 const uint8_t *nalStart;
1348 size_t nalSize;
1349 bool foundSlice = false;
1350 bool foundIDR = false;
1351
1352 ALOGV("dequeueAccessUnit_H264[%d] %p/%zu", mAUIndex, data, size);
1353
1354 while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
1355 if (nalSize == 0) continue;
1356
1357 unsigned nalType = nalStart[0] & 0x1f;
1358 bool flush = false;
1359
1360 if (nalType == 1 || nalType == 5) {
1361 if (nalType == 5) {
1362 foundIDR = true;
1363 }
1364 if (foundSlice) {
1365 //TODO: Shouldn't this have been called with nalSize-1?
1366 ABitReader br(nalStart + 1, nalSize);
1367 unsigned first_mb_in_slice = parseUE(&br);
1368
1369 if (first_mb_in_slice == 0) {
1370 // This slice starts a new frame.
1371
1372 flush = true;
1373 }
1374 }
1375
1376 foundSlice = true;
1377 } else if ((nalType == 9 || nalType == 7) && foundSlice) {
1378 // Access unit delimiter and SPS will be associated with the
1379 // next frame.
1380
1381 flush = true;
1382 } else if (nalType == 6 && nalSize > 0) {
1383 // found non-zero sized SEI
1384 ++seiCount;
1385 }
1386
1387 if (flush) {
1388 // The access unit will contain all nal units up to, but excluding
1389 // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
1390
1391 size_t auSize = 4 * nals.size() + totalSize;
1392 sp<ABuffer> accessUnit = new ABuffer(auSize);
1393 sp<ABuffer> sei;
1394
1395 if (seiCount > 0) {
1396 sei = new ABuffer(seiCount * sizeof(NALPosition));
1397 accessUnit->meta()->setBuffer("sei", sei);
1398 }
1399
1400 #if !LOG_NDEBUG
1401 AString out;
1402 #endif
1403
1404 size_t dstOffset = 0;
1405 size_t seiIndex = 0;
1406 size_t shrunkBytes = 0;
1407 for (size_t i = 0; i < nals.size(); ++i) {
1408 const NALPosition &pos = nals.itemAt(i);
1409
1410 unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
1411
1412 if (nalType == 6 && pos.nalSize > 0) {
1413 if (seiIndex >= sei->size() / sizeof(NALPosition)) {
1414 ALOGE("Wrong seiIndex");
1415 return NULL;
1416 }
1417 NALPosition &seiPos = ((NALPosition *)sei->data())[seiIndex++];
1418 seiPos.nalOffset = dstOffset + 4;
1419 seiPos.nalSize = pos.nalSize;
1420 }
1421
1422 #if !LOG_NDEBUG
1423 char tmp[128];
1424 sprintf(tmp, "0x%02x", nalType);
1425 if (i > 0) {
1426 out.append(", ");
1427 }
1428 out.append(tmp);
1429 #endif
1430
1431 memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
1432
1433 if (mSampleDecryptor != NULL && (nalType == 1 || nalType == 5)) {
1434 uint8_t *nalData = mBuffer->data() + pos.nalOffset;
1435 size_t newSize = mSampleDecryptor->processNal(nalData, pos.nalSize);
1436 // Note: the data can shrink due to unescaping
1437 memcpy(accessUnit->data() + dstOffset + 4,
1438 nalData,
1439 newSize);
1440 dstOffset += newSize + 4;
1441
1442 size_t thisShrunkBytes = pos.nalSize - newSize;
1443 //ALOGV("dequeueAccessUnitH264[%d]: nalType: %d -> %zu (%zu)",
1444 // nalType, (int)pos.nalSize, newSize, thisShrunkBytes);
1445
1446 shrunkBytes += thisShrunkBytes;
1447 }
1448 else {
1449 memcpy(accessUnit->data() + dstOffset + 4,
1450 mBuffer->data() + pos.nalOffset,
1451 pos.nalSize);
1452
1453 dstOffset += pos.nalSize + 4;
1454 //ALOGV("dequeueAccessUnitH264 [%d] %d @%d",
1455 // nalType, (int)pos.nalSize, (int)pos.nalOffset);
1456 }
1457 }
1458
1459 #if !LOG_NDEBUG
1460 ALOGV("accessUnit contains nal types %s", out.c_str());
1461 #endif
1462
1463 const NALPosition &pos = nals.itemAt(nals.size() - 1);
1464 size_t nextScan = pos.nalOffset + pos.nalSize;
1465
1466 memmove(mBuffer->data(),
1467 mBuffer->data() + nextScan,
1468 mBuffer->size() - nextScan);
1469
1470 mBuffer->setRange(0, mBuffer->size() - nextScan);
1471
1472 int64_t timeUs = fetchTimestamp(nextScan);
1473 if (timeUs < 0LL) {
1474 ALOGE("Negative timeUs");
1475 return NULL;
1476 }
1477
1478 accessUnit->meta()->setInt64("timeUs", timeUs);
1479 if (foundIDR) {
1480 accessUnit->meta()->setInt32("isSync", 1);
1481 }
1482
1483 if (mFormat == NULL) {
1484 mFormat = new MetaData;
1485 if (!MakeAVCCodecSpecificData(*mFormat,
1486 accessUnit->data(),
1487 accessUnit->size())) {
1488 mFormat.clear();
1489 }
1490 }
1491
1492 if (mSampleDecryptor != NULL && shrunkBytes > 0) {
1493 size_t adjustedSize = accessUnit->size() - shrunkBytes;
1494 ALOGV("dequeueAccessUnitH264[%d]: AU size adjusted %zu -> %zu",
1495 mAUIndex, accessUnit->size(), adjustedSize);
1496 accessUnit->setRange(0, adjustedSize);
1497 }
1498
1499 ALOGV("dequeueAccessUnitH264[%d]: AU %p(%zu) dstOffset:%zu, nals:%zu, totalSize:%zu ",
1500 mAUIndex, accessUnit->data(), accessUnit->size(),
1501 dstOffset, nals.size(), totalSize);
1502 mAUIndex++;
1503
1504 return accessUnit;
1505 }
1506
1507 NALPosition pos;
1508 pos.nalOffset = nalStart - mBuffer->data();
1509 pos.nalSize = nalSize;
1510
1511 nals.push(pos);
1512
1513 totalSize += nalSize;
1514 }
1515 if (err != (status_t)-EAGAIN) {
1516 ALOGE("Unexpeted err");
1517 return NULL;
1518 }
1519
1520 return NULL;
1521 }
1522
dequeueAccessUnitMPEGAudio()1523 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
1524 const uint8_t *data = mBuffer->data();
1525 size_t size = mBuffer->size();
1526
1527 if (size < 4) {
1528 return NULL;
1529 }
1530
1531 uint32_t header = U32_AT(data);
1532
1533 size_t frameSize;
1534 int samplingRate, numChannels, bitrate, numSamples;
1535 if (!GetMPEGAudioFrameSize(
1536 header, &frameSize, &samplingRate, &numChannels,
1537 &bitrate, &numSamples)) {
1538 ALOGE("Failed to get audio frame size");
1539 mBuffer->setRange(0, 0);
1540 return NULL;
1541 }
1542
1543 if (size < frameSize) {
1544 return NULL;
1545 }
1546
1547 unsigned layer = 4 - ((header >> 17) & 3);
1548
1549 sp<ABuffer> accessUnit = new ABuffer(frameSize);
1550 memcpy(accessUnit->data(), data, frameSize);
1551
1552 memmove(mBuffer->data(),
1553 mBuffer->data() + frameSize,
1554 mBuffer->size() - frameSize);
1555
1556 mBuffer->setRange(0, mBuffer->size() - frameSize);
1557
1558 int64_t timeUs = fetchTimestamp(frameSize);
1559 if (timeUs < 0LL) {
1560 ALOGE("Negative timeUs");
1561 return NULL;
1562 }
1563
1564 if (mFormat != NULL) {
1565 const char *mime;
1566 if (mFormat->findCString(kKeyMIMEType, &mime)) {
1567 if ((layer == 1) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I)) {
1568 ALOGE("Audio layer is not MPEG_LAYER_I");
1569 return NULL;
1570 } else if ((layer == 2) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II)) {
1571 ALOGE("Audio layer is not MPEG_LAYER_II");
1572 return NULL;
1573 } else if ((layer == 3) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG)) {
1574 ALOGE("Audio layer is not AUDIO_MPEG");
1575 return NULL;
1576 }
1577 }
1578 }
1579
1580 accessUnit->meta()->setInt64("timeUs", timeUs);
1581 accessUnit->meta()->setInt32("isSync", 1);
1582
1583 if (mFormat == NULL) {
1584 mFormat = new MetaData;
1585
1586 switch (layer) {
1587 case 1:
1588 mFormat->setCString(
1589 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
1590 break;
1591 case 2:
1592 mFormat->setCString(
1593 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
1594 break;
1595 case 3:
1596 mFormat->setCString(
1597 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
1598 break;
1599 default:
1600 return NULL;
1601 }
1602
1603 mFormat->setInt32(kKeySampleRate, samplingRate);
1604 mFormat->setInt32(kKeyChannelCount, numChannels);
1605 }
1606
1607 return accessUnit;
1608 }
1609
EncodeSize14(uint8_t ** _ptr,size_t size)1610 static void EncodeSize14(uint8_t **_ptr, size_t size) {
1611 if (size > 0x3fff) {
1612 ALOGE("Wrong size");
1613 return;
1614 }
1615
1616 uint8_t *ptr = *_ptr;
1617
1618 *ptr++ = 0x80 | (size >> 7);
1619 *ptr++ = size & 0x7f;
1620
1621 *_ptr = ptr;
1622 }
1623
// Wraps codec specific data in a minimal ESDS blob.
//
// Layout written (25 bytes of framing followed by the csd bytes):
//   0x03 <size14>  ES_ID(2) flags(1)
//   0x04 <size14>  0x40, 12 zero bytes
//   0x05 <size14>  csd
// Each <size14> is produced by EncodeSize14() and counts the bytes that
// follow it within its section.
static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
    const size_t csdSize = csd->size();
    sp<ABuffer> esds = new ABuffer(csdSize + 25);

    uint8_t *out = esds->data();

    *out++ = 0x03;
    EncodeSize14(&out, 22 + csdSize);

    // ES_ID (2 bytes), then streamDependenceFlag, URL_Flag, OCRstreamFlag.
    memset(out, 0x00, 3);
    out += 3;

    *out++ = 0x04;
    EncodeSize14(&out, 16 + csdSize);

    // NOTE(review): 0x40 is labelled "Audio ISO/IEC 14496-3" in the original
    // even though this helper packages video config; confirm this is intended.
    *out++ = 0x40;  // Audio ISO/IEC 14496-3

    // Remaining 12 bytes of this section are left zeroed.
    memset(out, 0x00, 12);
    out += 12;

    *out++ = 0x05;
    EncodeSize14(&out, csdSize);

    memcpy(out, csd->data(), csdSize);

    return esds;
}
1652
// Dequeues one MPEG video access unit from mBuffer.
//
// The buffer is scanned for 00 00 01 start codes. Until a format exists,
// everything before the first sequence header (0xb3) is discarded, and the
// sequence header (plus an optional 0xb5 extension) is turned into the codec
// config; that call returns NULL. Once a format exists, an access unit spans
// from the front of the buffer up to the second picture start code (0x00)
// found in this scan.
//
// The returned buffer carries "timeUs", "isSync" (when a GOP header was seen
// and it is not a broken link, or it is a closed GOP) and, if user data start
// codes (0xb2) were seen, an "mpeg-user-data" buffer with their offsets.
//
// Returns NULL if more data is needed, right after the format has been
// established, or on error.
sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
    const uint8_t *data = mBuffer->data();
    size_t size = mBuffer->size();

    // Offsets (relative to the start of the access unit) of user data chunks.
    Vector<size_t> userDataPositions;

    bool sawPictureStart = false;
    // Rolling history of the last three start code values seen.
    int pprevStartCode = -1;
    int prevStartCode = -1;
    int currentStartCode = -1;
    bool gopFound = false;
    bool isClosedGop = false;
    bool brokenLink = false;

    size_t offset = 0;
    while (offset + 3 < size) {
        // Advance byte by byte until a 00 00 01 prefix is found.
        if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
            ++offset;
            continue;
        }

        pprevStartCode = prevStartCode;
        prevStartCode = currentStartCode;
        currentStartCode = data[offset + 3];

        if (currentStartCode == 0xb3 && mFormat == NULL) {
            // No format yet: throw away everything in front of the sequence
            // header, together with the matching timestamp bookkeeping.
            memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
            size -= offset;
            (void)fetchTimestamp(offset);
            offset = 0;
            mBuffer->setRange(0, size);
        }

        if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
                || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
            // seqHeader without/with extension

            if (mFormat == NULL) {
                if (size < 7u) {
                    ALOGE("Size too small");
                    return NULL;
                }

                // The sequence header now sits at the front of the buffer;
                // width and height are the two 12-bit fields following the
                // 4-byte start code.
                unsigned width =
                    (data[4] << 4) | data[5] >> 4;

                unsigned height =
                    ((data[5] & 0x0f) << 8) | data[6];

                mFormat = new MetaData;
                mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
                mFormat->setInt32(kKeyWidth, width);
                mFormat->setInt32(kKeyHeight, height);

                ALOGI("found MPEG2 video codec config (%d x %d)", width, height);

                // Everything before the current start code is the codec
                // config; remove it from the queue.
                sp<ABuffer> csd = new ABuffer(offset);
                memcpy(csd->data(), data, offset);

                memmove(mBuffer->data(),
                        mBuffer->data() + offset,
                        mBuffer->size() - offset);

                mBuffer->setRange(0, mBuffer->size() - offset);
                size -= offset;
                (void)fetchTimestamp(offset);
                offset = 0;

                // hexdump(csd->data(), csd->size());

                sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
                mFormat->setData(
                        kKeyESDS, kTypeESDS, esds->data(), esds->size());

                return NULL;
            }
        }

        if (mFormat != NULL && currentStartCode == 0xb8) {
            // GOP layer
            if (offset + 7 >= size) {
                ALOGE("Size too small");
                return NULL;
            }
            gopFound = true;
            // Both flags are read from the byte at offset + 7.
            isClosedGop = (data[offset + 7] & 0x40) != 0;
            brokenLink = (data[offset + 7] & 0x20) != 0;
        }

        if (mFormat != NULL && currentStartCode == 0xb2) {
            userDataPositions.add(offset);
        }

        if (mFormat != NULL && currentStartCode == 0x00) {
            // Picture start

            if (!sawPictureStart) {
                sawPictureStart = true;
            } else {
                // Second picture start: everything before it is one access
                // unit.
                sp<ABuffer> accessUnit = new ABuffer(offset);
                memcpy(accessUnit->data(), data, offset);

                memmove(mBuffer->data(),
                        mBuffer->data() + offset,
                        mBuffer->size() - offset);

                mBuffer->setRange(0, mBuffer->size() - offset);

                int64_t timeUs = fetchTimestamp(offset);
                if (timeUs < 0LL) {
                    ALOGE("Negative timeUs");
                    return NULL;
                }

                offset = 0;

                accessUnit->meta()->setInt64("timeUs", timeUs);
                if (gopFound && (!brokenLink || isClosedGop)) {
                    accessUnit->meta()->setInt32("isSync", 1);
                }

                ALOGV("returning MPEG video access unit at time %" PRId64 " us",
                      timeUs);

                // hexdump(accessUnit->data(), accessUnit->size());

                if (userDataPositions.size() > 0) {
                    // Attach the user data offsets as a packed size_t array.
                    sp<ABuffer> mpegUserData =
                        new ABuffer(userDataPositions.size() * sizeof(size_t));
                    if (mpegUserData != NULL && mpegUserData->data() != NULL) {
                        for (size_t i = 0; i < userDataPositions.size(); ++i) {
                            memcpy(
                                    mpegUserData->data() + i * sizeof(size_t),
                                    &userDataPositions[i], sizeof(size_t));
                        }
                        accessUnit->meta()->setBuffer("mpeg-user-data", mpegUserData);
                    }
                }

                return accessUnit;
            }
        }

        ++offset;
    }

    return NULL;
}
1801
// Measures the chunk at the start of `data`.
//
// A chunk must begin with the 00 00 01 prefix. Its size is the distance from
// `data` to the next 00 00 01 prefix, searched from byte 4 onwards (past the
// prefix and the 1-byte start code).
//
// Returns the chunk size, or -EAGAIN if fewer than 7 bytes are available,
// `data` does not begin with the prefix, or no following prefix is found.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const char kStartCode[] = "\x00\x00\x01";

    // per ISO/IEC 14496-2 6.2.1, a chunk has a 3-byte prefix + 1-byte start code
    // we need at least <prefix><start><next prefix> to successfully scan
    static const size_t kMinScanSize = 3 + 1 + 3;

    if (size < kMinScanSize || memcmp(kStartCode, data, 3) != 0) {
        return -EAGAIN;
    }

    for (size_t pos = 4; pos + 2 < size; ++pos) {
        if (memcmp(&data[pos], kStartCode, 3) == 0) {
            return pos;
        }
    }

    return -EAGAIN;
}
1827
// Dequeues one MPEG-4 video access unit from mBuffer.
//
// The buffer is walked chunk by chunk (see getNextChunkSize()) with a small
// state machine. Until a format exists the chunks are expected in the order
// visual object sequence start (0xb0), visual object start (0xb5), video
// object start (<= 0x1f), video object layer start (0x2X), followed by a
// group-of-VOP (0xb3) or VOP (0xb6) chunk; everything before that last chunk
// becomes the codec config. Once a format exists, each VOP chunk, together
// with whatever retained chunks precede it, is returned as an access unit.
//
// The returned buffer carries "timeUs" and, for intra-coded VOPs, "isSync".
// Returns NULL if more data is needed or an unexpected chunk is encountered.
sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
    uint8_t *data = mBuffer->data();
    size_t size = mBuffer->size();

    enum {
        SKIP_TO_VISUAL_OBJECT_SEQ_START,
        EXPECT_VISUAL_OBJECT_START,
        EXPECT_VO_START,
        EXPECT_VOL_START,
        WAIT_FOR_VOP_START,
        SKIP_TO_VOP_START,

    } state;

    // With a format already established only VOPs are of interest.
    if (mFormat == NULL) {
        state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
    } else {
        state = SKIP_TO_VOP_START;
    }

    int32_t width = -1, height = -1;

    // Bytes in front of `offset` are chunks kept for the codec config or for
    // the access unit currently being assembled.
    size_t offset = 0;
    ssize_t chunkSize;
    while ((chunkSize = getNextChunkSize(
                    &data[offset], size - offset)) > 0) {
        // When set, the first `offset` bytes are dropped from the buffer at
        // the bottom of the loop instead of stepping over the current chunk.
        bool discard = false;

        unsigned chunkType = data[offset + 3];

        switch (state) {
            case SKIP_TO_VISUAL_OBJECT_SEQ_START:
            {
                if (chunkType == 0xb0) {
                    // Discard anything before this marker.

                    state = EXPECT_VISUAL_OBJECT_START;
                } else {
                    // Not the sequence start: include this chunk in the
                    // bytes to be dropped.
                    discard = true;
                    offset += chunkSize;
                    ALOGW("b/74114680, advance to next chunk");
                    android_errorWriteLog(0x534e4554, "74114680");
                }
                break;
            }

            case EXPECT_VISUAL_OBJECT_START:
            {
                if (chunkType != 0xb5) {
                    ALOGE("Unexpected chunkType");
                    return NULL;
                }
                state = EXPECT_VO_START;
                break;
            }

            case EXPECT_VO_START:
            {
                if (chunkType > 0x1f) {
                    ALOGE("Unexpected chunkType");
                    return NULL;
                }
                state = EXPECT_VOL_START;
                break;
            }

            case EXPECT_VOL_START:
            {
                if ((chunkType & 0xf0) != 0x20) {
                    ALOGE("Wrong chunkType");
                    return NULL;
                }

                if (!ExtractDimensionsFromVOLHeader(
                            &data[offset], chunkSize,
                            &width, &height)) {
                    ALOGE("Failed to get dimension");
                    return NULL;
                }

                state = WAIT_FOR_VOP_START;
                break;
            }

            case WAIT_FOR_VOP_START:
            {
                if (chunkType == 0xb3 || chunkType == 0xb6) {
                    // group of VOP or VOP start.

                    mFormat = new MetaData;
                    mFormat->setCString(
                            kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);

                    mFormat->setInt32(kKeyWidth, width);
                    mFormat->setInt32(kKeyHeight, height);

                    ALOGI("found MPEG4 video codec config (%d x %d)",
                         width, height);

                    // All chunks retained so far form the codec config.
                    sp<ABuffer> csd = new ABuffer(offset);
                    memcpy(csd->data(), data, offset);

                    // hexdump(csd->data(), csd->size());

                    sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
                    mFormat->setData(
                            kKeyESDS, kTypeESDS,
                            esds->data(), esds->size());

                    // Drop the config bytes; the current chunk stays queued
                    // and is re-examined in the SKIP_TO_VOP_START state.
                    discard = true;
                    state = SKIP_TO_VOP_START;
                }

                break;
            }

            case SKIP_TO_VOP_START:
            {
                if (chunkType == 0xb6) {
                    // Top two bits of the byte after the start code.
                    int vopCodingType = (data[offset + 4] & 0xc0) >> 6;

                    // The access unit includes this VOP chunk.
                    offset += chunkSize;

                    sp<ABuffer> accessUnit = new ABuffer(offset);
                    memcpy(accessUnit->data(), data, offset);

                    memmove(data, &data[offset], size - offset);
                    size -= offset;
                    mBuffer->setRange(0, size);

                    int64_t timeUs = fetchTimestamp(offset);
                    if (timeUs < 0LL) {
                        ALOGE("Negative timeus");
                        return NULL;
                    }

                    offset = 0;

                    accessUnit->meta()->setInt64("timeUs", timeUs);
                    if (vopCodingType == 0) {  // intra-coded VOP
                        accessUnit->meta()->setInt32("isSync", 1);
                    }

                    ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
                         timeUs);

                    // hexdump(accessUnit->data(), accessUnit->size());

                    return accessUnit;
                } else if (chunkType != 0xb3) {
                    // Neither a VOP nor a group-of-VOP chunk: drop it along
                    // with anything retained before it. A 0xb3 chunk is kept
                    // and ends up in front of the next VOP.
                    offset += chunkSize;
                    discard = true;
                }

                break;
            }

            default:
                ALOGE("Unknown state: %d", state);
                return NULL;
        }

        if (discard) {
            // Remove the first `offset` bytes and their timestamp
            // bookkeeping, then rescan from the front.
            (void)fetchTimestamp(offset);
            memmove(data, &data[offset], size - offset);
            size -= offset;
            offset = 0;
            mBuffer->setRange(0, size);
        } else {
            offset += chunkSize;
        }
    }

    return NULL;
}
2003
signalEOS()2004 void ElementaryStreamQueue::signalEOS() {
2005 if (!mEOSReached) {
2006 if (mMode == MPEG_VIDEO) {
2007 const char *theEnd = "\x00\x00\x01\x00";
2008 appendData(theEnd, 4, 0);
2009 }
2010 mEOSReached = true;
2011 } else {
2012 ALOGW("EOS already signaled");
2013 }
2014 }
2015
dequeueAccessUnitMetadata()2016 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMetadata() {
2017 size_t size = mBuffer->size();
2018 if (!size) {
2019 return NULL;
2020 }
2021
2022 sp<ABuffer> accessUnit = new ABuffer(size);
2023 int64_t timeUs = fetchTimestamp(size);
2024 accessUnit->meta()->setInt64("timeUs", timeUs);
2025
2026 memcpy(accessUnit->data(), mBuffer->data(), size);
2027 mBuffer->setRange(0, 0);
2028
2029 if (mFormat == NULL) {
2030 mFormat = new MetaData;
2031 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_DATA_TIMED_ID3);
2032 }
2033
2034 return accessUnit;
2035 }
2036
signalNewSampleAesKey(const sp<AMessage> & keyItem)2037 void ElementaryStreamQueue::signalNewSampleAesKey(const sp<AMessage> &keyItem) {
2038 if (mSampleDecryptor == NULL) {
2039 ALOGE("signalNewSampleAesKey: Stream %x is not encrypted; keyItem: %p",
2040 mMode, keyItem.get());
2041 return;
2042 }
2043
2044 mSampleDecryptor->signalNewSampleAesKey(keyItem);
2045 }
2046
2047
2048 } // namespace android
2049