1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "ESQueue"
19 #include <media/stagefright/foundation/ADebug.h>
20 
21 #include "ESQueue.h"
22 
23 #include <media/stagefright/foundation/hexdump.h>
24 #include <media/stagefright/foundation/ABitReader.h>
25 #include <media/stagefright/foundation/ABuffer.h>
26 #include <media/stagefright/foundation/AMessage.h>
27 #include <media/stagefright/MediaErrors.h>
28 #include <media/stagefright/MediaDefs.h>
29 #include <media/stagefright/MetaData.h>
30 #include <media/stagefright/Utils.h>
31 
32 #include "include/avc_utils.h"
33 
34 #include <inttypes.h>
35 #include <netinet/in.h>
36 
37 namespace android {
38 
ElementaryStreamQueue(Mode mode,uint32_t flags)39 ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
40     : mMode(mode),
41       mFlags(flags),
42       mEOSReached(false) {
43 }
44 
getFormat()45 sp<MetaData> ElementaryStreamQueue::getFormat() {
46     return mFormat;
47 }
48 
clear(bool clearFormat)49 void ElementaryStreamQueue::clear(bool clearFormat) {
50     if (mBuffer != NULL) {
51         mBuffer->setRange(0, 0);
52     }
53 
54     mRangeInfos.clear();
55 
56     if (clearFormat) {
57         mFormat.clear();
58     }
59 
60     mEOSReached = false;
61 }
62 
63 // Parse AC3 header assuming the current ptr is start position of syncframe,
64 // update metadata only applicable, and return the payload size
parseAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)65 static unsigned parseAC3SyncFrame(
66         const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
67     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
68     static const unsigned samplingRateTable[] = {48000, 44100, 32000};
69 
70     static const unsigned frameSizeTable[19][3] = {
71         { 64, 69, 96 },
72         { 80, 87, 120 },
73         { 96, 104, 144 },
74         { 112, 121, 168 },
75         { 128, 139, 192 },
76         { 160, 174, 240 },
77         { 192, 208, 288 },
78         { 224, 243, 336 },
79         { 256, 278, 384 },
80         { 320, 348, 480 },
81         { 384, 417, 576 },
82         { 448, 487, 672 },
83         { 512, 557, 768 },
84         { 640, 696, 960 },
85         { 768, 835, 1152 },
86         { 896, 975, 1344 },
87         { 1024, 1114, 1536 },
88         { 1152, 1253, 1728 },
89         { 1280, 1393, 1920 },
90     };
91 
92     ABitReader bits(ptr, size);
93     if (bits.numBitsLeft() < 16) {
94         return 0;
95     }
96     if (bits.getBits(16) != 0x0B77) {
97         return 0;
98     }
99 
100     if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
101         ALOGV("Not enough bits left for further parsing");
102         return 0;
103     }
104     bits.skipBits(16);  // crc1
105 
106     unsigned fscod = bits.getBits(2);
107     if (fscod == 3) {
108         ALOGW("Incorrect fscod in AC3 header");
109         return 0;
110     }
111 
112     unsigned frmsizecod = bits.getBits(6);
113     if (frmsizecod > 37) {
114         ALOGW("Incorrect frmsizecod in AC3 header");
115         return 0;
116     }
117 
118     unsigned bsid = bits.getBits(5);
119     if (bsid > 8) {
120         ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
121         return 0;
122     }
123 
124     unsigned bsmod __unused = bits.getBits(3);
125     unsigned acmod = bits.getBits(3);
126     unsigned cmixlev __unused = 0;
127     unsigned surmixlev __unused = 0;
128     unsigned dsurmod __unused = 0;
129 
130     if ((acmod & 1) > 0 && acmod != 1) {
131         if (bits.numBitsLeft() < 2) {
132             return 0;
133         }
134         cmixlev = bits.getBits(2);
135     }
136     if ((acmod & 4) > 0) {
137         if (bits.numBitsLeft() < 2) {
138             return 0;
139         }
140         surmixlev = bits.getBits(2);
141     }
142     if (acmod == 2) {
143         if (bits.numBitsLeft() < 2) {
144             return 0;
145         }
146         dsurmod = bits.getBits(2);
147     }
148 
149     if (bits.numBitsLeft() < 1) {
150         return 0;
151     }
152     unsigned lfeon = bits.getBits(1);
153 
154     unsigned samplingRate = samplingRateTable[fscod];
155     unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
156     if (fscod == 1) {
157         payloadSize += frmsizecod & 1;
158     }
159     payloadSize <<= 1;  // convert from 16-bit words to bytes
160 
161     unsigned channelCount = channelCountTable[acmod] + lfeon;
162 
163     if (metaData != NULL) {
164         (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
165         (*metaData)->setInt32(kKeyChannelCount, channelCount);
166         (*metaData)->setInt32(kKeySampleRate, samplingRate);
167     }
168 
169     return payloadSize;
170 }
171 
IsSeeminglyValidAC3Header(const uint8_t * ptr,size_t size)172 static bool IsSeeminglyValidAC3Header(const uint8_t *ptr, size_t size) {
173     return parseAC3SyncFrame(ptr, size, NULL) > 0;
174 }
175 
// Heuristically decides whether an ADTS frame starts at ptr.
//
// ptr/size    - candidate bytes and the number of bytes available.
// frameLength - out: the frame length read from the header; written only
//               when the function returns true.
//
// Returns false if fewer than 7 bytes are available, the sync bits or layer
// are wrong, the reserved MPEG-2 profile 3 is signalled, or the frame length
// in the header is shorter than the header itself or longer than the
// available data.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    // Bytes inspected here; also the smallest ADTS header (no CRC).
    static const size_t kMinADTSHeaderSize = 7;

    if (size < kMinADTSHeaderSize) {
        // Not enough data to verify header.
        return false;
    }

    // 12-bit sync word.
    if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
        return false;
    }

    unsigned layer = (ptr[1] >> 1) & 3;

    if (layer != 0) {
        return false;
    }

    unsigned ID = (ptr[1] >> 3) & 1;
    unsigned profile_ObjectType = ptr[2] >> 6;

    if (ID == 1 && profile_ObjectType == 3) {
        // MPEG-2 profile 3 is reserved.
        return false;
    }

    // 13-bit frame length spread over bytes 3..5.
    size_t frameLengthInHeader =
            ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);

    // The frame length covers the header too (the AAC dequeue path advances
    // from one header to the next by exactly this amount), so a value below
    // the header size - in particular 0 - cannot describe a real frame.
    if (frameLengthInHeader < kMinADTSHeaderSize) {
        return false;
    }

    if (frameLengthInHeader > size) {
        return false;
    }

    *frameLength = frameLengthInHeader;
    return true;
}
210 
// Heuristically decides whether an MPEG audio frame header starts at ptr.
// Needs at least 3 bytes; checks the 11-bit sync pattern and rejects the
// reserved values of the version ID, layer, bitrate index and sampling rate
// index fields.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Not enough data to verify header.
        return false;
    }

    const unsigned byte0 = ptr[0];
    const unsigned byte1 = ptr[1];
    const unsigned byte2 = ptr[2];

    const bool hasSync = byte0 == 0xff && (byte1 >> 5) == 0x07;
    if (!hasSync) {
        return false;
    }

    const unsigned versionId = (byte1 >> 3) & 3;
    const unsigned layerBits = (byte1 >> 1) & 3;
    const unsigned bitrateIndex = byte2 >> 4;
    const unsigned samplingRateIndex = (byte2 >> 2) & 3;

    // Each field has exactly one reserved value that disqualifies the header.
    return versionId != 1
            && layerBits != 0
            && bitrateIndex != 0x0f
            && samplingRateIndex != 3;
}
247 
appendData(const void * data,size_t size,int64_t timeUs)248 status_t ElementaryStreamQueue::appendData(
249         const void *data, size_t size, int64_t timeUs) {
250 
251     if (mEOSReached) {
252         ALOGE("appending data after EOS");
253         return ERROR_MALFORMED;
254     }
255     if (mBuffer == NULL || mBuffer->size() == 0) {
256         switch (mMode) {
257             case H264:
258             case MPEG_VIDEO:
259             {
260 #if 0
261                 if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
262                     return ERROR_MALFORMED;
263                 }
264 #else
265                 uint8_t *ptr = (uint8_t *)data;
266 
267                 ssize_t startOffset = -1;
268                 for (size_t i = 0; i + 2 < size; ++i) {
269                     if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
270                         startOffset = i;
271                         break;
272                     }
273                 }
274 
275                 if (startOffset < 0) {
276                     return ERROR_MALFORMED;
277                 }
278 
279                 if (startOffset > 0) {
280                     ALOGI("found something resembling an H.264/MPEG syncword "
281                           "at offset %zd",
282                           startOffset);
283                 }
284 
285                 data = &ptr[startOffset];
286                 size -= startOffset;
287 #endif
288                 break;
289             }
290 
291             case MPEG4_VIDEO:
292             {
293 #if 0
294                 if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
295                     return ERROR_MALFORMED;
296                 }
297 #else
298                 uint8_t *ptr = (uint8_t *)data;
299 
300                 ssize_t startOffset = -1;
301                 for (size_t i = 0; i + 2 < size; ++i) {
302                     if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
303                         startOffset = i;
304                         break;
305                     }
306                 }
307 
308                 if (startOffset < 0) {
309                     return ERROR_MALFORMED;
310                 }
311 
312                 if (startOffset > 0) {
313                     ALOGI("found something resembling an H.264/MPEG syncword "
314                           "at offset %zd",
315                           startOffset);
316                 }
317 
318                 data = &ptr[startOffset];
319                 size -= startOffset;
320 #endif
321                 break;
322             }
323 
324             case AAC:
325             {
326                 uint8_t *ptr = (uint8_t *)data;
327 
328 #if 0
329                 if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
330                     return ERROR_MALFORMED;
331                 }
332 #else
333                 ssize_t startOffset = -1;
334                 size_t frameLength;
335                 for (size_t i = 0; i < size; ++i) {
336                     if (IsSeeminglyValidADTSHeader(
337                             &ptr[i], size - i, &frameLength)) {
338                         startOffset = i;
339                         break;
340                     }
341                 }
342 
343                 if (startOffset < 0) {
344                     return ERROR_MALFORMED;
345                 }
346 
347                 if (startOffset > 0) {
348                     ALOGI("found something resembling an AAC syncword at "
349                           "offset %zd",
350                           startOffset);
351                 }
352 
353                 if (frameLength != size - startOffset) {
354                     ALOGV("First ADTS AAC frame length is %zd bytes, "
355                           "while the buffer size is %zd bytes.",
356                           frameLength, size - startOffset);
357                 }
358 
359                 data = &ptr[startOffset];
360                 size -= startOffset;
361 #endif
362                 break;
363             }
364 
365             case AC3:
366             {
367                 uint8_t *ptr = (uint8_t *)data;
368 
369                 ssize_t startOffset = -1;
370                 for (size_t i = 0; i < size; ++i) {
371                     if (IsSeeminglyValidAC3Header(&ptr[i], size - i)) {
372                         startOffset = i;
373                         break;
374                     }
375                 }
376 
377                 if (startOffset < 0) {
378                     return ERROR_MALFORMED;
379                 }
380 
381                 if (startOffset > 0) {
382                     ALOGI("found something resembling an AC3 syncword at "
383                           "offset %zd",
384                           startOffset);
385                 }
386 
387                 data = &ptr[startOffset];
388                 size -= startOffset;
389                 break;
390             }
391 
392             case MPEG_AUDIO:
393             {
394                 uint8_t *ptr = (uint8_t *)data;
395 
396                 ssize_t startOffset = -1;
397                 for (size_t i = 0; i < size; ++i) {
398                     if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
399                         startOffset = i;
400                         break;
401                     }
402                 }
403 
404                 if (startOffset < 0) {
405                     return ERROR_MALFORMED;
406                 }
407 
408                 if (startOffset > 0) {
409                     ALOGI("found something resembling an MPEG audio "
410                           "syncword at offset %zd",
411                           startOffset);
412                 }
413 
414                 data = &ptr[startOffset];
415                 size -= startOffset;
416                 break;
417             }
418 
419             case PCM_AUDIO:
420             case METADATA:
421             {
422                 break;
423             }
424 
425             default:
426                 ALOGE("Unknown mode: %d", mMode);
427                 return ERROR_MALFORMED;
428         }
429     }
430 
431     size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
432     if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
433         neededSize = (neededSize + 65535) & ~65535;
434 
435         ALOGV("resizing buffer to size %zu", neededSize);
436 
437         sp<ABuffer> buffer = new ABuffer(neededSize);
438         if (mBuffer != NULL) {
439             memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
440             buffer->setRange(0, mBuffer->size());
441         } else {
442             buffer->setRange(0, 0);
443         }
444 
445         mBuffer = buffer;
446     }
447 
448     memcpy(mBuffer->data() + mBuffer->size(), data, size);
449     mBuffer->setRange(0, mBuffer->size() + size);
450 
451     RangeInfo info;
452     info.mLength = size;
453     info.mTimestampUs = timeUs;
454     mRangeInfos.push_back(info);
455 
456 #if 0
457     if (mMode == AAC) {
458         ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
459         hexdump(data, size);
460     }
461 #endif
462 
463     return OK;
464 }
465 
dequeueAccessUnit()466 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
467     if ((mFlags & kFlag_AlignedData) && mMode == H264) {
468         if (mRangeInfos.empty()) {
469             return NULL;
470         }
471 
472         RangeInfo info = *mRangeInfos.begin();
473         mRangeInfos.erase(mRangeInfos.begin());
474 
475         sp<ABuffer> accessUnit = new ABuffer(info.mLength);
476         memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
477         accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
478 
479         memmove(mBuffer->data(),
480                 mBuffer->data() + info.mLength,
481                 mBuffer->size() - info.mLength);
482 
483         mBuffer->setRange(0, mBuffer->size() - info.mLength);
484 
485         if (mFormat == NULL) {
486             mFormat = MakeAVCCodecSpecificData(accessUnit);
487         }
488 
489         return accessUnit;
490     }
491 
492     switch (mMode) {
493         case H264:
494             return dequeueAccessUnitH264();
495         case AAC:
496             return dequeueAccessUnitAAC();
497         case AC3:
498             return dequeueAccessUnitAC3();
499         case MPEG_VIDEO:
500             return dequeueAccessUnitMPEGVideo();
501         case MPEG4_VIDEO:
502             return dequeueAccessUnitMPEG4Video();
503         case PCM_AUDIO:
504             return dequeueAccessUnitPCMAudio();
505         case METADATA:
506             return dequeueAccessUnitMetadata();
507         default:
508             if (mMode != MPEG_AUDIO) {
509                 ALOGE("Unknown mode");
510                 return NULL;
511             }
512             return dequeueAccessUnitMPEGAudio();
513     }
514 }
515 
dequeueAccessUnitAC3()516 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC3() {
517     unsigned syncStartPos = 0;  // in bytes
518     unsigned payloadSize = 0;
519     sp<MetaData> format = new MetaData;
520     while (true) {
521         if (syncStartPos + 2 >= mBuffer->size()) {
522             return NULL;
523         }
524 
525         payloadSize = parseAC3SyncFrame(
526                 mBuffer->data() + syncStartPos,
527                 mBuffer->size() - syncStartPos,
528                 &format);
529         if (payloadSize > 0) {
530             break;
531         }
532         ++syncStartPos;
533     }
534 
535     if (mBuffer->size() < syncStartPos + payloadSize) {
536         ALOGV("Not enough buffer size for AC3");
537         return NULL;
538     }
539 
540     if (mFormat == NULL) {
541         mFormat = format;
542     }
543 
544     sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
545     memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
546 
547     int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
548     if (timeUs < 0ll) {
549         ALOGE("negative timeUs");
550         return NULL;
551     }
552     accessUnit->meta()->setInt64("timeUs", timeUs);
553     accessUnit->meta()->setInt32("isSync", 1);
554 
555     memmove(
556             mBuffer->data(),
557             mBuffer->data() + syncStartPos + payloadSize,
558             mBuffer->size() - syncStartPos - payloadSize);
559 
560     mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
561 
562     return accessUnit;
563 }
564 
dequeueAccessUnitPCMAudio()565 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
566     if (mBuffer->size() < 4) {
567         return NULL;
568     }
569 
570     ABitReader bits(mBuffer->data(), 4);
571     if (bits.getBits(8) != 0xa0) {
572         ALOGE("Unexpected bit values");
573         return NULL;
574     }
575     unsigned numAUs = bits.getBits(8);
576     bits.skipBits(8);
577     unsigned quantization_word_length __unused = bits.getBits(2);
578     unsigned audio_sampling_frequency = bits.getBits(3);
579     unsigned num_channels = bits.getBits(3);
580 
581     if (audio_sampling_frequency != 2) {
582         ALOGE("Wrong sampling freq");
583         return NULL;
584     }
585     if (num_channels != 1u) {
586         ALOGE("Wrong channel #");
587         return NULL;
588     }
589 
590     if (mFormat == NULL) {
591         mFormat = new MetaData;
592         mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
593         mFormat->setInt32(kKeyChannelCount, 2);
594         mFormat->setInt32(kKeySampleRate, 48000);
595         mFormat->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
596     }
597 
598     static const size_t kFramesPerAU = 80;
599     size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
600 
601     size_t payloadSize = numAUs * frameSize * kFramesPerAU;
602 
603     if (mBuffer->size() < 4 + payloadSize) {
604         return NULL;
605     }
606 
607     sp<ABuffer> accessUnit = new ABuffer(payloadSize);
608     memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
609 
610     int64_t timeUs = fetchTimestamp(payloadSize + 4);
611     if (timeUs < 0ll) {
612         ALOGE("Negative timeUs");
613         return NULL;
614     }
615     accessUnit->meta()->setInt64("timeUs", timeUs);
616     accessUnit->meta()->setInt32("isSync", 1);
617 
618     int16_t *ptr = (int16_t *)accessUnit->data();
619     for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
620         ptr[i] = ntohs(ptr[i]);
621     }
622 
623     memmove(
624             mBuffer->data(),
625             mBuffer->data() + 4 + payloadSize,
626             mBuffer->size() - 4 - payloadSize);
627 
628     mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
629 
630     return accessUnit;
631 }
632 
dequeueAccessUnitAAC()633 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
634     if (mBuffer->size() == 0) {
635         return NULL;
636     }
637 
638     if (mRangeInfos.empty()) {
639         return NULL;
640     }
641 
642     const RangeInfo &info = *mRangeInfos.begin();
643     if (mBuffer->size() < info.mLength) {
644         return NULL;
645     }
646 
647     if (info.mTimestampUs < 0ll) {
648         ALOGE("Negative info.mTimestampUs");
649         return NULL;
650     }
651 
652     // The idea here is consume all AAC frames starting at offsets before
653     // info.mLength so we can assign a meaningful timestamp without
654     // having to interpolate.
655     // The final AAC frame may well extend into the next RangeInfo but
656     // that's ok.
657     size_t offset = 0;
658     while (offset < info.mLength) {
659         if (offset + 7 > mBuffer->size()) {
660             return NULL;
661         }
662 
663         ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
664 
665         // adts_fixed_header
666 
667         if (bits.getBits(12) != 0xfffu) {
668             ALOGE("Wrong atds_fixed_header");
669             return NULL;
670         }
671         bits.skipBits(3);  // ID, layer
672         bool protection_absent __unused = bits.getBits(1) != 0;
673 
674         if (mFormat == NULL) {
675             unsigned profile = bits.getBits(2);
676             if (profile == 3u) {
677                 ALOGE("profile should not be 3");
678                 return NULL;
679             }
680             unsigned sampling_freq_index = bits.getBits(4);
681             bits.getBits(1);  // private_bit
682             unsigned channel_configuration = bits.getBits(3);
683             if (channel_configuration == 0u) {
684                 ALOGE("channel_config should not be 0");
685                 return NULL;
686             }
687             bits.skipBits(2);  // original_copy, home
688 
689             mFormat = MakeAACCodecSpecificData(
690                     profile, sampling_freq_index, channel_configuration);
691 
692             mFormat->setInt32(kKeyIsADTS, true);
693 
694             int32_t sampleRate;
695             int32_t numChannels;
696             if (!mFormat->findInt32(kKeySampleRate, &sampleRate)) {
697                 ALOGE("SampleRate not found");
698                 return NULL;
699             }
700             if (!mFormat->findInt32(kKeyChannelCount, &numChannels)) {
701                 ALOGE("ChannelCount not found");
702                 return NULL;
703             }
704 
705             ALOGI("found AAC codec config (%d Hz, %d channels)",
706                  sampleRate, numChannels);
707         } else {
708             // profile_ObjectType, sampling_frequency_index, private_bits,
709             // channel_configuration, original_copy, home
710             bits.skipBits(12);
711         }
712 
713         // adts_variable_header
714 
715         // copyright_identification_bit, copyright_identification_start
716         bits.skipBits(2);
717 
718         unsigned aac_frame_length = bits.getBits(13);
719 
720         bits.skipBits(11);  // adts_buffer_fullness
721 
722         unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
723 
724         if (number_of_raw_data_blocks_in_frame != 0) {
725             // To be implemented.
726             ALOGE("Should not reach here.");
727             return NULL;
728         }
729 
730         if (offset + aac_frame_length > mBuffer->size()) {
731             return NULL;
732         }
733 
734         size_t headerSize __unused = protection_absent ? 7 : 9;
735 
736         offset += aac_frame_length;
737     }
738 
739     int64_t timeUs = fetchTimestamp(offset);
740 
741     sp<ABuffer> accessUnit = new ABuffer(offset);
742     memcpy(accessUnit->data(), mBuffer->data(), offset);
743 
744     memmove(mBuffer->data(), mBuffer->data() + offset,
745             mBuffer->size() - offset);
746     mBuffer->setRange(0, mBuffer->size() - offset);
747 
748     accessUnit->meta()->setInt64("timeUs", timeUs);
749     accessUnit->meta()->setInt32("isSync", 1);
750 
751     return accessUnit;
752 }
753 
fetchTimestamp(size_t size)754 int64_t ElementaryStreamQueue::fetchTimestamp(size_t size) {
755     int64_t timeUs = -1;
756     bool first = true;
757 
758     while (size > 0) {
759         if (mRangeInfos.empty()) {
760             return timeUs;
761         }
762 
763         RangeInfo *info = &*mRangeInfos.begin();
764 
765         if (first) {
766             timeUs = info->mTimestampUs;
767             first = false;
768         }
769 
770         if (info->mLength > size) {
771             info->mLength -= size;
772             size = 0;
773         } else {
774             size -= info->mLength;
775 
776             mRangeInfos.erase(mRangeInfos.begin());
777             info = NULL;
778         }
779 
780     }
781 
782     if (timeUs == 0ll) {
783         ALOGV("Returning 0 timestamp");
784     }
785 
786     return timeUs;
787 }
788 
dequeueAccessUnitH264()789 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
790     const uint8_t *data = mBuffer->data();
791 
792     size_t size = mBuffer->size();
793     Vector<NALPosition> nals;
794 
795     size_t totalSize = 0;
796     size_t seiCount = 0;
797 
798     status_t err;
799     const uint8_t *nalStart;
800     size_t nalSize;
801     bool foundSlice = false;
802     bool foundIDR = false;
803     while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
804         if (nalSize == 0) continue;
805 
806         unsigned nalType = nalStart[0] & 0x1f;
807         bool flush = false;
808 
809         if (nalType == 1 || nalType == 5) {
810             if (nalType == 5) {
811                 foundIDR = true;
812             }
813             if (foundSlice) {
814                 ABitReader br(nalStart + 1, nalSize);
815                 unsigned first_mb_in_slice = parseUE(&br);
816 
817                 if (first_mb_in_slice == 0) {
818                     // This slice starts a new frame.
819 
820                     flush = true;
821                 }
822             }
823 
824             foundSlice = true;
825         } else if ((nalType == 9 || nalType == 7) && foundSlice) {
826             // Access unit delimiter and SPS will be associated with the
827             // next frame.
828 
829             flush = true;
830         } else if (nalType == 6 && nalSize > 0) {
831             // found non-zero sized SEI
832             ++seiCount;
833         }
834 
835         if (flush) {
836             // The access unit will contain all nal units up to, but excluding
837             // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
838 
839             size_t auSize = 4 * nals.size() + totalSize;
840             sp<ABuffer> accessUnit = new ABuffer(auSize);
841             sp<ABuffer> sei;
842 
843             if (seiCount > 0) {
844                 sei = new ABuffer(seiCount * sizeof(NALPosition));
845                 accessUnit->meta()->setBuffer("sei", sei);
846             }
847 
848 #if !LOG_NDEBUG
849             AString out;
850 #endif
851 
852             size_t dstOffset = 0;
853             size_t seiIndex = 0;
854             for (size_t i = 0; i < nals.size(); ++i) {
855                 const NALPosition &pos = nals.itemAt(i);
856 
857                 unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
858 
859                 if (nalType == 6 && pos.nalSize > 0) {
860                     if (seiIndex >= sei->size() / sizeof(NALPosition)) {
861                         ALOGE("Wrong seiIndex");
862                         return NULL;
863                     }
864                     NALPosition &seiPos = ((NALPosition *)sei->data())[seiIndex++];
865                     seiPos.nalOffset = dstOffset + 4;
866                     seiPos.nalSize = pos.nalSize;
867                 }
868 
869 #if !LOG_NDEBUG
870                 char tmp[128];
871                 sprintf(tmp, "0x%02x", nalType);
872                 if (i > 0) {
873                     out.append(", ");
874                 }
875                 out.append(tmp);
876 #endif
877 
878                 memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
879 
880                 memcpy(accessUnit->data() + dstOffset + 4,
881                        mBuffer->data() + pos.nalOffset,
882                        pos.nalSize);
883 
884                 dstOffset += pos.nalSize + 4;
885             }
886 
887 #if !LOG_NDEBUG
888             ALOGV("accessUnit contains nal types %s", out.c_str());
889 #endif
890 
891             const NALPosition &pos = nals.itemAt(nals.size() - 1);
892             size_t nextScan = pos.nalOffset + pos.nalSize;
893 
894             memmove(mBuffer->data(),
895                     mBuffer->data() + nextScan,
896                     mBuffer->size() - nextScan);
897 
898             mBuffer->setRange(0, mBuffer->size() - nextScan);
899 
900             int64_t timeUs = fetchTimestamp(nextScan);
901             if (timeUs < 0ll) {
902                 ALOGE("Negative timeUs");
903                 return NULL;
904             }
905 
906             accessUnit->meta()->setInt64("timeUs", timeUs);
907             if (foundIDR) {
908                 accessUnit->meta()->setInt32("isSync", 1);
909             }
910 
911             if (mFormat == NULL) {
912                 mFormat = MakeAVCCodecSpecificData(accessUnit);
913             }
914 
915             return accessUnit;
916         }
917 
918         NALPosition pos;
919         pos.nalOffset = nalStart - mBuffer->data();
920         pos.nalSize = nalSize;
921 
922         nals.push(pos);
923 
924         totalSize += nalSize;
925     }
926     if (err != (status_t)-EAGAIN) {
927         ALOGE("Unexpeted err");
928         return NULL;
929     }
930 
931     return NULL;
932 }
933 
dequeueAccessUnitMPEGAudio()934 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
935     const uint8_t *data = mBuffer->data();
936     size_t size = mBuffer->size();
937 
938     if (size < 4) {
939         return NULL;
940     }
941 
942     uint32_t header = U32_AT(data);
943 
944     size_t frameSize;
945     int samplingRate, numChannels, bitrate, numSamples;
946     if (!GetMPEGAudioFrameSize(
947                 header, &frameSize, &samplingRate, &numChannels,
948                 &bitrate, &numSamples)) {
949         ALOGE("Failed to get audio frame size");
950         return NULL;
951     }
952 
953     if (size < frameSize) {
954         return NULL;
955     }
956 
957     unsigned layer = 4 - ((header >> 17) & 3);
958 
959     sp<ABuffer> accessUnit = new ABuffer(frameSize);
960     memcpy(accessUnit->data(), data, frameSize);
961 
962     memmove(mBuffer->data(),
963             mBuffer->data() + frameSize,
964             mBuffer->size() - frameSize);
965 
966     mBuffer->setRange(0, mBuffer->size() - frameSize);
967 
968     int64_t timeUs = fetchTimestamp(frameSize);
969     if (timeUs < 0ll) {
970         ALOGE("Negative timeUs");
971         return NULL;
972     }
973 
974     accessUnit->meta()->setInt64("timeUs", timeUs);
975     accessUnit->meta()->setInt32("isSync", 1);
976 
977     if (mFormat == NULL) {
978         mFormat = new MetaData;
979 
980         switch (layer) {
981             case 1:
982                 mFormat->setCString(
983                         kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
984                 break;
985             case 2:
986                 mFormat->setCString(
987                         kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
988                 break;
989             case 3:
990                 mFormat->setCString(
991                         kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
992                 break;
993             default:
994                 return NULL;
995         }
996 
997         mFormat->setInt32(kKeySampleRate, samplingRate);
998         mFormat->setInt32(kKeyChannelCount, numChannels);
999     }
1000 
1001     return accessUnit;
1002 }
1003 
EncodeSize14(uint8_t ** _ptr,size_t size)1004 static void EncodeSize14(uint8_t **_ptr, size_t size) {
1005     if (size > 0x3fff) {
1006         ALOGE("Wrong size");
1007         return;
1008     }
1009 
1010     uint8_t *ptr = *_ptr;
1011 
1012     *ptr++ = 0x80 | (size >> 7);
1013     *ptr++ = size & 0x7f;
1014 
1015     *_ptr = ptr;
1016 }
1017 
// Wraps the codec specific data |csd| in an ESDS-style blob and returns it.
//
// The result is csd->size() + 25 bytes long: three nested descriptors tagged
// 0x03, 0x04 and 0x05, each followed by a two-byte EncodeSize14() length,
// with |csd| copied verbatim as the payload of the innermost one.
static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
    const size_t csdSize = csd->size();

    sp<ABuffer> esds = new ABuffer(csdSize + 25);
    uint8_t *out = esds->data();

    // Outer descriptor (tag 0x03); its length covers everything after it.
    *out++ = 0x03;
    EncodeSize14(&out, 22 + csdSize);

    // Two bytes of ES_ID, then one byte holding streamDependenceFlag,
    // URL_Flag and OCRstreamFlag -- all zero.
    memset(out, 0x00, 3);
    out += 3;

    // Middle descriptor (tag 0x04).
    *out++ = 0x04;
    EncodeSize14(&out, 16 + csdSize);

    *out++ = 0x40;  // Audio ISO/IEC 14496-3

    // Twelve zero bytes fill the rest of this descriptor's fixed part.
    memset(out, 0x00, 12);
    out += 12;

    // Innermost descriptor (tag 0x05) carrying the codec specific data.
    *out++ = 0x05;
    EncodeSize14(&out, csdSize);

    memcpy(out, csd->data(), csdSize);

    return esds;
}
1046 
dequeueAccessUnitMPEGVideo()1047 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
1048     const uint8_t *data = mBuffer->data();
1049     size_t size = mBuffer->size();
1050 
1051     Vector<size_t> userDataPositions;
1052 
1053     bool sawPictureStart = false;
1054     int pprevStartCode = -1;
1055     int prevStartCode = -1;
1056     int currentStartCode = -1;
1057     bool gopFound = false;
1058     bool isClosedGop = false;
1059     bool brokenLink = false;
1060 
1061     size_t offset = 0;
1062     while (offset + 3 < size) {
1063         if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
1064             ++offset;
1065             continue;
1066         }
1067 
1068         pprevStartCode = prevStartCode;
1069         prevStartCode = currentStartCode;
1070         currentStartCode = data[offset + 3];
1071 
1072         if (currentStartCode == 0xb3 && mFormat == NULL) {
1073             memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
1074             size -= offset;
1075             (void)fetchTimestamp(offset);
1076             offset = 0;
1077             mBuffer->setRange(0, size);
1078         }
1079 
1080         if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
1081                 || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
1082             // seqHeader without/with extension
1083 
1084             if (mFormat == NULL) {
1085                 if (size < 7u) {
1086                     ALOGE("Size too small");
1087                     return NULL;
1088                 }
1089 
1090                 unsigned width =
1091                     (data[4] << 4) | data[5] >> 4;
1092 
1093                 unsigned height =
1094                     ((data[5] & 0x0f) << 8) | data[6];
1095 
1096                 mFormat = new MetaData;
1097                 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1098                 mFormat->setInt32(kKeyWidth, width);
1099                 mFormat->setInt32(kKeyHeight, height);
1100 
1101                 ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
1102 
1103                 sp<ABuffer> csd = new ABuffer(offset);
1104                 memcpy(csd->data(), data, offset);
1105 
1106                 memmove(mBuffer->data(),
1107                         mBuffer->data() + offset,
1108                         mBuffer->size() - offset);
1109 
1110                 mBuffer->setRange(0, mBuffer->size() - offset);
1111                 size -= offset;
1112                 (void)fetchTimestamp(offset);
1113                 offset = 0;
1114 
1115                 // hexdump(csd->data(), csd->size());
1116 
1117                 sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1118                 mFormat->setData(
1119                         kKeyESDS, kTypeESDS, esds->data(), esds->size());
1120 
1121                 return NULL;
1122             }
1123         }
1124 
1125         if (mFormat != NULL && currentStartCode == 0xb8) {
1126             // GOP layer
1127             if (offset + 7 >= size) {
1128                 ALOGE("Size too small");
1129                 return NULL;
1130             }
1131             gopFound = true;
1132             isClosedGop = (data[offset + 7] & 0x40) != 0;
1133             brokenLink = (data[offset + 7] & 0x20) != 0;
1134         }
1135 
1136         if (mFormat != NULL && currentStartCode == 0xb2) {
1137             userDataPositions.add(offset);
1138         }
1139 
1140         if (mFormat != NULL && currentStartCode == 0x00) {
1141             // Picture start
1142 
1143             if (!sawPictureStart) {
1144                 sawPictureStart = true;
1145             } else {
1146                 sp<ABuffer> accessUnit = new ABuffer(offset);
1147                 memcpy(accessUnit->data(), data, offset);
1148 
1149                 memmove(mBuffer->data(),
1150                         mBuffer->data() + offset,
1151                         mBuffer->size() - offset);
1152 
1153                 mBuffer->setRange(0, mBuffer->size() - offset);
1154 
1155                 int64_t timeUs = fetchTimestamp(offset);
1156                 if (timeUs < 0ll) {
1157                     ALOGE("Negative timeUs");
1158                     return NULL;
1159                 }
1160 
1161                 offset = 0;
1162 
1163                 accessUnit->meta()->setInt64("timeUs", timeUs);
1164                 if (gopFound && (!brokenLink || isClosedGop)) {
1165                     accessUnit->meta()->setInt32("isSync", 1);
1166                 }
1167 
1168                 ALOGV("returning MPEG video access unit at time %" PRId64 " us",
1169                       timeUs);
1170 
1171                 // hexdump(accessUnit->data(), accessUnit->size());
1172 
1173                 if (userDataPositions.size() > 0) {
1174                     sp<ABuffer> mpegUserData =
1175                         new ABuffer(userDataPositions.size() * sizeof(size_t));
1176                     if (mpegUserData != NULL && mpegUserData->data() != NULL) {
1177                         for (size_t i = 0; i < userDataPositions.size(); ++i) {
1178                             memcpy(
1179                                     mpegUserData->data() + i * sizeof(size_t),
1180                                     &userDataPositions[i], sizeof(size_t));
1181                         }
1182                         accessUnit->meta()->setBuffer("mpegUserData", mpegUserData);
1183                     }
1184                 }
1185 
1186                 return accessUnit;
1187             }
1188         }
1189 
1190         ++offset;
1191     }
1192 
1193     return NULL;
1194 }
1195 
// Measures the chunk at the front of |data|.
//
// |data| has to begin with the 00 00 01 start code prefix. The return value
// is the distance from there to the next occurrence of that prefix, i.e. the
// length of the first chunk. Returns -EAGAIN when |data| does not begin with
// the prefix or when no complete second prefix lies within |size| bytes.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const uint8_t kPrefix[] = { 0x00, 0x00, 0x01 };
    static const size_t kPrefixLen = sizeof(kPrefix);

    const bool startsWithPrefix =
        size >= kPrefixLen && memcmp(data, kPrefix, kPrefixLen) == 0;
    if (!startsWithPrefix) {
        return -EAGAIN;
    }

    // Look for the prefix that opens the following chunk.
    for (size_t pos = kPrefixLen; pos + kPrefixLen <= size; ++pos) {
        if (memcmp(data + pos, kPrefix, kPrefixLen) == 0) {
            return pos;
        }
    }

    return -EAGAIN;
}
1219 
dequeueAccessUnitMPEG4Video()1220 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1221     uint8_t *data = mBuffer->data();
1222     size_t size = mBuffer->size();
1223 
1224     enum {
1225         SKIP_TO_VISUAL_OBJECT_SEQ_START,
1226         EXPECT_VISUAL_OBJECT_START,
1227         EXPECT_VO_START,
1228         EXPECT_VOL_START,
1229         WAIT_FOR_VOP_START,
1230         SKIP_TO_VOP_START,
1231 
1232     } state;
1233 
1234     if (mFormat == NULL) {
1235         state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1236     } else {
1237         state = SKIP_TO_VOP_START;
1238     }
1239 
1240     int32_t width = -1, height = -1;
1241 
1242     size_t offset = 0;
1243     ssize_t chunkSize;
1244     while ((chunkSize = getNextChunkSize(
1245                     &data[offset], size - offset)) > 0) {
1246         bool discard = false;
1247 
1248         unsigned chunkType = data[offset + 3];
1249 
1250         switch (state) {
1251             case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1252             {
1253                 if (chunkType == 0xb0) {
1254                     // Discard anything before this marker.
1255 
1256                     state = EXPECT_VISUAL_OBJECT_START;
1257                 } else {
1258                     discard = true;
1259                 }
1260                 break;
1261             }
1262 
1263             case EXPECT_VISUAL_OBJECT_START:
1264             {
1265                 if (chunkType != 0xb5) {
1266                     ALOGE("Unexpected chunkType");
1267                     return NULL;
1268                 }
1269                 state = EXPECT_VO_START;
1270                 break;
1271             }
1272 
1273             case EXPECT_VO_START:
1274             {
1275                 if (chunkType > 0x1f) {
1276                     ALOGE("Unexpected chunkType");
1277                     return NULL;
1278                 }
1279                 state = EXPECT_VOL_START;
1280                 break;
1281             }
1282 
1283             case EXPECT_VOL_START:
1284             {
1285                 if ((chunkType & 0xf0) != 0x20) {
1286                     ALOGE("Wrong chunkType");
1287                     return NULL;
1288                 }
1289 
1290                 if (!ExtractDimensionsFromVOLHeader(
1291                             &data[offset], chunkSize,
1292                             &width, &height)) {
1293                     ALOGE("Failed to get dimension");
1294                     return NULL;
1295                 }
1296 
1297                 state = WAIT_FOR_VOP_START;
1298                 break;
1299             }
1300 
1301             case WAIT_FOR_VOP_START:
1302             {
1303                 if (chunkType == 0xb3 || chunkType == 0xb6) {
1304                     // group of VOP or VOP start.
1305 
1306                     mFormat = new MetaData;
1307                     mFormat->setCString(
1308                             kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1309 
1310                     mFormat->setInt32(kKeyWidth, width);
1311                     mFormat->setInt32(kKeyHeight, height);
1312 
1313                     ALOGI("found MPEG4 video codec config (%d x %d)",
1314                          width, height);
1315 
1316                     sp<ABuffer> csd = new ABuffer(offset);
1317                     memcpy(csd->data(), data, offset);
1318 
1319                     // hexdump(csd->data(), csd->size());
1320 
1321                     sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1322                     mFormat->setData(
1323                             kKeyESDS, kTypeESDS,
1324                             esds->data(), esds->size());
1325 
1326                     discard = true;
1327                     state = SKIP_TO_VOP_START;
1328                 }
1329 
1330                 break;
1331             }
1332 
1333             case SKIP_TO_VOP_START:
1334             {
1335                 if (chunkType == 0xb6) {
1336                     int vopCodingType = (data[offset + 4] & 0xc0) >> 6;
1337 
1338                     offset += chunkSize;
1339 
1340                     sp<ABuffer> accessUnit = new ABuffer(offset);
1341                     memcpy(accessUnit->data(), data, offset);
1342 
1343                     memmove(data, &data[offset], size - offset);
1344                     size -= offset;
1345                     mBuffer->setRange(0, size);
1346 
1347                     int64_t timeUs = fetchTimestamp(offset);
1348                     if (timeUs < 0ll) {
1349                         ALOGE("Negative timeus");
1350                         return NULL;
1351                     }
1352 
1353                     offset = 0;
1354 
1355                     accessUnit->meta()->setInt64("timeUs", timeUs);
1356                     if (vopCodingType == 0) {  // intra-coded VOP
1357                         accessUnit->meta()->setInt32("isSync", 1);
1358                     }
1359 
1360                     ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1361                          timeUs);
1362 
1363                     // hexdump(accessUnit->data(), accessUnit->size());
1364 
1365                     return accessUnit;
1366                 } else if (chunkType != 0xb3) {
1367                     offset += chunkSize;
1368                     discard = true;
1369                 }
1370 
1371                 break;
1372             }
1373 
1374             default:
1375                 ALOGE("Unknown state: %d", state);
1376                 return NULL;
1377         }
1378 
1379         if (discard) {
1380             (void)fetchTimestamp(offset);
1381             memmove(data, &data[offset], size - offset);
1382             size -= offset;
1383             offset = 0;
1384             mBuffer->setRange(0, size);
1385         } else {
1386             offset += chunkSize;
1387         }
1388     }
1389 
1390     return NULL;
1391 }
1392 
signalEOS()1393 void ElementaryStreamQueue::signalEOS() {
1394     if (!mEOSReached) {
1395         if (mMode == MPEG_VIDEO) {
1396             const char *theEnd = "\x00\x00\x01\x00";
1397             appendData(theEnd, 4, 0);
1398         }
1399         mEOSReached = true;
1400     } else {
1401         ALOGW("EOS already signaled");
1402     }
1403 }
1404 
dequeueAccessUnitMetadata()1405 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMetadata() {
1406     size_t size = mBuffer->size();
1407     if (!size) {
1408         return NULL;
1409     }
1410 
1411     sp<ABuffer> accessUnit = new ABuffer(size);
1412     int64_t timeUs = fetchTimestamp(size);
1413     accessUnit->meta()->setInt64("timeUs", timeUs);
1414 
1415     memcpy(accessUnit->data(), mBuffer->data(), size);
1416     mBuffer->setRange(0, 0);
1417 
1418     if (mFormat == NULL) {
1419         mFormat = new MetaData;
1420         mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_DATA_TIMED_ID3);
1421     }
1422 
1423     return accessUnit;
1424 }
1425 
1426 }  // namespace android
1427