1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MP3Extractor"
19 #include <utils/Log.h>
20
21 #include "MP3Extractor.h"
22
23 #include "ID3.h"
24 #include "VBRISeeker.h"
25 #include "XINGSeeker.h"
26
27 #include <media/DataSourceBase.h>
28 #include <media/MediaTrack.h>
29 #include <media/stagefright/foundation/ADebug.h>
30 #include <media/stagefright/foundation/AMessage.h>
31 #include <media/stagefright/foundation/avc_utils.h>
32 #include <media/stagefright/foundation/ByteUtils.h>
33 #include <media/stagefright/MediaBufferBase.h>
34 #include <media/stagefright/MediaBufferGroup.h>
35 #include <media/stagefright/MediaDefs.h>
36 #include <media/stagefright/MediaErrors.h>
37 #include <media/stagefright/MetaData.h>
38 #include <utils/String8.h>
39
40 namespace android {
41
// Selects the MPEG audio header bits that have to be identical in every frame
// of one stream: the sync word, the version, the layer and the sampling-rate
// index. The bits left out of the mask are allowed to change from frame to
// frame: protection, bitrate, padding, private bit, mode, mode extension,
// copyright bit, original bit and emphasis.
static constexpr uint32_t kMask = 0xfffe0c00u;
47
Resync(DataSourceBase * source,uint32_t match_header,off64_t * inout_pos,off64_t * post_id3_pos,uint32_t * out_header)48 static bool Resync(
49 DataSourceBase *source, uint32_t match_header,
50 off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) {
51 if (post_id3_pos != NULL) {
52 *post_id3_pos = 0;
53 }
54
55 if (*inout_pos == 0) {
56 // Skip an optional ID3 header if syncing at the very beginning
57 // of the datasource.
58
59 for (;;) {
60 uint8_t id3header[10];
61 if (source->readAt(*inout_pos, id3header, sizeof(id3header))
62 < (ssize_t)sizeof(id3header)) {
63 // If we can't even read these 10 bytes, we might as well bail
64 // out, even if there _were_ 10 bytes of valid mp3 audio data...
65 return false;
66 }
67
68 if (memcmp("ID3", id3header, 3)) {
69 break;
70 }
71
72 // Skip the ID3v2 header.
73
74 size_t len =
75 ((id3header[6] & 0x7f) << 21)
76 | ((id3header[7] & 0x7f) << 14)
77 | ((id3header[8] & 0x7f) << 7)
78 | (id3header[9] & 0x7f);
79
80 len += 10;
81
82 *inout_pos += len;
83
84 ALOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)",
85 (long long)*inout_pos, (long long)*inout_pos);
86 }
87
88 if (post_id3_pos != NULL) {
89 *post_id3_pos = *inout_pos;
90 }
91 }
92
93 off64_t pos = *inout_pos;
94 bool valid = false;
95
96 const size_t kMaxReadBytes = 1024;
97 const size_t kMaxBytesChecked = 128 * 1024;
98 uint8_t buf[kMaxReadBytes];
99 ssize_t bytesToRead = kMaxReadBytes;
100 ssize_t totalBytesRead = 0;
101 ssize_t remainingBytes = 0;
102 bool reachEOS = false;
103 uint8_t *tmp = buf;
104
105 do {
106 if (pos >= (off64_t)(*inout_pos + kMaxBytesChecked)) {
107 // Don't scan forever.
108 ALOGV("giving up at offset %lld", (long long)pos);
109 break;
110 }
111
112 if (remainingBytes < 4) {
113 if (reachEOS) {
114 break;
115 } else {
116 memcpy(buf, tmp, remainingBytes);
117 bytesToRead = kMaxReadBytes - remainingBytes;
118
119 /*
120 * The next read position should start from the end of
121 * the last buffer, and thus should include the remaining
122 * bytes in the buffer.
123 */
124 totalBytesRead = source->readAt(pos + remainingBytes,
125 buf + remainingBytes,
126 bytesToRead);
127 if (totalBytesRead <= 0) {
128 break;
129 }
130 reachEOS = (totalBytesRead != bytesToRead);
131 totalBytesRead += remainingBytes;
132 remainingBytes = totalBytesRead;
133 tmp = buf;
134 continue;
135 }
136 }
137
138 uint32_t header = U32_AT(tmp);
139
140 if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
141 ++pos;
142 ++tmp;
143 --remainingBytes;
144 continue;
145 }
146
147 size_t frame_size;
148 int sample_rate, num_channels, bitrate;
149 if (!GetMPEGAudioFrameSize(
150 header, &frame_size,
151 &sample_rate, &num_channels, &bitrate)) {
152 ++pos;
153 ++tmp;
154 --remainingBytes;
155 continue;
156 }
157
158 ALOGV("found possible 1st frame at %lld (header = 0x%08x)", (long long)pos, header);
159
160 // We found what looks like a valid frame,
161 // now find its successors.
162
163 off64_t test_pos = pos + frame_size;
164
165 valid = true;
166 for (int j = 0; j < 3; ++j) {
167 uint8_t tmp[4];
168 if (source->readAt(test_pos, tmp, 4) < 4) {
169 valid = false;
170 break;
171 }
172
173 uint32_t test_header = U32_AT(tmp);
174
175 ALOGV("subsequent header is %08x", test_header);
176
177 if ((test_header & kMask) != (header & kMask)) {
178 valid = false;
179 break;
180 }
181
182 size_t test_frame_size;
183 if (!GetMPEGAudioFrameSize(
184 test_header, &test_frame_size)) {
185 valid = false;
186 break;
187 }
188
189 ALOGV("found subsequent frame #%d at %lld", j + 2, (long long)test_pos);
190
191 test_pos += test_frame_size;
192 }
193
194 if (valid) {
195 *inout_pos = pos;
196
197 if (out_header != NULL) {
198 *out_header = header;
199 }
200 } else {
201 ALOGV("no dice, no valid sequence of frames found.");
202 }
203
204 ++pos;
205 ++tmp;
206 --remainingBytes;
207 } while (!valid);
208
209 return valid;
210 }
211
212 class MP3Source : public MediaTrack {
213 public:
214 MP3Source(
215 MetaDataBase &meta, DataSourceBase *source,
216 off64_t first_frame_pos, uint32_t fixed_header,
217 MP3Seeker *seeker);
218
219 virtual status_t start(MetaDataBase *params = NULL);
220 virtual status_t stop();
221
222 virtual status_t getFormat(MetaDataBase &meta);
223
224 virtual status_t read(
225 MediaBufferBase **buffer, const ReadOptions *options = NULL);
226
227 protected:
228 virtual ~MP3Source();
229
230 private:
231 static const size_t kMaxFrameSize;
232 MetaDataBase &mMeta;
233 DataSourceBase *mDataSource;
234 off64_t mFirstFramePos;
235 uint32_t mFixedHeader;
236 off64_t mCurrentPos;
237 int64_t mCurrentTimeUs;
238 bool mStarted;
239 MP3Seeker *mSeeker;
240 MediaBufferGroup *mGroup;
241
242 int64_t mBasisTimeUs;
243 int64_t mSamplesRead;
244
245 MP3Source(const MP3Source &);
246 MP3Source &operator=(const MP3Source &);
247 };
248
// What the sniffer learned about a stream. Sniff() fills one of these in and
// the MP3Extractor constructor consumes it instead of resynchronizing again.
struct Mp3Meta {
    // Offset of the first valid MPEG audio frame.
    off64_t pos;

    // Offset immediately after any leading ID3v2 tag(s).
    off64_t post_id3_pos;

    // The 32-bit header of the frame located at |pos|.
    uint32_t header;
};
254
MP3Extractor(DataSourceBase * source,Mp3Meta * meta)255 MP3Extractor::MP3Extractor(
256 DataSourceBase *source, Mp3Meta *meta)
257 : mInitCheck(NO_INIT),
258 mDataSource(source),
259 mFirstFramePos(-1),
260 mFixedHeader(0),
261 mSeeker(NULL) {
262
263 off64_t pos = 0;
264 off64_t post_id3_pos;
265 uint32_t header;
266 bool success;
267
268 if (meta != NULL) {
269 // The sniffer has already done all the hard work for us, simply
270 // accept its judgement.
271 pos = meta->pos;
272 header = meta->header;
273 post_id3_pos = meta->post_id3_pos;
274 success = true;
275 } else {
276 success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header);
277 }
278
279 if (!success) {
280 // mInitCheck will remain NO_INIT
281 return;
282 }
283
284 mFirstFramePos = pos;
285 mFixedHeader = header;
286 XINGSeeker *seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
287
288 if (seeker == NULL) {
289 mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
290 } else {
291 mSeeker = seeker;
292 int encd = seeker->getEncoderDelay();
293 int encp = seeker->getEncoderPadding();
294 if (encd != 0 || encp != 0) {
295 mMeta.setInt32(kKeyEncoderDelay, encd);
296 mMeta.setInt32(kKeyEncoderPadding, encp);
297 }
298 }
299
300 if (mSeeker != NULL) {
301 // While it is safe to send the XING/VBRI frame to the decoder, this will
302 // result in an extra 1152 samples being output. In addition, the bitrate
303 // of the Xing header might not match the rest of the file, which could
304 // lead to problems when seeking. The real first frame to decode is after
305 // the XING/VBRI frame, so skip there.
306 size_t frame_size;
307 int sample_rate;
308 int num_channels;
309 int bitrate;
310 GetMPEGAudioFrameSize(
311 header, &frame_size, &sample_rate, &num_channels, &bitrate);
312 pos += frame_size;
313 if (!Resync(mDataSource, 0, &pos, &post_id3_pos, &header)) {
314 // mInitCheck will remain NO_INIT
315 return;
316 }
317 mFirstFramePos = pos;
318 mFixedHeader = header;
319 }
320
321 size_t frame_size;
322 int sample_rate;
323 int num_channels;
324 int bitrate;
325 GetMPEGAudioFrameSize(
326 header, &frame_size, &sample_rate, &num_channels, &bitrate);
327
328 unsigned layer = 4 - ((header >> 17) & 3);
329
330 switch (layer) {
331 case 1:
332 mMeta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
333 break;
334 case 2:
335 mMeta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
336 break;
337 case 3:
338 mMeta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
339 break;
340 default:
341 TRESPASS();
342 }
343
344 mMeta.setInt32(kKeySampleRate, sample_rate);
345 mMeta.setInt32(kKeyBitRate, bitrate * 1000);
346 mMeta.setInt32(kKeyChannelCount, num_channels);
347
348 int64_t durationUs;
349
350 if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {
351 off64_t fileSize;
352 if (mDataSource->getSize(&fileSize) == OK) {
353 off64_t dataLength = fileSize - mFirstFramePos;
354 if (dataLength > INT64_MAX / 8000LL) {
355 // duration would overflow
356 durationUs = INT64_MAX;
357 } else {
358 durationUs = 8000LL * dataLength / bitrate;
359 }
360 } else {
361 durationUs = -1;
362 }
363 }
364
365 if (durationUs >= 0) {
366 mMeta.setInt64(kKeyDuration, durationUs);
367 }
368
369 mInitCheck = OK;
370
371 // Get iTunes-style gapless info if present.
372 // When getting the id3 tag, skip the V1 tags to prevent the source cache
373 // from being iterated to the end of the file.
374 ID3 id3(mDataSource, true);
375 if (id3.isValid()) {
376 ID3::Iterator *com = new ID3::Iterator(id3, "COM");
377 if (com->done()) {
378 delete com;
379 com = new ID3::Iterator(id3, "COMM");
380 }
381 while(!com->done()) {
382 String8 commentdesc;
383 String8 commentvalue;
384 com->getString(&commentdesc, &commentvalue);
385 const char * desc = commentdesc.string();
386 const char * value = commentvalue.string();
387
388 // first 3 characters are the language, which we don't care about
389 if(strlen(desc) > 3 && strcmp(desc + 3, "iTunSMPB") == 0) {
390
391 int32_t delay, padding;
392 if (sscanf(value, " %*x %x %x %*x", &delay, &padding) == 2) {
393 mMeta.setInt32(kKeyEncoderDelay, delay);
394 mMeta.setInt32(kKeyEncoderPadding, padding);
395 }
396 break;
397 }
398 com->next();
399 }
400 delete com;
401 com = NULL;
402 }
403 }
404
~MP3Extractor()405 MP3Extractor::~MP3Extractor() {
406 delete mSeeker;
407 }
408
countTracks()409 size_t MP3Extractor::countTracks() {
410 return mInitCheck != OK ? 0 : 1;
411 }
412
getTrack(size_t index)413 MediaTrack *MP3Extractor::getTrack(size_t index) {
414 if (mInitCheck != OK || index != 0) {
415 return NULL;
416 }
417
418 return new MP3Source(
419 mMeta, mDataSource, mFirstFramePos, mFixedHeader,
420 mSeeker);
421 }
422
getTrackMetaData(MetaDataBase & meta,size_t index,uint32_t)423 status_t MP3Extractor::getTrackMetaData(
424 MetaDataBase &meta,
425 size_t index, uint32_t /* flags */) {
426 if (mInitCheck != OK || index != 0) {
427 return UNKNOWN_ERROR;
428 }
429 meta = mMeta;
430 return OK;
431 }
432
433 ////////////////////////////////////////////////////////////////////////////////
434
435 // The theoretical maximum frame size for an MPEG audio stream should occur
436 // while playing a Layer 2, MPEGv2.5 audio stream at 160kbps (with padding).
437 // The size of this frame should be...
438 // ((1152 samples/frame * 160000 bits/sec) /
439 // (8000 samples/sec * 8 bits/byte)) + 1 padding byte/frame = 2881 bytes/frame.
440 // Set our max frame size to the nearest power of 2 above this size (aka, 4kB)
441 const size_t MP3Source::kMaxFrameSize = (1 << 12); /* 4096 bytes */
MP3Source(MetaDataBase & meta,DataSourceBase * source,off64_t first_frame_pos,uint32_t fixed_header,MP3Seeker * seeker)442 MP3Source::MP3Source(
443 MetaDataBase &meta, DataSourceBase *source,
444 off64_t first_frame_pos, uint32_t fixed_header,
445 MP3Seeker *seeker)
446 : mMeta(meta),
447 mDataSource(source),
448 mFirstFramePos(first_frame_pos),
449 mFixedHeader(fixed_header),
450 mCurrentPos(0),
451 mCurrentTimeUs(0),
452 mStarted(false),
453 mSeeker(seeker),
454 mGroup(NULL),
455 mBasisTimeUs(0),
456 mSamplesRead(0) {
457 }
458
~MP3Source()459 MP3Source::~MP3Source() {
460 if (mStarted) {
461 stop();
462 }
463 }
464
start(MetaDataBase *)465 status_t MP3Source::start(MetaDataBase *) {
466 CHECK(!mStarted);
467
468 mGroup = new MediaBufferGroup;
469
470 mGroup->add_buffer(MediaBufferBase::Create(kMaxFrameSize));
471
472 mCurrentPos = mFirstFramePos;
473 mCurrentTimeUs = 0;
474
475 mBasisTimeUs = mCurrentTimeUs;
476 mSamplesRead = 0;
477
478 mStarted = true;
479
480 return OK;
481 }
482
stop()483 status_t MP3Source::stop() {
484 CHECK(mStarted);
485
486 delete mGroup;
487 mGroup = NULL;
488
489 mStarted = false;
490
491 return OK;
492 }
493
getFormat(MetaDataBase & meta)494 status_t MP3Source::getFormat(MetaDataBase &meta) {
495 meta = mMeta;
496 return OK;
497 }
498
read(MediaBufferBase ** out,const ReadOptions * options)499 status_t MP3Source::read(
500 MediaBufferBase **out, const ReadOptions *options) {
501 *out = NULL;
502
503 int64_t seekTimeUs;
504 ReadOptions::SeekMode mode;
505 bool seekCBR = false;
506
507 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
508 int64_t actualSeekTimeUs = seekTimeUs;
509 if (mSeeker == NULL
510 || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) {
511 int32_t bitrate;
512 if (!mMeta.findInt32(kKeyBitRate, &bitrate)) {
513 // bitrate is in bits/sec.
514 ALOGI("no bitrate");
515
516 return ERROR_UNSUPPORTED;
517 }
518
519 mCurrentTimeUs = seekTimeUs;
520 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
521 seekCBR = true;
522 } else {
523 mCurrentTimeUs = actualSeekTimeUs;
524 }
525
526 mBasisTimeUs = mCurrentTimeUs;
527 mSamplesRead = 0;
528 }
529
530 MediaBufferBase *buffer;
531 status_t err = mGroup->acquire_buffer(&buffer);
532 if (err != OK) {
533 return err;
534 }
535
536 size_t frame_size;
537 int bitrate;
538 int num_samples;
539 int sample_rate;
540 for (;;) {
541 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
542 if (n < 4) {
543 buffer->release();
544 buffer = NULL;
545
546 return (n < 0 ? n : ERROR_END_OF_STREAM);
547 }
548
549 uint32_t header = U32_AT((const uint8_t *)buffer->data());
550
551 if ((header & kMask) == (mFixedHeader & kMask)
552 && GetMPEGAudioFrameSize(
553 header, &frame_size, &sample_rate, NULL,
554 &bitrate, &num_samples)) {
555
556 // re-calculate mCurrentTimeUs because we might have called Resync()
557 if (seekCBR) {
558 mCurrentTimeUs = (mCurrentPos - mFirstFramePos) * 8000 / bitrate;
559 mBasisTimeUs = mCurrentTimeUs;
560 }
561
562 break;
563 }
564
565 // Lost sync.
566 ALOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
567
568 off64_t pos = mCurrentPos;
569 if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) {
570 ALOGE("Unable to resync. Signalling end of stream.");
571
572 buffer->release();
573 buffer = NULL;
574
575 return ERROR_END_OF_STREAM;
576 }
577
578 mCurrentPos = pos;
579
580 // Try again with the new position.
581 }
582
583 CHECK(frame_size <= buffer->size());
584
585 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
586 if (n < (ssize_t)frame_size) {
587 buffer->release();
588 buffer = NULL;
589
590 return (n < 0 ? n : ERROR_END_OF_STREAM);
591 }
592
593 buffer->set_range(0, frame_size);
594
595 buffer->meta_data().setInt64(kKeyTime, mCurrentTimeUs);
596 buffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
597
598 mCurrentPos += frame_size;
599
600 mSamplesRead += num_samples;
601 mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate);
602
603 *out = buffer;
604
605 return OK;
606 }
607
getMetaData(MetaDataBase & meta)608 status_t MP3Extractor::getMetaData(MetaDataBase &meta) {
609 meta.clear();
610 if (mInitCheck != OK) {
611 return UNKNOWN_ERROR;
612 }
613 meta.setCString(kKeyMIMEType, "audio/mpeg");
614
615 ID3 id3(mDataSource);
616
617 if (!id3.isValid()) {
618 return OK;
619 }
620
621 struct Map {
622 int key;
623 const char *tag1;
624 const char *tag2;
625 };
626 static const Map kMap[] = {
627 { kKeyAlbum, "TALB", "TAL" },
628 { kKeyArtist, "TPE1", "TP1" },
629 { kKeyAlbumArtist, "TPE2", "TP2" },
630 { kKeyComposer, "TCOM", "TCM" },
631 { kKeyGenre, "TCON", "TCO" },
632 { kKeyTitle, "TIT2", "TT2" },
633 { kKeyYear, "TYE", "TYER" },
634 { kKeyAuthor, "TXT", "TEXT" },
635 { kKeyCDTrackNumber, "TRK", "TRCK" },
636 { kKeyDiscNumber, "TPA", "TPOS" },
637 { kKeyCompilation, "TCP", "TCMP" },
638 };
639 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
640
641 for (size_t i = 0; i < kNumMapEntries; ++i) {
642 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
643 if (it->done()) {
644 delete it;
645 it = new ID3::Iterator(id3, kMap[i].tag2);
646 }
647
648 if (it->done()) {
649 delete it;
650 continue;
651 }
652
653 String8 s;
654 it->getString(&s);
655 delete it;
656
657 meta.setCString(kMap[i].key, s);
658 }
659
660 size_t dataSize;
661 String8 mime;
662 const void *data = id3.getAlbumArt(&dataSize, &mime);
663
664 if (data) {
665 meta.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
666 meta.setCString(kKeyAlbumArtMIME, mime.string());
667 }
668
669 return OK;
670 }
671
CreateExtractor(DataSourceBase * source,void * meta)672 static MediaExtractor* CreateExtractor(
673 DataSourceBase *source,
674 void *meta) {
675 Mp3Meta *metaData = static_cast<Mp3Meta *>(meta);
676 return new MP3Extractor(source, metaData);
677 }
678
Sniff(DataSourceBase * source,float * confidence,void ** meta,MediaExtractor::FreeMetaFunc * freeMeta)679 static MediaExtractor::CreatorFunc Sniff(
680 DataSourceBase *source, float *confidence, void **meta,
681 MediaExtractor::FreeMetaFunc *freeMeta) {
682 off64_t pos = 0;
683 off64_t post_id3_pos;
684 uint32_t header;
685 uint8_t mpeg_header[5];
686 if (source->readAt(0, mpeg_header, sizeof(mpeg_header)) < (ssize_t)sizeof(mpeg_header)) {
687 return NULL;
688 }
689
690 if (!memcmp("\x00\x00\x01\xba", mpeg_header, 4) && (mpeg_header[4] >> 4) == 2) {
691 ALOGV("MPEG1PS container is not supported!");
692 return NULL;
693 }
694 if (!Resync(source, 0, &pos, &post_id3_pos, &header)) {
695 return NULL;
696 }
697
698 Mp3Meta *mp3Meta = new Mp3Meta;
699 mp3Meta->pos = pos;
700 mp3Meta->header = header;
701 mp3Meta->post_id3_pos = post_id3_pos;
702 *meta = mp3Meta;
703 *freeMeta = ::free;
704
705 *confidence = 0.2f;
706
707 return CreateExtractor;
708 }
709
710 extern "C" {
711 // This is the only symbol that needs to be exported
712 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()713 MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
714 return {
715 MediaExtractor::EXTRACTORDEF_VERSION,
716 UUID("812a3f6c-c8cf-46de-b529-3774b14103d4"),
717 1, // version
718 "MP3 Extractor",
719 Sniff
720 };
721 }
722
723 } // extern "C"
724
725 } // namespace android
726