1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MP3Extractor"
19 #include <utils/Log.h>
20 
21 #include "include/MP3Extractor.h"
22 
23 #include "include/avc_utils.h"
24 #include "include/ID3.h"
25 #include "include/VBRISeeker.h"
26 #include "include/XINGSeeker.h"
27 
28 #include <media/stagefright/foundation/ADebug.h>
29 #include <media/stagefright/foundation/AMessage.h>
30 #include <media/stagefright/DataSource.h>
31 #include <media/stagefright/MediaBuffer.h>
32 #include <media/stagefright/MediaBufferGroup.h>
33 #include <media/stagefright/MediaDefs.h>
34 #include <media/stagefright/MediaErrors.h>
35 #include <media/stagefright/MediaSource.h>
36 #include <media/stagefright/MetaData.h>
37 #include <media/stagefright/Utils.h>
38 #include <utils/String8.h>
39 
40 namespace android {
41 
// Selects the bits of a 32-bit MPEG audio frame header that have to be
// identical in two headers for them to be considered part of the same stream.
// Everything must match except for
// protection, bitrate, padding, private bits, mode, mode extension,
// copyright bit, original bit and emphasis.
// What remains under the mask is bits 31-17 and 11-10, i.e. the sync word,
// the version, the layer and the sampling rate index.
static const uint32_t kMask = 0xfffe0c00;
47 
Resync(const sp<DataSource> & source,uint32_t match_header,off64_t * inout_pos,off64_t * post_id3_pos,uint32_t * out_header)48 static bool Resync(
49         const sp<DataSource> &source, uint32_t match_header,
50         off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) {
51     if (post_id3_pos != NULL) {
52         *post_id3_pos = 0;
53     }
54 
55     if (*inout_pos == 0) {
56         // Skip an optional ID3 header if syncing at the very beginning
57         // of the datasource.
58 
59         for (;;) {
60             uint8_t id3header[10];
61             if (source->readAt(*inout_pos, id3header, sizeof(id3header))
62                     < (ssize_t)sizeof(id3header)) {
63                 // If we can't even read these 10 bytes, we might as well bail
64                 // out, even if there _were_ 10 bytes of valid mp3 audio data...
65                 return false;
66             }
67 
68             if (memcmp("ID3", id3header, 3)) {
69                 break;
70             }
71 
72             // Skip the ID3v2 header.
73 
74             size_t len =
75                 ((id3header[6] & 0x7f) << 21)
76                 | ((id3header[7] & 0x7f) << 14)
77                 | ((id3header[8] & 0x7f) << 7)
78                 | (id3header[9] & 0x7f);
79 
80             len += 10;
81 
82             *inout_pos += len;
83 
84             ALOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)",
85                     (long long)*inout_pos, (long long)*inout_pos);
86         }
87 
88         if (post_id3_pos != NULL) {
89             *post_id3_pos = *inout_pos;
90         }
91     }
92 
93     off64_t pos = *inout_pos;
94     bool valid = false;
95 
96     const size_t kMaxReadBytes = 1024;
97     const size_t kMaxBytesChecked = 128 * 1024;
98     uint8_t buf[kMaxReadBytes];
99     ssize_t bytesToRead = kMaxReadBytes;
100     ssize_t totalBytesRead = 0;
101     ssize_t remainingBytes = 0;
102     bool reachEOS = false;
103     uint8_t *tmp = buf;
104 
105     do {
106         if (pos >= (off64_t)(*inout_pos + kMaxBytesChecked)) {
107             // Don't scan forever.
108             ALOGV("giving up at offset %lld", (long long)pos);
109             break;
110         }
111 
112         if (remainingBytes < 4) {
113             if (reachEOS) {
114                 break;
115             } else {
116                 memcpy(buf, tmp, remainingBytes);
117                 bytesToRead = kMaxReadBytes - remainingBytes;
118 
119                 /*
120                  * The next read position should start from the end of
121                  * the last buffer, and thus should include the remaining
122                  * bytes in the buffer.
123                  */
124                 totalBytesRead = source->readAt(pos + remainingBytes,
125                                                 buf + remainingBytes,
126                                                 bytesToRead);
127                 if (totalBytesRead <= 0) {
128                     break;
129                 }
130                 reachEOS = (totalBytesRead != bytesToRead);
131                 totalBytesRead += remainingBytes;
132                 remainingBytes = totalBytesRead;
133                 tmp = buf;
134                 continue;
135             }
136         }
137 
138         uint32_t header = U32_AT(tmp);
139 
140         if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
141             ++pos;
142             ++tmp;
143             --remainingBytes;
144             continue;
145         }
146 
147         size_t frame_size;
148         int sample_rate, num_channels, bitrate;
149         if (!GetMPEGAudioFrameSize(
150                     header, &frame_size,
151                     &sample_rate, &num_channels, &bitrate)) {
152             ++pos;
153             ++tmp;
154             --remainingBytes;
155             continue;
156         }
157 
158         ALOGV("found possible 1st frame at %lld (header = 0x%08x)", (long long)pos, header);
159 
160         // We found what looks like a valid frame,
161         // now find its successors.
162 
163         off64_t test_pos = pos + frame_size;
164 
165         valid = true;
166         for (int j = 0; j < 3; ++j) {
167             uint8_t tmp[4];
168             if (source->readAt(test_pos, tmp, 4) < 4) {
169                 valid = false;
170                 break;
171             }
172 
173             uint32_t test_header = U32_AT(tmp);
174 
175             ALOGV("subsequent header is %08x", test_header);
176 
177             if ((test_header & kMask) != (header & kMask)) {
178                 valid = false;
179                 break;
180             }
181 
182             size_t test_frame_size;
183             if (!GetMPEGAudioFrameSize(
184                         test_header, &test_frame_size)) {
185                 valid = false;
186                 break;
187             }
188 
189             ALOGV("found subsequent frame #%d at %lld", j + 2, (long long)test_pos);
190 
191             test_pos += test_frame_size;
192         }
193 
194         if (valid) {
195             *inout_pos = pos;
196 
197             if (out_header != NULL) {
198                 *out_header = header;
199             }
200         } else {
201             ALOGV("no dice, no valid sequence of frames found.");
202         }
203 
204         ++pos;
205         ++tmp;
206         --remainingBytes;
207     } while (!valid);
208 
209     return valid;
210 }
211 
// MediaSource that returns one MPEG audio frame per read() call, taken from
// the given DataSource starting at a known first-frame offset.
class MP3Source : public MediaSource {
public:
    // meta:            format handed back by getFormat(); read() also looks
    //                  up kKeyBitRate in it when seeking without a seeker.
    // source:          data the frames are read from.
    // first_frame_pos: byte offset start() rewinds to.
    // fixed_header:    reference header every frame header is compared
    //                  against (under kMask).
    // seeker:          optional seek helper; may be NULL.
    MP3Source(
            const sp<MetaData> &meta, const sp<DataSource> &source,
            off64_t first_frame_pos, uint32_t fixed_header,
            const sp<MP3Seeker> &seeker);

    // Allocates the buffer group and rewinds to the first frame.
    virtual status_t start(MetaData *params = NULL);
    // Releases the buffer group allocated by start().
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    // Produces the next frame in |*buffer|, honouring a seek request in
    // |options| if there is one.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Stops the source if it is still started.
    virtual ~MP3Source();

private:
    // Size of the single buffer added to mGroup in start().
    static const size_t kMaxFrameSize;
    sp<MetaData> mMeta;
    sp<DataSource> mDataSource;
    off64_t mFirstFramePos;
    uint32_t mFixedHeader;
    // Offset of the next frame to read.
    off64_t mCurrentPos;
    // Timestamp attached to the next buffer returned by read().
    int64_t mCurrentTimeUs;
    // True between start() and stop().
    bool mStarted;
    sp<MP3Seeker> mSeeker;
    // Owned; created in start(), deleted in stop().
    MediaBufferGroup *mGroup;

    // read() derives mCurrentTimeUs as mBasisTimeUs plus the duration of
    // mSamplesRead samples; both are reset on start() and on every seek.
    int64_t mBasisTimeUs;
    int64_t mSamplesRead;

    // Declared private to disallow copying; no definition is visible in this
    // file.
    MP3Source(const MP3Source &);
    MP3Source &operator=(const MP3Source &);
};
248 
MP3Extractor(const sp<DataSource> & source,const sp<AMessage> & meta)249 MP3Extractor::MP3Extractor(
250         const sp<DataSource> &source, const sp<AMessage> &meta)
251     : mInitCheck(NO_INIT),
252       mDataSource(source),
253       mFirstFramePos(-1),
254       mFixedHeader(0) {
255 
256     off64_t pos = 0;
257     off64_t post_id3_pos;
258     uint32_t header;
259     bool success;
260 
261     int64_t meta_offset;
262     uint32_t meta_header;
263     int64_t meta_post_id3_offset;
264     if (meta != NULL
265             && meta->findInt64("offset", &meta_offset)
266             && meta->findInt32("header", (int32_t *)&meta_header)
267             && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) {
268         // The sniffer has already done all the hard work for us, simply
269         // accept its judgement.
270         pos = (off64_t)meta_offset;
271         header = meta_header;
272         post_id3_pos = (off64_t)meta_post_id3_offset;
273 
274         success = true;
275     } else {
276         success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header);
277     }
278 
279     if (!success) {
280         // mInitCheck will remain NO_INIT
281         return;
282     }
283 
284     mFirstFramePos = pos;
285     mFixedHeader = header;
286     mMeta = new MetaData;
287     sp<XINGSeeker> seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
288 
289     if (seeker == NULL) {
290         mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
291     } else {
292         mSeeker = seeker;
293         int encd = seeker->getEncoderDelay();
294         int encp = seeker->getEncoderPadding();
295         if (encd != 0 || encp != 0) {
296             mMeta->setInt32(kKeyEncoderDelay, encd);
297             mMeta->setInt32(kKeyEncoderPadding, encp);
298         }
299     }
300 
301     if (mSeeker != NULL) {
302         // While it is safe to send the XING/VBRI frame to the decoder, this will
303         // result in an extra 1152 samples being output. In addition, the bitrate
304         // of the Xing header might not match the rest of the file, which could
305         // lead to problems when seeking. The real first frame to decode is after
306         // the XING/VBRI frame, so skip there.
307         size_t frame_size;
308         int sample_rate;
309         int num_channels;
310         int bitrate;
311         GetMPEGAudioFrameSize(
312                 header, &frame_size, &sample_rate, &num_channels, &bitrate);
313         pos += frame_size;
314         if (!Resync(mDataSource, 0, &pos, &post_id3_pos, &header)) {
315             // mInitCheck will remain NO_INIT
316             return;
317         }
318         mFirstFramePos = pos;
319         mFixedHeader = header;
320     }
321 
322     size_t frame_size;
323     int sample_rate;
324     int num_channels;
325     int bitrate;
326     GetMPEGAudioFrameSize(
327             header, &frame_size, &sample_rate, &num_channels, &bitrate);
328 
329     unsigned layer = 4 - ((header >> 17) & 3);
330 
331     switch (layer) {
332         case 1:
333             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
334             break;
335         case 2:
336             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
337             break;
338         case 3:
339             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
340             break;
341         default:
342             TRESPASS();
343     }
344 
345     mMeta->setInt32(kKeySampleRate, sample_rate);
346     mMeta->setInt32(kKeyBitRate, bitrate * 1000);
347     mMeta->setInt32(kKeyChannelCount, num_channels);
348 
349     int64_t durationUs;
350 
351     if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {
352         off64_t fileSize;
353         if (mDataSource->getSize(&fileSize) == OK) {
354             off64_t dataLength = fileSize - mFirstFramePos;
355             if (dataLength > INT64_MAX / 8000LL) {
356                 // duration would overflow
357                 durationUs = INT64_MAX;
358             } else {
359                 durationUs = 8000LL * dataLength / bitrate;
360             }
361         } else {
362             durationUs = -1;
363         }
364     }
365 
366     if (durationUs >= 0) {
367         mMeta->setInt64(kKeyDuration, durationUs);
368     }
369 
370     mInitCheck = OK;
371 
372     // Get iTunes-style gapless info if present.
373     // When getting the id3 tag, skip the V1 tags to prevent the source cache
374     // from being iterated to the end of the file.
375     ID3 id3(mDataSource, true);
376     if (id3.isValid()) {
377         ID3::Iterator *com = new ID3::Iterator(id3, "COM");
378         if (com->done()) {
379             delete com;
380             com = new ID3::Iterator(id3, "COMM");
381         }
382         while(!com->done()) {
383             String8 commentdesc;
384             String8 commentvalue;
385             com->getString(&commentdesc, &commentvalue);
386             const char * desc = commentdesc.string();
387             const char * value = commentvalue.string();
388 
389             // first 3 characters are the language, which we don't care about
390             if(strlen(desc) > 3 && strcmp(desc + 3, "iTunSMPB") == 0) {
391 
392                 int32_t delay, padding;
393                 if (sscanf(value, " %*x %x %x %*x", &delay, &padding) == 2) {
394                     mMeta->setInt32(kKeyEncoderDelay, delay);
395                     mMeta->setInt32(kKeyEncoderPadding, padding);
396                 }
397                 break;
398             }
399             com->next();
400         }
401         delete com;
402         com = NULL;
403     }
404 }
405 
countTracks()406 size_t MP3Extractor::countTracks() {
407     return mInitCheck != OK ? 0 : 1;
408 }
409 
getTrack(size_t index)410 sp<IMediaSource> MP3Extractor::getTrack(size_t index) {
411     if (mInitCheck != OK || index != 0) {
412         return NULL;
413     }
414 
415     return new MP3Source(
416             mMeta, mDataSource, mFirstFramePos, mFixedHeader,
417             mSeeker);
418 }
419 
getTrackMetaData(size_t index,uint32_t)420 sp<MetaData> MP3Extractor::getTrackMetaData(
421         size_t index, uint32_t /* flags */) {
422     if (mInitCheck != OK || index != 0) {
423         return NULL;
424     }
425 
426     return mMeta;
427 }
428 
429 ////////////////////////////////////////////////////////////////////////////////
430 
// The theoretical maximum frame size for an MPEG audio stream should occur
// while playing a Layer 2, MPEGv2.5 audio stream at 160kbps (with padding).
// The size of this frame should be...
// ((1152 samples/frame * 160000 bits/sec) /
//  (8000 samples/sec * 8 bits/byte)) + 1 padding byte/frame = 2881 bytes/frame.
// Our max frame size is the next power of 2 above that, i.e. 4096 bytes. It
// is the size of the one buffer start() allocates, and read() CHECKs that
// every frame fits into it.
const size_t MP3Source::kMaxFrameSize = (1 << 12); /* 4096 bytes */
MP3Source(const sp<MetaData> & meta,const sp<DataSource> & source,off64_t first_frame_pos,uint32_t fixed_header,const sp<MP3Seeker> & seeker)438 MP3Source::MP3Source(
439         const sp<MetaData> &meta, const sp<DataSource> &source,
440         off64_t first_frame_pos, uint32_t fixed_header,
441         const sp<MP3Seeker> &seeker)
442     : mMeta(meta),
443       mDataSource(source),
444       mFirstFramePos(first_frame_pos),
445       mFixedHeader(fixed_header),
446       mCurrentPos(0),
447       mCurrentTimeUs(0),
448       mStarted(false),
449       mSeeker(seeker),
450       mGroup(NULL),
451       mBasisTimeUs(0),
452       mSamplesRead(0) {
453 }
454 
~MP3Source()455 MP3Source::~MP3Source() {
456     if (mStarted) {
457         stop();
458     }
459 }
460 
start(MetaData *)461 status_t MP3Source::start(MetaData *) {
462     CHECK(!mStarted);
463 
464     mGroup = new MediaBufferGroup;
465 
466     mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
467 
468     mCurrentPos = mFirstFramePos;
469     mCurrentTimeUs = 0;
470 
471     mBasisTimeUs = mCurrentTimeUs;
472     mSamplesRead = 0;
473 
474     mStarted = true;
475 
476     return OK;
477 }
478 
stop()479 status_t MP3Source::stop() {
480     CHECK(mStarted);
481 
482     delete mGroup;
483     mGroup = NULL;
484 
485     mStarted = false;
486 
487     return OK;
488 }
489 
getFormat()490 sp<MetaData> MP3Source::getFormat() {
491     return mMeta;
492 }
493 
read(MediaBuffer ** out,const ReadOptions * options)494 status_t MP3Source::read(
495         MediaBuffer **out, const ReadOptions *options) {
496     *out = NULL;
497 
498     int64_t seekTimeUs;
499     ReadOptions::SeekMode mode;
500     bool seekCBR = false;
501 
502     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
503         int64_t actualSeekTimeUs = seekTimeUs;
504         if (mSeeker == NULL
505                 || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) {
506             int32_t bitrate;
507             if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
508                 // bitrate is in bits/sec.
509                 ALOGI("no bitrate");
510 
511                 return ERROR_UNSUPPORTED;
512             }
513 
514             mCurrentTimeUs = seekTimeUs;
515             mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
516             seekCBR = true;
517         } else {
518             mCurrentTimeUs = actualSeekTimeUs;
519         }
520 
521         mBasisTimeUs = mCurrentTimeUs;
522         mSamplesRead = 0;
523     }
524 
525     MediaBuffer *buffer;
526     status_t err = mGroup->acquire_buffer(&buffer);
527     if (err != OK) {
528         return err;
529     }
530 
531     size_t frame_size;
532     int bitrate;
533     int num_samples;
534     int sample_rate;
535     for (;;) {
536         ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
537         if (n < 4) {
538             buffer->release();
539             buffer = NULL;
540 
541             return ERROR_END_OF_STREAM;
542         }
543 
544         uint32_t header = U32_AT((const uint8_t *)buffer->data());
545 
546         if ((header & kMask) == (mFixedHeader & kMask)
547             && GetMPEGAudioFrameSize(
548                 header, &frame_size, &sample_rate, NULL,
549                 &bitrate, &num_samples)) {
550 
551             // re-calculate mCurrentTimeUs because we might have called Resync()
552             if (seekCBR) {
553                 mCurrentTimeUs = (mCurrentPos - mFirstFramePos) * 8000 / bitrate;
554                 mBasisTimeUs = mCurrentTimeUs;
555             }
556 
557             break;
558         }
559 
560         // Lost sync.
561         ALOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
562 
563         off64_t pos = mCurrentPos;
564         if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) {
565             ALOGE("Unable to resync. Signalling end of stream.");
566 
567             buffer->release();
568             buffer = NULL;
569 
570             return ERROR_END_OF_STREAM;
571         }
572 
573         mCurrentPos = pos;
574 
575         // Try again with the new position.
576     }
577 
578     CHECK(frame_size <= buffer->size());
579 
580     ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
581     if (n < (ssize_t)frame_size) {
582         buffer->release();
583         buffer = NULL;
584 
585         return ERROR_END_OF_STREAM;
586     }
587 
588     buffer->set_range(0, frame_size);
589 
590     buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
591     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
592 
593     mCurrentPos += frame_size;
594 
595     mSamplesRead += num_samples;
596     mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate);
597 
598     *out = buffer;
599 
600     return OK;
601 }
602 
getMetaData()603 sp<MetaData> MP3Extractor::getMetaData() {
604     sp<MetaData> meta = new MetaData;
605 
606     if (mInitCheck != OK) {
607         return meta;
608     }
609 
610     meta->setCString(kKeyMIMEType, "audio/mpeg");
611 
612     ID3 id3(mDataSource);
613 
614     if (!id3.isValid()) {
615         return meta;
616     }
617 
618     struct Map {
619         int key;
620         const char *tag1;
621         const char *tag2;
622     };
623     static const Map kMap[] = {
624         { kKeyAlbum, "TALB", "TAL" },
625         { kKeyArtist, "TPE1", "TP1" },
626         { kKeyAlbumArtist, "TPE2", "TP2" },
627         { kKeyComposer, "TCOM", "TCM" },
628         { kKeyGenre, "TCON", "TCO" },
629         { kKeyTitle, "TIT2", "TT2" },
630         { kKeyYear, "TYE", "TYER" },
631         { kKeyAuthor, "TXT", "TEXT" },
632         { kKeyCDTrackNumber, "TRK", "TRCK" },
633         { kKeyDiscNumber, "TPA", "TPOS" },
634         { kKeyCompilation, "TCP", "TCMP" },
635     };
636     static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
637 
638     for (size_t i = 0; i < kNumMapEntries; ++i) {
639         ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
640         if (it->done()) {
641             delete it;
642             it = new ID3::Iterator(id3, kMap[i].tag2);
643         }
644 
645         if (it->done()) {
646             delete it;
647             continue;
648         }
649 
650         String8 s;
651         it->getString(&s);
652         delete it;
653 
654         meta->setCString(kMap[i].key, s);
655     }
656 
657     size_t dataSize;
658     String8 mime;
659     const void *data = id3.getAlbumArt(&dataSize, &mime);
660 
661     if (data) {
662         meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
663         meta->setCString(kKeyAlbumArtMIME, mime.string());
664     }
665 
666     return meta;
667 }
668 
SniffMP3(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)669 bool SniffMP3(
670         const sp<DataSource> &source, String8 *mimeType,
671         float *confidence, sp<AMessage> *meta) {
672     off64_t pos = 0;
673     off64_t post_id3_pos;
674     uint32_t header;
675     if (!Resync(source, 0, &pos, &post_id3_pos, &header)) {
676         return false;
677     }
678 
679     *meta = new AMessage;
680     (*meta)->setInt64("offset", pos);
681     (*meta)->setInt32("header", header);
682     (*meta)->setInt64("post-id3-offset", post_id3_pos);
683 
684     *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
685     *confidence = 0.2f;
686 
687     return true;
688 }
689 
690 }  // namespace android
691