1 /*
2  * audio_parser.c, helper parser for audio codec data
3  *
4  * Copyright (c) 2009-2010 Wind River Systems, Inc.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #include <endian.h>
20 
21 //#define LOG_NDEBUG 0
22 
23 #define LOG_TAG "audio_parser"
24 #include <log.h>
25 
26 /*
27  * MP3
28  */
29 
/*
 * Overlay of the 32-bit MPEG audio frame header.
 *
 * The raw header bytes are written through h0..h3 and the individual
 * fields are read back through the bit-field view of the same storage.
 * The bit-field order is mirrored for each byte order so that 'sync'
 * lands in the 11 most significant bits and 'emphasis' in the 2 least
 * significant bits of the word whose most significant byte is h3.
 *
 * NOTE(review): this depends on the compiler allocating bit-fields in the
 * order assumed for the selected __BYTE_ORDER (the GCC convention).
 */
struct mp3_frame_header_s {
    union {
#if (__BYTE_ORDER == __LITTLE_ENDIAN)
        /* field view, least significant field first */
        struct {
            unsigned int emphasis : 2;
            unsigned int original : 1;
            unsigned int copyright : 1;
            unsigned int mode_extension : 2;
            unsigned int channel_mode : 2;
            unsigned int private_bit : 1;
            unsigned int padding_bit : 1;
            unsigned int samplingrate_index : 2;
            unsigned int bitrate_index : 4;
            unsigned int protected : 1;
            unsigned int layer_index : 2;
            unsigned int version_index : 2;
            unsigned int sync : 11;
        };
        /* byte view, h0 is the least significant byte */
        struct {
            unsigned char h0;
            unsigned char h1;
            unsigned char h2;
            unsigned char h3;
        };
#elif (__BYTE_ORDER == __BIG_ENDIAN)
        /* field view, most significant field first */
        struct {
            unsigned int sync : 11;
            unsigned int version_index : 2;
            unsigned int layer_index : 2;
            unsigned int protected : 1;
            unsigned int bitrate_index : 4;
            unsigned int samplingrate_index : 2;
            unsigned int padding_bit : 1;
            unsigned int private_bit : 1;
            unsigned int channel_mode : 2;
            unsigned int mode_extension : 2;
            unsigned int copyright : 1;
            unsigned int original : 1;
            unsigned int emphasis : 2;
        };
        /* byte view, h3 is the most significant byte */
        struct {
            unsigned char h3;
            unsigned char h2;
            unsigned char h1;
            unsigned char h0;
        };
#endif
    };
} __attribute__ ((packed));
75 
/*
 * Sampling rates in Hz for MPEG Version 1, indexed by the header's
 * sampling rate index.  Index 3 has no rate and is stored as 0.
 */
static const int sampling_rate_table_version_1[4] = {
    44100, 48000, 32000, 0,
};

/* Same layout as above, for MPEG Version 2. */
static const int sampling_rate_table_version_2[4] = {
    22050, 24000, 16000, 0,
};

/* Same layout as above, for MPEG Version 2.5. */
static const int sampling_rate_table_version_25[4] = {
    11025, 12000, 8000, 0,
};

/*
 * Per-version sampling rate table, indexed by the header's version index.
 * Version index 1 has no table and is NULL.
 */
static const int *sampling_rate_table[4] = {
    sampling_rate_table_version_25,     /* 0: version 2.5 */
    NULL,                               /* 1: no table */
    sampling_rate_table_version_2,      /* 2: version 2 */
    sampling_rate_table_version_1,      /* 3: version 1 */
};
107 
/*
 * Bitrate lookup tables, each indexed by the header's 4-bit bitrate index.
 * Entries 0 and 15 are stored as 0 in every table.
 */

/* MPEG Version 1, Layer I */
static const int bitrate_table_version_1_layer_1[16] = {
      0,  32,  64,  96, 128, 160, 192, 224,
    256, 288, 320, 352, 384, 416, 448,   0,
};

/* MPEG Version 1, Layer II */
static const int bitrate_table_version_1_layer_2[16] = {
      0,  32,  48,  56,  64,  80,  96, 112,
    128, 160, 192, 224, 256, 320, 384,   0,
};

/* MPEG Version 1, Layer III */
static const int bitrate_table_version_1_layer_3[16] = {
      0,  32,  40,  48,  56,  64,  80,  96,
    112, 128, 160, 192, 224, 256, 320,   0,
};

/* MPEG Version 2 and 2.5, Layer I */
static const int bitrate_table_version_2_25_layer_1[16] = {
      0,  32,  48,  56,  64,  80,  96, 112,
    128, 144, 160, 176, 192, 224, 256,   0,
};

/* MPEG Version 2 and 2.5, Layer II and Layer III */
static const int bitrate_table_version_2_25_layer_2_3[16] = {
      0,   8,  16,  24,  32,  40,  48,  56,
     64,  80,  96, 112, 128, 144, 160,   0,
};
207 
208 /* index : layer index */
209 static const int *bitrate_table_version_1[4] = {
210     [0] = NULL,
211     [1] = &bitrate_table_version_1_layer_3[0],
212     [2] = &bitrate_table_version_1_layer_2[0],
213     [3] = &bitrate_table_version_1_layer_1[0]
214 };
215 
216 /* index : layer index */
217 static const int *bitrate_table_version_2_25[4] = {
218     [0] = NULL,
219     [1] = &bitrate_table_version_2_25_layer_2_3[0],
220     [2] = &bitrate_table_version_2_25_layer_2_3[0],
221     [3] = &bitrate_table_version_2_25_layer_1[0],
222 };
223 
224 /* index : version index */
225 static const int **bitrate_table[4] = {
226     [0] = &bitrate_table_version_2_25[0],
227     [1] = NULL,
228     [2] = &bitrate_table_version_2_25[0],
229     [3] = &bitrate_table_version_1[0],
230 };
231 
/*
 * Human readable names for the MP3 header fields.  Each table is indexed
 * by the raw field value taken from the frame header.
 */

/* indexed by version index */
static const char *version_string[4] = {
    [0] = "MPEG Version 2.5",
    [1] = "reserved",
    [2] = "MPEG Version 2",
    [3] = "MPEG Version 1",
};

/* indexed by layer index */
static const char *layer_string[4] = {
    [0] = "reserved",
    [1] = "Layer III",
    [2] = "Layer II",
    [3] = "Layer I",
};

/* indexed by the protection bit */
static const char *protection_string[2] = {
    [0] = "Protected by CRC",
    [1] = "Not protected",
};

/* indexed by the padding bit */
static const char *padding_string[2] = {
    [0] = "frame is not padded",
    [1] = "frame is padded with one extra slot",
};

/* indexed by channel mode */
static const char *channel_mode_string[4] = {
    [0] = "Stereo",
    [1] = "Joint Stereo (Stereo)",
    [2] = "Dual Channel (2 mono channels)",
    [3] = "Single Channel (Mono)",
};

/* indexed by [layer index][mode extension]; layer index 0 has no strings */
static const char *mode_extention_string[4][4] = {
    [0] = {
        NULL,
        NULL,
        NULL,
        NULL,
    },
    [1] = {
        "intensity:off, MS stereo:off",
        "intensity:on, MS stereo:off",
        "intensity:off, MS stereo:on",
        "intensity:on, MS stereo:on",
    },
    [2] = {
        "bands 4 to 31",
        "bands 8 to 31",
        "bands 12 to 31",
        "bands 16 to 31",
    },
    [3] = {
        "bands 4 to 31",
        "bands 8 to 31",
        "bands 12 to 31",
        "bands 16 to 31",
    },
};

/* indexed by the copyright bit */
static const char *copyright_string[2] = {
    [0] = "Audio is not copyrighted",
    [1] = "Audio is copyrighted",
};

/* indexed by the original bit */
static const char *original_string[2] = {
    [0] = "Copy of original media",
    [1] = "Original media",
};

/* indexed by emphasis */
static const char *emphasis_string[4] = {
    [0] = "none",
    [1] = "50/15 ms",
    [2] = "reserved",
    [3] = "CCIT J.17",
};
283 
/*
 * Size in bytes of one padding slot, indexed by layer index
 * (1: Layer III, 2: Layer II, 3: Layer I).  Layer index 0 is invalid.
 */
static const int one_slot_length_table[4] = {
    [0] = -1,
    [1] = 1,
    [2] = 1,
    [3] = 4,
};

/*
 * Coefficient applied to bitrate / sampling rate, indexed by layer index.
 * For Layer I (index 3) the result counts 4-byte slots rather than bytes.
 * Layer index 0 is invalid.
 */
static const int bitrate_coeff_table[4] = {
    [0] = -1,
    [1] = 144,
    [2] = 144,
    [3] = 12,
};

/*
 * Compute the length in bytes of one MPEG audio frame.
 *
 *   bitrate       bitrate in kbit/s (as stored in the bitrate tables)
 *   samplingrate  sampling rate in Hz, must be greater than 0
 *   layer         layer index from the header (1: III, 2: II, 3: I)
 *   extraslot     non-zero when the frame carries one padding slot
 *
 * Returns the frame length, or -1 when an argument is out of range.
 *
 * NOTE: the coefficients are those of 1152-sample frames (384 for
 * Layer I); the version is not an input, so no per-version adjustment
 * is made here.
 */
static inline int mp3_calculate_frame_length(int bitrate, int samplingrate,
                                             int layer, int extraslot)
{
    int frame_length;

    if (layer < 1 || layer > 3)
        return -1;

    /* a zero sampling rate would divide by zero below */
    if (bitrate < 0 || samplingrate <= 0)
        return -1;

    frame_length = bitrate_coeff_table[layer] * bitrate * 1000 / samplingrate;

    /* layer I: convert the count of 4-byte slots into bytes */
    if (layer == 3)
        frame_length *= 4;

    /*
     * The padding slot is added after the conversion to bytes because
     * one_slot_length_table already holds a byte count; adding it before
     * the multiplication would pad Layer I frames by 16 bytes, not 4.
     */
    if (extraslot)
        frame_length += one_slot_length_table[layer];

    return frame_length;
}
324 
/*
 * Compute the duration of one frame in milliseconds (truncated).
 *
 *   frequency  sampling rate in Hz, must be greater than 0
 *
 * Returns the duration, or -1 when frequency is not positive.
 *
 * FIXME
 *   - It's hard coded for version 1, layer 3 (1152 samples per frame)
 */
static inline int mp3_calculate_frame_duration(int frequency)
{
    /* avoid a division by zero on an unset or invalid sampling rate */
    if (frequency <= 0)
        return -1;

    return 1152 * 1000 / frequency;
}
333 
mp3_header_parse(const unsigned char * buffer,int * version,int * layer,int * crc,int * bitrate,int * frequency,int * channel,int * mode_extension,int * frame_length,int * frame_duration)334 int mp3_header_parse(const unsigned char *buffer,
335                      int *version, int *layer, int *crc, int *bitrate,
336                      int *frequency, int *channel, int *mode_extension,
337                      int *frame_length, int *frame_duration)
338 {
339     const unsigned char *p = buffer;
340     struct mp3_frame_header_s header;
341     unsigned int version_index, layer_index, bitrate_index, samplingrate_index;
342     const int *psampling_rate_table;
343     const int *pbitrate_table, **ppbitrate_table;
344 
345     if (!p || !(p + 1) || !(p + 2) || !(p + 3))
346         return -1;
347 
348     if (!version || !layer || !crc || !bitrate || !frequency ||
349         !channel || !mode_extension)
350         return -1;
351 
352     header.h0 = *(p + 3);
353     header.h1 = *(p + 2);
354     header.h2 = *(p + 1);
355     header.h3 = *(p + 0);
356 
357     if (header.sync != 0x7ff) {
358         LOGE("cannot find sync (0x%03x)\n", header.sync);
359         return -1;
360     }
361 
362     version_index = header.version_index;
363     layer_index = header.layer_index;
364     bitrate_index = header.bitrate_index;
365     samplingrate_index = header.samplingrate_index;
366 
367     if ((version_index > 0x3) || (version_index == 0x1)) {
368         LOGE("invalid version index (%d)\n", version_index);
369         return -1;
370     }
371 
372     if (layer_index > 0x3 || layer_index < 0x1) {
373         LOGE("invalid layer index (%d)\n", layer_index);
374         return -1;
375     }
376 
377     if (bitrate_index > 0xe) {
378         LOGE("invalid bitrate index (%d)\n", bitrate_index);
379         return -1;
380     }
381 
382     if (samplingrate_index > 0x2) {
383         LOGE("invalid sampling rate index (%d)\n", samplingrate_index);
384         return -1;
385     }
386 
387     psampling_rate_table = sampling_rate_table[version_index];
388 
389     ppbitrate_table = bitrate_table[version_index];
390     pbitrate_table = ppbitrate_table[layer_index];
391 
392     *version = version_index;
393     *layer = layer_index;
394     *crc = header.protected;
395     *bitrate = pbitrate_table[bitrate_index];
396     *frequency = psampling_rate_table[samplingrate_index];
397     *channel = header.channel_mode;
398     *mode_extension = header.mode_extension;
399     *frame_length = mp3_calculate_frame_length(*bitrate, *frequency,
400                                                *layer, header.padding_bit);
401     *frame_duration = mp3_calculate_frame_duration(*frequency);
402 
403     LOGV("mp3 frame header\n");
404     LOGV("  sync: 0x%x\n", header.sync);
405     LOGV("  version: 0x%x, %s\n", header.version_index,
406          version_string[header.version_index]);
407     LOGV("  layer: 0x%x, %s\n", header.layer_index,
408          layer_string[header.layer_index]);
409     LOGV("  protection: 0x%x, %s\n", header.protected,
410          protection_string[header.protected]);
411     LOGV("  bitrate: 0x%x, %u\n", header.bitrate_index, *bitrate);
412     LOGV("  sampling rate: 0x%x, %u\n", header.samplingrate_index, *frequency);
413     LOGV("  padding bit: 0x%x, %s\n", header.padding_bit,
414          padding_string[header.padding_bit]);
415     LOGV("  private bit: 0x%x\n", header.private_bit);
416     LOGV("  channel mode: 0x%x, %s\n", header.channel_mode,
417          channel_mode_string[header.channel_mode]);
418     LOGV("  mode extension: 0x%x, %s\n", header.mode_extension,
419          mode_extention_string[header.layer_index][header.mode_extension]);
420     LOGV("  copyright: 0x%x, %s\n", header.copyright,
421          copyright_string[header.copyright]);
422     LOGV("  original: 0x%x, %s\n", header.original,
423          original_string[header.original]);
424     LOGV("  emphasis: 0x%x, %s\n", header.emphasis,
425          emphasis_string[header.emphasis]);
426     LOGV("  frame length: %d\n", *frame_length);
427     LOGV("  frame duration: %d\n", *frame_duration);
428 
429     return 0;
430 }
431 
432 /* end of MP3 */
433 
434 /*
435  * MP4
436  *   FIXME
437  *     - aot escape, explicit frequency
438  */
439 
/*
 * Overlay of the first two bytes of an audio specific config.
 *
 * The bytes are accessed through h0/h1 and the fields through the
 * bit-field view of the same storage.  The bit-field order is mirrored
 * for each byte order so that 'object_type' occupies the 5 most
 * significant bits of the 16-bit value whose high byte is h1 and whose
 * low byte is h0.
 *
 * NOTE(review): this depends on the compiler allocating bit-fields in the
 * order assumed for the selected __BYTE_ORDER (the GCC convention).
 */
struct audio_specific_config_s {
    union {
#if (__BYTE_ORDER == __LITTLE_ENDIAN)
        /* field view, least significant field first */
        struct {
            unsigned int extension_flag : 1;
            unsigned int dependson_corecoder : 1;
            unsigned int frame_length_flag : 1;
            unsigned int channel_config : 4;
            unsigned int frequency_index : 4;
            unsigned int object_type : 5;
        };
        /* byte view, h0 is the low byte */
        struct {
            unsigned char h0;
            unsigned char h1;
        };
#elif (__BYTE_ORDER == __BIG_ENDIAN)
        /* field view, most significant field first */
        struct {
            unsigned int object_type : 5;
            unsigned int frequency_index : 4;
            unsigned int channel_config : 4;
            unsigned int frame_length_flag : 1;
            unsigned int dependson_corecoder : 1;
            unsigned int extension_flag : 1;
        };
        /* byte view, h1 is the high byte */
        struct {
            unsigned char h1;
            unsigned char h0;
        };
#endif
    };
} __attribute__ ((packed));
471 
/*
 * Sampling frequencies in Hz, indexed by the 4-bit frequency index of the
 * audio specific config.  Indexes 13..15 carry no table frequency and are
 * stored as 0 (index 15: explicit specified ?).
 */
static const unsigned int frequency_table[16] = {
    96000, 88200, 64000, 48000,     /*  0 ..  3 */
    44100, 32000, 24000, 22050,     /*  4 ..  7 */
    16000, 12000, 11025,  8000,     /*  8 .. 11 */
     7350,     0,     0,     0,     /* 12 .. 15 */
};
491 
/* Human readable audio object type names, indexed by object type value. */
static const char *aot_string[46] = {
    /*  0 */ "Null",
    /*  1 */ "AAC Main",
    /*  2 */ "AAC LC (Low Complexity)",
    /*  3 */ "AAC SSR (Scalable Sample Rate)",
    /*  4 */ "AAC LTP (Long Term Prediction)",
    /*  5 */ "SBR (Spectral Band Replication)",
    /*  6 */ "AAC Scalable",
    /*  7 */ "TwinVQ",
    /*  8 */ "CELP (Code Excited Linear Prediction)",
    /*  9 */ "HXVC (Harmonic Vector eXcitation Coding)",
    /* 10 */ "Reserved",
    /* 11 */ "Reserved",
    /* 12 */ "TTSI (Text-To-Speech Interface)",
    /* 13 */ "Main Synthesis",
    /* 14 */ "Wavetable Synthesis",
    /* 15 */ "General MIDI",
    /* 16 */ "Algorithmic Synthesis and Audio Effects",
    /* 17 */ "ER (Error Resilient) AAC LC",
    /* 18 */ "Reserved",
    /* 19 */ "ER AAC LTP",
    /* 20 */ "ER AAC Scalable",
    /* 21 */ "ER TwinVQ",
    /* 22 */ "ER BSAC (Bit-Sliced Arithmetic Coding)",
    /* 23 */ "ER AAC LD (Low Delay)",
    /* 24 */ "ER CELP",
    /* 25 */ "ER HVXC",
    /* 26 */ "ER HILN (Harmonic and Individual Lines plus Noise)",
    /* 27 */ "ER Parametric",
    /* 28 */ "SSC (SinuSoidal Coding)",
    /* 29 */ "PS (Parametric Stereo)",
    /* 30 */ "MPEG Surround",
    /* 31 */ "(Escape value)",
    /* 32 */ "Layer-1",
    /* 33 */ "Layer-2",
    /* 34 */ "Layer-3",
    /* 35 */ "DST (Direct Stream Transfer)",
    /* 36 */ "ALS (Audio Lossless)",
    /* 37 */ "SLS (Scalable LosslesS)",
    /* 38 */ "SLS non-core",
    /* 39 */ "ER AAC ELD (Enhanced Low Delay)",
    /* 40 */ "SMR (Symbolic Music Representation) Simple",
    /* 41 */ "SMR Main",
    /* 42 */ "USAC (Unified Speech and Audio Coding) (no SBR)",
    /* 43 */ "SAOC (Spatial Audio Object Coding)",
    /* 44 */ "Reserved",
    /* 45 */ "USAC",
};
540 
/* Human readable channel layouts, indexed by the 4-bit channel config. */
static const char *channel_string[16] = {
    /*  0 */ "Defined in AOT Specifc Config",
    /*  1 */ "front-center",
    /*  2 */ "front-left, front-right",
    /*  3 */ "front-center, front-left, front-right",
    /*  4 */ "front-center, front-left, front-right, back-center",
    /*  5 */ "front-center, front-left, front-right, back-left, back-right",
    /*  6 */ "front-center, front-left, front-right, back-left, back-right, LFE-channel",
    /*  7 */ "front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel",
    /*  8 */ "Reserved",
    /*  9 */ "Reserved",
    /* 10 */ "Reserved",
    /* 11 */ "Reserved",
    /* 12 */ "Reserved",
    /* 13 */ "Reserved",
    /* 14 */ "Reserved",
    /* 15 */ "Reserved",
};
560 
audio_specific_config_parse(const unsigned char * buffer,int * aot,int * frequency,int * channel)561 int audio_specific_config_parse(const unsigned char *buffer,
562                                 int *aot, int *frequency, int *channel)
563 {
564     const unsigned char *p = buffer;
565     struct audio_specific_config_s config;
566 
567     if (!p || !(p + 1))
568         return -1;
569 
570     if (!aot || !frequency || !channel)
571         return -1;
572 
573     config.h0 = *(p + 1);
574     config.h1 = *(p + 0);
575 
576     *aot = config.object_type;
577     *frequency = frequency_table[config.frequency_index];
578     *channel = config.channel_config;
579 
580     LOGV("audio specific config\n");
581     LOGV("  aot: 0x%x, %s\n", config.object_type,
582          aot_string[config.object_type]);
583     LOGV("  frequency: 0x%x, %u\n", config.frequency_index,
584          frequency_table[config.frequency_index]);
585     LOGV("  channel: %d, %s\n", config.channel_config,
586          channel_string[config.channel_config]);
587 
588     return 0;
589 }
590 
audio_specific_config_bitcoding(unsigned char * buffer,int aot,int frequency,int channel)591 int audio_specific_config_bitcoding(unsigned char *buffer,
592                                     int aot, int frequency, int channel)
593 {
594     unsigned char *p = buffer;
595     struct audio_specific_config_s config;
596     int i;
597 
598     if (!p)
599         return -1;
600 
601     for (i = 0; i < 16; i++) {
602         if ((int)frequency_table[i] == frequency) {
603             frequency = i;
604             break;
605         }
606     }
607     if (i > 12)
608         return -1;
609 
610     config.object_type = aot;
611     config.frequency_index = frequency;
612     config.channel_config = channel;
613 
614     *(p + 0) = config.h1;
615     *(p + 1) = config.h0;
616 
617     LOGV("bitfield coding for audio specific config\n");
618     LOGV("  aot : %d, %s\n", config.object_type,
619          aot_string[config.object_type]);
620     LOGV("  frequency : %d\n", frequency_table[config.frequency_index]);
621     LOGV("  channel : %d, %s\n", config.channel_config,
622          channel_string[config.channel_config]);
623 
624     return 0;
625 }
626 
627 /* end of MP4 */
628