1 /*
2 * audio_parser.c, helper parser for audio codec data
3 *
4 * Copyright (c) 2009-2010 Wind River Systems, Inc.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include <endian.h>
20
21 //#define LOG_NDEBUG 0
22
23 #define LOG_TAG "audio_parser"
24 #include <log.h>
25
26 /*
27 * MP3
28 */
29
/*
 * In-memory view of the 32-bit MP3 frame header.
 *
 * The anonymous union overlays the named bitfields with four raw bytes
 * (h0..h3), so a header can be filled in byte by byte and then read back
 * field by field.  The little- and big-endian branches declare the same
 * members in opposite order.
 *
 * NOTE(review): bitfield allocation order is implementation-defined; the
 * two branches presumably aim at putting sync in the 11 most significant
 * bits with h3 as the most significant byte -- confirm for new toolchains.
 */
struct mp3_frame_header_s {
    union {
#if (__BYTE_ORDER == __LITTLE_ENDIAN)
        /* header fields, last header field first */
        struct {
            unsigned int emphasis : 2;
            unsigned int original : 1;
            unsigned int copyright : 1;
            unsigned int mode_extension : 2;
            unsigned int channel_mode : 2;
            unsigned int private_bit : 1;
            unsigned int padding_bit : 1;
            unsigned int samplingrate_index : 2;
            unsigned int bitrate_index : 4;
            unsigned int protected : 1;
            unsigned int layer_index : 2;
            unsigned int version_index : 2;
            unsigned int sync : 11;
        };
        /* raw byte access to the same storage */
        struct {
            unsigned char h0;
            unsigned char h1;
            unsigned char h2;
            unsigned char h3;
        };
#elif (__BYTE_ORDER == __BIG_ENDIAN)
        /* header fields, first header field first */
        struct {
            unsigned int sync : 11;
            unsigned int version_index : 2;
            unsigned int layer_index : 2;
            unsigned int protected : 1;
            unsigned int bitrate_index : 4;
            unsigned int samplingrate_index : 2;
            unsigned int padding_bit : 1;
            unsigned int private_bit : 1;
            unsigned int channel_mode : 2;
            unsigned int mode_extension : 2;
            unsigned int copyright : 1;
            unsigned int original : 1;
            unsigned int emphasis : 2;
        };
        /* raw byte access to the same storage */
        struct {
            unsigned char h3;
            unsigned char h2;
            unsigned char h1;
            unsigned char h0;
        };
#endif
    };
} __attribute__ ((packed));
75
/*
 * Sampling rates selected through sampling_rate_table[3].
 * index : sampling rate index (the entry at index 3 is 0)
 */
static const int sampling_rate_table_version_1[4] = {
    44100,  /* 0 */
    48000,  /* 1 */
    32000,  /* 2 */
    0,      /* 3 */
};
83
/*
 * Sampling rates selected through sampling_rate_table[2].
 * index : sampling rate index (the entry at index 3 is 0)
 */
static const int sampling_rate_table_version_2[4] = {
    22050,  /* 0 */
    24000,  /* 1 */
    16000,  /* 2 */
    0,      /* 3 */
};
91
/*
 * Sampling rates selected through sampling_rate_table[0].
 * index : sampling rate index (the entry at index 3 is 0)
 */
static const int sampling_rate_table_version_25[4] = {
    11025,  /* 0 */
    12000,  /* 1 */
    8000,   /* 2 */
    0,      /* 3 */
};
99
100 /* index : version index */
101 static const int *sampling_rate_table[4] = {
102 [0] = &sampling_rate_table_version_25[0],
103 [1] = NULL,
104 [2] = &sampling_rate_table_version_2[0],
105 [3] = &sampling_rate_table_version_1[0],
106 };
107
/*
 * Bitrates selected through bitrate_table_version_1[3].
 * index : bitrate index (entries 0 and 15 are 0)
 */
static const int bitrate_table_version_1_layer_1[16] = {
    0,   32,  64,  96,      /*  0 ..  3 */
    128, 160, 192, 224,     /*  4 ..  7 */
    256, 288, 320, 352,     /*  8 .. 11 */
    384, 416, 448, 0,       /* 12 .. 15 */
};
127
/*
 * Bitrates selected through bitrate_table_version_1[2].
 * index : bitrate index (entries 0 and 15 are 0)
 */
static const int bitrate_table_version_1_layer_2[16] = {
    0,   32,  48,  56,      /*  0 ..  3 */
    64,  80,  96,  112,     /*  4 ..  7 */
    128, 160, 192, 224,     /*  8 .. 11 */
    256, 320, 384, 0,       /* 12 .. 15 */
};
147
/*
 * Bitrates selected through bitrate_table_version_1[1].
 * index : bitrate index (entries 0 and 15 are 0)
 */
static const int bitrate_table_version_1_layer_3[16] = {
    0,   32,  40,  48,      /*  0 ..  3 */
    56,  64,  80,  96,      /*  4 ..  7 */
    112, 128, 160, 192,     /*  8 .. 11 */
    224, 256, 320, 0,       /* 12 .. 15 */
};
167
/*
 * Bitrates selected through bitrate_table_version_2_25[3].
 * index : bitrate index (entries 0 and 15 are 0)
 */
static const int bitrate_table_version_2_25_layer_1[16] = {
    0,   32,  48,  56,      /*  0 ..  3 */
    64,  80,  96,  112,     /*  4 ..  7 */
    128, 144, 160, 176,     /*  8 .. 11 */
    192, 224, 256, 0,       /* 12 .. 15 */
};
187
/*
 * Bitrates selected through bitrate_table_version_2_25[1] and [2].
 * index : bitrate index (entries 0 and 15 are 0)
 */
static const int bitrate_table_version_2_25_layer_2_3[16] = {
    0,   8,   16,  24,      /*  0 ..  3 */
    32,  40,  48,  56,      /*  4 ..  7 */
    64,  80,  96,  112,     /*  8 .. 11 */
    128, 144, 160, 0,       /* 12 .. 15 */
};
207
208 /* index : layer index */
209 static const int *bitrate_table_version_1[4] = {
210 [0] = NULL,
211 [1] = &bitrate_table_version_1_layer_3[0],
212 [2] = &bitrate_table_version_1_layer_2[0],
213 [3] = &bitrate_table_version_1_layer_1[0]
214 };
215
216 /* index : layer index */
217 static const int *bitrate_table_version_2_25[4] = {
218 [0] = NULL,
219 [1] = &bitrate_table_version_2_25_layer_2_3[0],
220 [2] = &bitrate_table_version_2_25_layer_2_3[0],
221 [3] = &bitrate_table_version_2_25_layer_1[0],
222 };
223
224 /* index : version index */
225 static const int **bitrate_table[4] = {
226 [0] = &bitrate_table_version_2_25[0],
227 [1] = NULL,
228 [2] = &bitrate_table_version_2_25[0],
229 [3] = &bitrate_table_version_1[0],
230 };
231
/* Human-readable MPEG version names; index : version index */
static const char *version_string[4] = {
    [0] = "MPEG Version 2.5",
    [1] = "reserved",
    [2] = "MPEG Version 2",
    [3] = "MPEG Version 1",
};
236
/* Human-readable layer names; index : layer index */
static const char *layer_string[4] = {
    [0] = "reserved",
    [1] = "Layer III",
    [2] = "Layer II",
    [3] = "Layer I",
};
241
/* Meaning of the header's protection bit; index : crc index */
static const char *protection_string[2] = {
    [0] = "Protected by CRC",
    [1] = "Not protected",
};
246
/* Meaning of the header's padding bit; index : padding bit */
static const char *padding_string[2] = {
    [0] = "frame is not padded",
    [1] = "frame is padded with one extra slot",
};
251
/* Human-readable channel modes; index : channel mode */
static const char *channel_mode_string[4] = {
    [0] = "Stereo",
    [1] = "Joint Stereo (Stereo)",
    [2] = "Dual Channel (2 mono channels)",
    [3] = "Single Channel (Mono)",
};
257
/*
 * Human-readable mode extension descriptions.
 * first index  : layer index (row 0 has no descriptions)
 * second index : mode extension
 */
static const char *mode_extention_string[4][4] = {
    /* layer index 0 */
    { NULL, NULL, NULL, NULL },
    /* layer index 1 */
    {
        "intensity:off, MS stereo:off",
        "intensity:on, MS stereo:off",
        "intensity:off, MS stereo:on",
        "intensity:on, MS stereo:on",
    },
    /* layer index 2 */
    {
        "bands 4 to 31",
        "bands 8 to 31",
        "bands 12 to 31",
        "bands 16 to 31",
    },
    /* layer index 3 */
    {
        "bands 4 to 31",
        "bands 8 to 31",
        "bands 12 to 31",
        "bands 16 to 31",
    },
};
268
/* Meaning of the header's copyright bit; index : copyright bit */
static const char *copyright_string[2] = {
    [0] = "Audio is not copyrighted",
    [1] = "Audio is copyrighted",
};
273
/* Meaning of the header's original bit; index : original bit */
static const char *original_string[2] = {
    [0] = "Copy of original media",
    [1] = "Original media",
};
278
/*
 * Human-readable emphasis descriptions; index : emphasis.
 * The 50/15 time constants are microseconds ("us" keeps the log text
 * ASCII), and the J.17 recommendation is a CCITT one.
 */
static const char *emphasis_string[4] = {
    "none", "50/15 us", "reserved", "CCITT J.17"
};
283
/*
 * Size in bytes of the single padding slot a frame may carry.
 * index : layer index (0 is not a valid layer)
 */
static const int one_slot_length_table[4] = {
    [0] = -1,
    [1] = 1,
    [2] = 1,
    [3] = 4,
};

/*
 * Multiplier applied to bitrate / sampling rate in the frame length formula.
 * index : layer index (0 is not a valid layer)
 */
static const int bitrate_coeff_table[4] = {
    [0] = -1,
    [1] = 144,
    [2] = 144,
    [3] = 12,
};

/*
 * Compute the length in bytes of one frame.
 *
 * bitrate      - bitrate as stored in the bitrate tables; it is multiplied
 *                by 1000 here
 * samplingrate - sampling rate as stored in the sampling rate tables
 * layer        - layer index from the frame header (1..3)
 * extraslot    - non-zero when the frame carries one padding slot
 *
 * Returns the frame length, or -1 when layer is outside 1..3 or
 * samplingrate is not positive.
 *
 * NOTE(review): the coefficient does not depend on the MPEG version;
 * version 2 / 2.5 Layer III frames presumably need half of it -- confirm
 * against the specification before relying on the result for those streams.
 */
static inline int mp3_calculate_frame_length(int bitrate, int samplingrate,
                                             int layer, int extraslot)
{
    int frame_length;

    if (layer < 1 || layer > 3)
        return -1;

    /* would divide by zero below */
    if (samplingrate <= 0)
        return -1;

    frame_length = bitrate_coeff_table[layer] * bitrate * 1000 / samplingrate;

    /* layer I: the quotient counts 4-byte slots, convert it to bytes */
    if (layer == 3)
        frame_length *= 4;

    /*
     * The padding slot is already expressed in bytes, so it has to be
     * added after the slot-to-byte conversion; adding it before would
     * count a layer I padding slot as 16 bytes instead of 4.
     */
    if (extraslot)
        frame_length += one_slot_length_table[layer];

    return frame_length;
}
324
/*
 * Compute the duration of one frame as 1152 * 1000 / frequency, i.e. in
 * milliseconds when frequency is given in Hz.
 *
 * Returns -1 when frequency is not positive (the division would otherwise
 * be by zero or yield a meaningless negative duration).
 *
 * FIXME
 *  - It's hard coded for version 1, layer 3 (1152 samples per frame)
 */
static inline int mp3_calculate_frame_duration(int frequency)
{
    if (frequency <= 0)
        return -1;

    return 1152 * 1000 / frequency;
}
333
mp3_header_parse(const unsigned char * buffer,int * version,int * layer,int * crc,int * bitrate,int * frequency,int * channel,int * mode_extension,int * frame_length,int * frame_duration)334 int mp3_header_parse(const unsigned char *buffer,
335 int *version, int *layer, int *crc, int *bitrate,
336 int *frequency, int *channel, int *mode_extension,
337 int *frame_length, int *frame_duration)
338 {
339 const unsigned char *p = buffer;
340 struct mp3_frame_header_s header;
341 unsigned int version_index, layer_index, bitrate_index, samplingrate_index;
342 const int *psampling_rate_table;
343 const int *pbitrate_table, **ppbitrate_table;
344
345 if (!p || !(p + 1) || !(p + 2) || !(p + 3))
346 return -1;
347
348 if (!version || !layer || !crc || !bitrate || !frequency ||
349 !channel || !mode_extension)
350 return -1;
351
352 header.h0 = *(p + 3);
353 header.h1 = *(p + 2);
354 header.h2 = *(p + 1);
355 header.h3 = *(p + 0);
356
357 if (header.sync != 0x7ff) {
358 LOGE("cannot find sync (0x%03x)\n", header.sync);
359 return -1;
360 }
361
362 version_index = header.version_index;
363 layer_index = header.layer_index;
364 bitrate_index = header.bitrate_index;
365 samplingrate_index = header.samplingrate_index;
366
367 if ((version_index > 0x3) || (version_index == 0x1)) {
368 LOGE("invalid version index (%d)\n", version_index);
369 return -1;
370 }
371
372 if (layer_index > 0x3 || layer_index < 0x1) {
373 LOGE("invalid layer index (%d)\n", layer_index);
374 return -1;
375 }
376
377 if (bitrate_index > 0xe) {
378 LOGE("invalid bitrate index (%d)\n", bitrate_index);
379 return -1;
380 }
381
382 if (samplingrate_index > 0x2) {
383 LOGE("invalid sampling rate index (%d)\n", samplingrate_index);
384 return -1;
385 }
386
387 psampling_rate_table = sampling_rate_table[version_index];
388
389 ppbitrate_table = bitrate_table[version_index];
390 pbitrate_table = ppbitrate_table[layer_index];
391
392 *version = version_index;
393 *layer = layer_index;
394 *crc = header.protected;
395 *bitrate = pbitrate_table[bitrate_index];
396 *frequency = psampling_rate_table[samplingrate_index];
397 *channel = header.channel_mode;
398 *mode_extension = header.mode_extension;
399 *frame_length = mp3_calculate_frame_length(*bitrate, *frequency,
400 *layer, header.padding_bit);
401 *frame_duration = mp3_calculate_frame_duration(*frequency);
402
403 LOGV("mp3 frame header\n");
404 LOGV(" sync: 0x%x\n", header.sync);
405 LOGV(" version: 0x%x, %s\n", header.version_index,
406 version_string[header.version_index]);
407 LOGV(" layer: 0x%x, %s\n", header.layer_index,
408 layer_string[header.layer_index]);
409 LOGV(" protection: 0x%x, %s\n", header.protected,
410 protection_string[header.protected]);
411 LOGV(" bitrate: 0x%x, %u\n", header.bitrate_index, *bitrate);
412 LOGV(" sampling rate: 0x%x, %u\n", header.samplingrate_index, *frequency);
413 LOGV(" padding bit: 0x%x, %s\n", header.padding_bit,
414 padding_string[header.padding_bit]);
415 LOGV(" private bit: 0x%x\n", header.private_bit);
416 LOGV(" channel mode: 0x%x, %s\n", header.channel_mode,
417 channel_mode_string[header.channel_mode]);
418 LOGV(" mode extension: 0x%x, %s\n", header.mode_extension,
419 mode_extention_string[header.layer_index][header.mode_extension]);
420 LOGV(" copyright: 0x%x, %s\n", header.copyright,
421 copyright_string[header.copyright]);
422 LOGV(" original: 0x%x, %s\n", header.original,
423 original_string[header.original]);
424 LOGV(" emphasis: 0x%x, %s\n", header.emphasis,
425 emphasis_string[header.emphasis]);
426 LOGV(" frame length: %d\n", *frame_length);
427 LOGV(" frame duration: %d\n", *frame_duration);
428
429 return 0;
430 }
431
432 /* end of MP3 */
433
434 /*
435 * MP4
436 * FIXME
437 * - aot escape, explicit frequency
438 */
439
/*
 * In-memory view of the first 16 bits of an audio specific config.
 *
 * The anonymous union overlays the named bitfields with two raw bytes
 * (h0, h1), so the config can be filled in byte by byte and read back field
 * by field, or the other way round.  The little- and big-endian branches
 * declare the same members in opposite order.
 *
 * NOTE(review): bitfield allocation order is implementation-defined; the
 * two branches presumably aim at putting object_type in the 5 most
 * significant bits with h1 as the most significant byte -- confirm for new
 * toolchains.
 */
struct audio_specific_config_s {
    union {
#if (__BYTE_ORDER == __LITTLE_ENDIAN)
        /* config fields, last field first */
        struct {
            unsigned int extension_flag : 1;
            unsigned int dependson_corecoder : 1;
            unsigned int frame_length_flag : 1;
            unsigned int channel_config : 4;
            unsigned int frequency_index : 4;
            unsigned int object_type : 5;
        };
        /* raw byte access to the same storage */
        struct {
            unsigned char h0;
            unsigned char h1;
        };
#elif (__BYTE_ORDER == __BIG_ENDIAN)
        /* config fields, first field first */
        struct {
            unsigned int object_type : 5;
            unsigned int frequency_index : 4;
            unsigned int channel_config : 4;
            unsigned int frame_length_flag : 1;
            unsigned int dependson_corecoder : 1;
            unsigned int extension_flag : 1;
        };
        /* raw byte access to the same storage */
        struct {
            unsigned char h1;
            unsigned char h0;
        };
#endif
    };
} __attribute__ ((packed));
471
/*
 * Sampling frequencies of the audio specific config.
 * index : frequency index (entries 13 to 15 are 0)
 */
static const unsigned int frequency_table[16] = {
    96000, 88200, 64000, 48000,     /*  0 ..  3 */
    44100, 32000, 24000, 22050,     /*  4 ..  7 */
    16000, 12000, 11025, 8000,      /*  8 .. 11 */
    7350,  0,     0,                /* 12 .. 14 */
    0,                              /* 15, explicit specified ? */
};
491
/*
 * Human-readable audio object type names.
 * index : audio object type
 */
static const char *aot_string[46] = {
    [0] = "Null",
    [1] = "AAC Main",
    [2] = "AAC LC (Low Complexity)",
    [3] = "AAC SSR (Scalable Sample Rate)",
    [4] = "AAC LTP (Long Term Prediction)",
    [5] = "SBR (Spectral Band Replication)",
    [6] = "AAC Scalable",
    [7] = "TwinVQ",
    [8] = "CELP (Code Excited Linear Prediction)",
    [9] = "HVXC (Harmonic Vector eXcitation Coding)",
    [10] = "Reserved",
    [11] = "Reserved",
    [12] = "TTSI (Text-To-Speech Interface)",
    [13] = "Main Synthesis",
    [14] = "Wavetable Synthesis",
    [15] = "General MIDI",
    [16] = "Algorithmic Synthesis and Audio Effects",
    [17] = "ER (Error Resilient) AAC LC",
    [18] = "Reserved",
    [19] = "ER AAC LTP",
    [20] = "ER AAC Scalable",
    [21] = "ER TwinVQ",
    [22] = "ER BSAC (Bit-Sliced Arithmetic Coding)",
    [23] = "ER AAC LD (Low Delay)",
    [24] = "ER CELP",
    [25] = "ER HVXC",
    [26] = "ER HILN (Harmonic and Individual Lines plus Noise)",
    [27] = "ER Parametric",
    [28] = "SSC (SinuSoidal Coding)",
    [29] = "PS (Parametric Stereo)",
    [30] = "MPEG Surround",
    [31] = "(Escape value)",
    [32] = "Layer-1",
    [33] = "Layer-2",
    [34] = "Layer-3",
    [35] = "DST (Direct Stream Transfer)",
    [36] = "ALS (Audio Lossless)",
    [37] = "SLS (Scalable LosslesS)",
    [38] = "SLS non-core",
    [39] = "ER AAC ELD (Enhanced Low Delay)",
    [40] = "SMR (Symbolic Music Representation) Simple",
    [41] = "SMR Main",
    [42] = "USAC (Unified Speech and Audio Coding) (no SBR)",
    [43] = "SAOC (Spatial Audio Object Coding)",
    [44] = "Reserved",
    [45] = "USAC",
};
540
/*
 * Human-readable channel layouts.
 * index : channel configuration
 */
static const char *channel_string[16] = {
    [0] = "Defined in AOT Specific Config",
    [1] = "front-center",
    [2] = "front-left, front-right",
    [3] = "front-center, front-left, front-right",
    [4] = "front-center, front-left, front-right, back-center",
    [5] = "front-center, front-left, front-right, back-left, back-right",
    [6] = "front-center, front-left, front-right, back-left, back-right, LFE-channel",
    [7] = "front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel",
    [8] = "Reserved",
    [9] = "Reserved",
    [10] = "Reserved",
    [11] = "Reserved",
    [12] = "Reserved",
    [13] = "Reserved",
    [14] = "Reserved",
    [15] = "Reserved",
};
560
audio_specific_config_parse(const unsigned char * buffer,int * aot,int * frequency,int * channel)561 int audio_specific_config_parse(const unsigned char *buffer,
562 int *aot, int *frequency, int *channel)
563 {
564 const unsigned char *p = buffer;
565 struct audio_specific_config_s config;
566
567 if (!p || !(p + 1))
568 return -1;
569
570 if (!aot || !frequency || !channel)
571 return -1;
572
573 config.h0 = *(p + 1);
574 config.h1 = *(p + 0);
575
576 *aot = config.object_type;
577 *frequency = frequency_table[config.frequency_index];
578 *channel = config.channel_config;
579
580 LOGV("audio specific config\n");
581 LOGV(" aot: 0x%x, %s\n", config.object_type,
582 aot_string[config.object_type]);
583 LOGV(" frequency: 0x%x, %u\n", config.frequency_index,
584 frequency_table[config.frequency_index]);
585 LOGV(" channel: %d, %s\n", config.channel_config,
586 channel_string[config.channel_config]);
587
588 return 0;
589 }
590
audio_specific_config_bitcoding(unsigned char * buffer,int aot,int frequency,int channel)591 int audio_specific_config_bitcoding(unsigned char *buffer,
592 int aot, int frequency, int channel)
593 {
594 unsigned char *p = buffer;
595 struct audio_specific_config_s config;
596 int i;
597
598 if (!p)
599 return -1;
600
601 for (i = 0; i < 16; i++) {
602 if ((int)frequency_table[i] == frequency) {
603 frequency = i;
604 break;
605 }
606 }
607 if (i > 12)
608 return -1;
609
610 config.object_type = aot;
611 config.frequency_index = frequency;
612 config.channel_config = channel;
613
614 *(p + 0) = config.h1;
615 *(p + 1) = config.h0;
616
617 LOGV("bitfield coding for audio specific config\n");
618 LOGV(" aot : %d, %s\n", config.object_type,
619 aot_string[config.object_type]);
620 LOGV(" frequency : %d\n", frequency_table[config.frequency_index]);
621 LOGV(" channel : %d, %s\n", config.channel_config,
622 channel_string[config.channel_config]);
623
624 return 0;
625 }
626
627 /* end of MP4 */
628