1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
/*
 * Maximum audio device address length. Used in this header as the size, in
 * chars, of record_track_metadata::dest_device_address.
 */
#define AUDIO_DEVICE_MAX_ADDRESS_LEN 32

/*
 * Audio attributes: size, in chars, of the `tags` buffers in
 * playback_track_metadata_v7 and record_track_metadata_v7.
 */
#define AUDIO_ATTRIBUTES_TAGS_MAX_SIZE 256

/*
 * Presumably the delimiter between individual tags inside a `tags` buffer;
 * nothing in this header uses it, so confirm against the code that builds or
 * parses tag strings.
 */
static const char AUDIO_ATTRIBUTES_TAGS_SEPARATOR = ';';
29 
/**
 * Audio source identifiers.
 *
 * Every enumerator has an explicit value, so declaration order carries no
 * meaning: -1 is the invalid sentinel, 0-10 form a contiguous run, and
 * 1997-1999 sit in a separate range.
 */
typedef enum {
  /* Sentinel; the only negative value in the enum. */
  AUDIO_SOURCE_INVALID = -1,

  /* Contiguous range 0-10. */
  AUDIO_SOURCE_DEFAULT = 0,
  AUDIO_SOURCE_MIC = 1,
  AUDIO_SOURCE_VOICE_UPLINK = 2,
  AUDIO_SOURCE_VOICE_DOWNLINK = 3,
  AUDIO_SOURCE_VOICE_CALL = 4,
  AUDIO_SOURCE_CAMCORDER = 5,
  AUDIO_SOURCE_VOICE_RECOGNITION = 6,
  AUDIO_SOURCE_VOICE_COMMUNICATION = 7,
  AUDIO_SOURCE_REMOTE_SUBMIX = 8,
  AUDIO_SOURCE_UNPROCESSED = 9,
  AUDIO_SOURCE_VOICE_PERFORMANCE = 10,

  /* Separate range 1997-1999. */
  AUDIO_SOURCE_ECHO_REFERENCE = 1997,
  AUDIO_SOURCE_FM_TUNER = 1998,
  AUDIO_SOURCE_HOTWORD = 1999,
} audio_source_t;
47 
/**
 * Audio content type identifiers.
 *
 * Values are explicit and contiguous (0-4); each initializer is written as an
 * unsigned constant (`u` suffix).
 */
typedef enum {
  AUDIO_CONTENT_TYPE_UNKNOWN = 0u,
  AUDIO_CONTENT_TYPE_SPEECH = 1u,
  AUDIO_CONTENT_TYPE_MUSIC = 2u,
  AUDIO_CONTENT_TYPE_MOVIE = 3u,
  AUDIO_CONTENT_TYPE_SONIFICATION = 4u,
} audio_content_type_t;
55 
/**
 * Audio usage identifiers.
 *
 * Values are explicit: 0-17 form a contiguous run and 1000-1003 sit in a
 * separate range.
 */
typedef enum {
  /* Contiguous range 0-17. */
  AUDIO_USAGE_UNKNOWN = 0,
  AUDIO_USAGE_MEDIA = 1,
  AUDIO_USAGE_VOICE_COMMUNICATION = 2,
  AUDIO_USAGE_VOICE_COMMUNICATION_SIGNALLING = 3,
  AUDIO_USAGE_ALARM = 4,
  AUDIO_USAGE_NOTIFICATION = 5,
  AUDIO_USAGE_NOTIFICATION_TELEPHONY_RINGTONE = 6,
  AUDIO_USAGE_NOTIFICATION_COMMUNICATION_REQUEST = 7,
  AUDIO_USAGE_NOTIFICATION_COMMUNICATION_INSTANT = 8,
  AUDIO_USAGE_NOTIFICATION_COMMUNICATION_DELAYED = 9,
  AUDIO_USAGE_NOTIFICATION_EVENT = 10,
  AUDIO_USAGE_ASSISTANCE_ACCESSIBILITY = 11,
  AUDIO_USAGE_ASSISTANCE_NAVIGATION_GUIDANCE = 12,
  AUDIO_USAGE_ASSISTANCE_SONIFICATION = 13,
  AUDIO_USAGE_GAME = 14,
  AUDIO_USAGE_VIRTUAL_SOURCE = 15,
  AUDIO_USAGE_ASSISTANT = 16,
  AUDIO_USAGE_CALL_ASSISTANT = 17,
  /* Separate range 1000-1003. */
  AUDIO_USAGE_EMERGENCY = 1000,
  AUDIO_USAGE_SAFETY = 1001,
  AUDIO_USAGE_VEHICLE_STATUS = 1002,
  AUDIO_USAGE_ANNOUNCEMENT = 1003,
} audio_usage_t;
80 
// Flags that never appear on their own; in this header they are only OR-ed
// into the audio_devices_t enumerators. Each flag is a single bit of a 32-bit
// value, written as an unsigned shift so the bit position is explicit.
enum {
  AUDIO_DEVICE_BIT_IN = 1u << 31,       // 0x80000000
  AUDIO_DEVICE_BIT_DEFAULT = 1u << 30,  // 0x40000000
};
86 
// The exact device types are not of interest at the moment, so only the two
// "default" devices are declared. Both are built from the AUDIO_DEVICE_BIT_*
// flags: OUT_DEFAULT is the DEFAULT bit alone (0x40000000) and IN_DEFAULT
// additionally sets the IN bit (0xC0000000).
typedef enum {
  AUDIO_DEVICE_OUT_DEFAULT = AUDIO_DEVICE_BIT_DEFAULT,
  AUDIO_DEVICE_IN_DEFAULT = AUDIO_DEVICE_BIT_IN | AUDIO_DEVICE_BIT_DEFAULT,
} audio_devices_t;
92 
93 // The "channel mask" enum is comprised of discrete channels,
94 // their combinations (masks), and special values.
95 typedef enum : uint32_t {
96   AUDIO_CHANNEL_REPRESENTATION_POSITION = 0x0u,
97   AUDIO_CHANNEL_REPRESENTATION_INDEX = 0x2u,
98   AUDIO_CHANNEL_NONE = 0x0u,
99   AUDIO_CHANNEL_INVALID = 0xC0000000u,
100 
101   AUDIO_CHANNEL_OUT_FRONT_LEFT = 0x1u,
102   AUDIO_CHANNEL_OUT_FRONT_RIGHT = 0x2u,
103   AUDIO_CHANNEL_OUT_FRONT_CENTER = 0x4u,
104   AUDIO_CHANNEL_OUT_LOW_FREQUENCY = 0x8u,
105   AUDIO_CHANNEL_OUT_BACK_LEFT = 0x10u,
106   AUDIO_CHANNEL_OUT_BACK_RIGHT = 0x20u,
107   AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER = 0x40u,
108   AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER = 0x80u,
109   AUDIO_CHANNEL_OUT_BACK_CENTER = 0x100u,
110   AUDIO_CHANNEL_OUT_SIDE_LEFT = 0x200u,
111   AUDIO_CHANNEL_OUT_SIDE_RIGHT = 0x400u,
112   AUDIO_CHANNEL_OUT_TOP_CENTER = 0x800u,
113   AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT = 0x1000u,
114   AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER = 0x2000u,
115   AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT = 0x4000u,
116   AUDIO_CHANNEL_OUT_TOP_BACK_LEFT = 0x8000u,
117   AUDIO_CHANNEL_OUT_TOP_BACK_CENTER = 0x10000u,
118   AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT = 0x20000u,
119   AUDIO_CHANNEL_OUT_TOP_SIDE_LEFT = 0x40000u,
120   AUDIO_CHANNEL_OUT_TOP_SIDE_RIGHT = 0x80000u,
121   AUDIO_CHANNEL_OUT_HAPTIC_A = 0x20000000u,
122   AUDIO_CHANNEL_OUT_HAPTIC_B = 0x10000000u,
123   AUDIO_CHANNEL_OUT_MONO = 0x1u,    // OUT_FRONT_LEFT
124   AUDIO_CHANNEL_OUT_STEREO = 0x3u,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT
125   AUDIO_CHANNEL_OUT_2POINT1 =
126       0xBu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_LOW_FREQUENCY
127   AUDIO_CHANNEL_OUT_2POINT0POINT2 =
128       0xC0003u,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_TOP_SIDE_LEFT |
129                  // OUT_TOP_SIDE_RIGHT
130   AUDIO_CHANNEL_OUT_2POINT1POINT2 =
131       0xC000Bu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_TOP_SIDE_LEFT |
132                  // OUT_TOP_SIDE_RIGHT | OUT_LOW_FREQUENCY
133   AUDIO_CHANNEL_OUT_3POINT0POINT2 =
134       0xC0007u,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_FRONT_CENTER |
135                  // OUT_TOP_SIDE_LEFT | OUT_TOP_SIDE_RIGHT
136   AUDIO_CHANNEL_OUT_3POINT1POINT2 =
137       0xC000Fu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_FRONT_CENTER |
138                  // OUT_TOP_SIDE_LEFT | OUT_TOP_SIDE_RIGHT | OUT_LOW_FREQUENCY
139   AUDIO_CHANNEL_OUT_QUAD = 0x33u,        // OUT_FRONT_LEFT | OUT_FRONT_RIGHT |
140                                          // OUT_BACK_LEFT | OUT_BACK_RIGHT
141   AUDIO_CHANNEL_OUT_QUAD_BACK = 0x33u,   // OUT_QUAD
142   AUDIO_CHANNEL_OUT_QUAD_SIDE = 0x603u,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT |
143                                          // OUT_SIDE_LEFT | OUT_SIDE_RIGHT
144   AUDIO_CHANNEL_OUT_SURROUND = 0x107u,   // OUT_FRONT_LEFT | OUT_FRONT_RIGHT |
145                                          // OUT_FRONT_CENTER | OUT_BACK_CENTER
146   AUDIO_CHANNEL_OUT_PENTA = 0x37u,       // OUT_QUAD | OUT_FRONT_CENTER
147   AUDIO_CHANNEL_OUT_5POINT1 =
148       0x3Fu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_FRONT_CENTER |
149               // OUT_LOW_FREQUENCY | OUT_BACK_LEFT | OUT_BACK_RIGHT
150   AUDIO_CHANNEL_OUT_5POINT1_BACK = 0x3Fu,  // OUT_5POINT1
151   AUDIO_CHANNEL_OUT_5POINT1_SIDE =
152       0x60Fu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_FRONT_CENTER |
153                // OUT_LOW_FREQUENCY | OUT_SIDE_LEFT | OUT_SIDE_RIGHT
154   AUDIO_CHANNEL_OUT_5POINT1POINT2 =
155       0xC003Fu,  // OUT_5POINT1 | OUT_TOP_SIDE_LEFT | OUT_TOP_SIDE_RIGHT
156   AUDIO_CHANNEL_OUT_5POINT1POINT4 =
157       0x2D03Fu,  // OUT_5POINT1 | OUT_TOP_FRONT_LEFT | OUT_TOP_FRONT_RIGHT |
158                  // OUT_TOP_BACK_LEFT | OUT_TOP_BACK_RIGHT
159   AUDIO_CHANNEL_OUT_6POINT1 =
160       0x13Fu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_FRONT_CENTER |
161                // OUT_LOW_FREQUENCY | OUT_BACK_LEFT | OUT_BACK_RIGHT |
162                // OUT_BACK_CENTER
163   AUDIO_CHANNEL_OUT_7POINT1 =
164       0x63Fu,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_FRONT_CENTER |
165                // OUT_LOW_FREQUENCY | OUT_BACK_LEFT | OUT_BACK_RIGHT |
166                // OUT_SIDE_LEFT | OUT_SIDE_RIGHT
167   AUDIO_CHANNEL_OUT_7POINT1POINT2 =
168       0xC063Fu,  // OUT_7POINT1 | OUT_TOP_SIDE_LEFT | OUT_TOP_SIDE_RIGHT
169   AUDIO_CHANNEL_OUT_7POINT1POINT4 =
170       0x2D63Fu,  // OUT_7POINT1 | OUT_TOP_FRONT_LEFT | OUT_TOP_FRONT_RIGHT |
171                  // OUT_TOP_BACK_LEFT | OUT_TOP_BACK_RIGHT
172   AUDIO_CHANNEL_OUT_MONO_HAPTIC_A =
173       0x20000001u,  // OUT_FRONT_LEFT | OUT_HAPTIC_A
174   AUDIO_CHANNEL_OUT_STEREO_HAPTIC_A =
175       0x20000003u,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_HAPTIC_A
176   AUDIO_CHANNEL_OUT_HAPTIC_AB = 0x30000000u,  // OUT_HAPTIC_A | OUT_HAPTIC_B
177   AUDIO_CHANNEL_OUT_MONO_HAPTIC_AB =
178       0x30000001u,  // OUT_FRONT_LEFT | OUT_HAPTIC_A | OUT_HAPTIC_B
179   AUDIO_CHANNEL_OUT_STEREO_HAPTIC_AB =
180       0x30000003u,  // OUT_FRONT_LEFT | OUT_FRONT_RIGHT | OUT_HAPTIC_A |
181                     // OUT_HAPTIC_B
182 
183   AUDIO_CHANNEL_IN_LEFT = 0x4u,
184   AUDIO_CHANNEL_IN_RIGHT = 0x8u,
185   AUDIO_CHANNEL_IN_FRONT = 0x10u,
186   AUDIO_CHANNEL_IN_BACK = 0x20u,
187   AUDIO_CHANNEL_IN_LEFT_PROCESSED = 0x40u,
188   AUDIO_CHANNEL_IN_RIGHT_PROCESSED = 0x80u,
189   AUDIO_CHANNEL_IN_FRONT_PROCESSED = 0x100u,
190   AUDIO_CHANNEL_IN_BACK_PROCESSED = 0x200u,
191   AUDIO_CHANNEL_IN_PRESSURE = 0x400u,
192   AUDIO_CHANNEL_IN_X_AXIS = 0x800u,
193   AUDIO_CHANNEL_IN_Y_AXIS = 0x1000u,
194   AUDIO_CHANNEL_IN_Z_AXIS = 0x2000u,
195   AUDIO_CHANNEL_IN_BACK_LEFT = 0x10000u,
196   AUDIO_CHANNEL_IN_BACK_RIGHT = 0x20000u,
197   AUDIO_CHANNEL_IN_CENTER = 0x40000u,
198   AUDIO_CHANNEL_IN_LOW_FREQUENCY = 0x100000u,
199   AUDIO_CHANNEL_IN_TOP_LEFT = 0x200000u,
200   AUDIO_CHANNEL_IN_TOP_RIGHT = 0x400000u,
201   AUDIO_CHANNEL_IN_VOICE_UPLINK = 0x4000u,
202   AUDIO_CHANNEL_IN_VOICE_DNLINK = 0x8000u,
203   AUDIO_CHANNEL_IN_MONO = 0x10u,        // IN_FRONT
204   AUDIO_CHANNEL_IN_STEREO = 0xCu,       // IN_LEFT | IN_RIGHT
205   AUDIO_CHANNEL_IN_FRONT_BACK = 0x30u,  // IN_FRONT | IN_BACK
206   AUDIO_CHANNEL_IN_6 = 0xFCu,  // IN_LEFT | IN_RIGHT | IN_FRONT | IN_BACK |
207                                // IN_LEFT_PROCESSED | IN_RIGHT_PROCESSED
208   AUDIO_CHANNEL_IN_2POINT0POINT2 =
209       0x60000Cu,  // IN_LEFT | IN_RIGHT | IN_TOP_LEFT | IN_TOP_RIGHT
210   AUDIO_CHANNEL_IN_2POINT1POINT2 =
211       0x70000Cu,  // IN_LEFT | IN_RIGHT | IN_TOP_LEFT | IN_TOP_RIGHT |
212                   // IN_LOW_FREQUENCY
213   AUDIO_CHANNEL_IN_3POINT0POINT2 =
214       0x64000Cu,  // IN_LEFT | IN_CENTER | IN_RIGHT | IN_TOP_LEFT | IN_TOP_RIGHT
215   AUDIO_CHANNEL_IN_3POINT1POINT2 =
216       0x74000Cu,  // IN_LEFT | IN_CENTER | IN_RIGHT | IN_TOP_LEFT | IN_TOP_RIGHT
217                   // | IN_LOW_FREQUENCY
218   AUDIO_CHANNEL_IN_5POINT1 =
219       0x17000Cu,  // IN_LEFT | IN_CENTER | IN_RIGHT | IN_BACK_LEFT |
220                   // IN_BACK_RIGHT | IN_LOW_FREQUENCY
221   AUDIO_CHANNEL_IN_VOICE_UPLINK_MONO = 0x4010u,  // IN_VOICE_UPLINK | IN_MONO
222   AUDIO_CHANNEL_IN_VOICE_DNLINK_MONO = 0x8010u,  // IN_VOICE_DNLINK | IN_MONO
223   AUDIO_CHANNEL_IN_VOICE_CALL_MONO =
224       0xC010u,  // IN_VOICE_UPLINK_MONO | IN_VOICE_DNLINK_MONO
225 
226   AUDIO_CHANNEL_COUNT_MAX = 30u,
227   AUDIO_CHANNEL_INDEX_HDR = 0x80000000u,  // REPRESENTATION_INDEX << COUNT_MAX
228   AUDIO_CHANNEL_INDEX_MASK_1 = 0x80000001u,   // INDEX_HDR | (1 << 1) - 1
229   AUDIO_CHANNEL_INDEX_MASK_2 = 0x80000003u,   // INDEX_HDR | (1 << 2) - 1
230   AUDIO_CHANNEL_INDEX_MASK_3 = 0x80000007u,   // INDEX_HDR | (1 << 3) - 1
231   AUDIO_CHANNEL_INDEX_MASK_4 = 0x8000000Fu,   // INDEX_HDR | (1 << 4) - 1
232   AUDIO_CHANNEL_INDEX_MASK_5 = 0x8000001Fu,   // INDEX_HDR | (1 << 5) - 1
233   AUDIO_CHANNEL_INDEX_MASK_6 = 0x8000003Fu,   // INDEX_HDR | (1 << 6) - 1
234   AUDIO_CHANNEL_INDEX_MASK_7 = 0x8000007Fu,   // INDEX_HDR | (1 << 7) - 1
235   AUDIO_CHANNEL_INDEX_MASK_8 = 0x800000FFu,   // INDEX_HDR | (1 << 8) - 1
236   AUDIO_CHANNEL_INDEX_MASK_9 = 0x800001FFu,   // INDEX_HDR | (1 << 9) - 1
237   AUDIO_CHANNEL_INDEX_MASK_10 = 0x800003FFu,  // INDEX_HDR | (1 << 10) - 1
238   AUDIO_CHANNEL_INDEX_MASK_11 = 0x800007FFu,  // INDEX_HDR | (1 << 11) - 1
239   AUDIO_CHANNEL_INDEX_MASK_12 = 0x80000FFFu,  // INDEX_HDR | (1 << 12) - 1
240   AUDIO_CHANNEL_INDEX_MASK_13 = 0x80001FFFu,  // INDEX_HDR | (1 << 13) - 1
241   AUDIO_CHANNEL_INDEX_MASK_14 = 0x80003FFFu,  // INDEX_HDR | (1 << 14) - 1
242   AUDIO_CHANNEL_INDEX_MASK_15 = 0x80007FFFu,  // INDEX_HDR | (1 << 15) - 1
243   AUDIO_CHANNEL_INDEX_MASK_16 = 0x8000FFFFu,  // INDEX_HDR | (1 << 16) - 1
244   AUDIO_CHANNEL_INDEX_MASK_17 = 0x8001FFFFu,  // INDEX_HDR | (1 << 17) - 1
245   AUDIO_CHANNEL_INDEX_MASK_18 = 0x8003FFFFu,  // INDEX_HDR | (1 << 18) - 1
246   AUDIO_CHANNEL_INDEX_MASK_19 = 0x8007FFFFu,  // INDEX_HDR | (1 << 19) - 1
247   AUDIO_CHANNEL_INDEX_MASK_20 = 0x800FFFFFu,  // INDEX_HDR | (1 << 20) - 1
248   AUDIO_CHANNEL_INDEX_MASK_21 = 0x801FFFFFu,  // INDEX_HDR | (1 << 21) - 1
249   AUDIO_CHANNEL_INDEX_MASK_22 = 0x803FFFFFu,  // INDEX_HDR | (1 << 22) - 1
250   AUDIO_CHANNEL_INDEX_MASK_23 = 0x807FFFFFu,  // INDEX_HDR | (1 << 23) - 1
251   AUDIO_CHANNEL_INDEX_MASK_24 = 0x80FFFFFFu,  // INDEX_HDR | (1 << 24) - 1
252 } audio_channel_mask_t;
253 
254 /** Metadata of a playback track for an in stream. */
255 typedef struct playback_track_metadata {
256   audio_usage_t usage;
257   audio_content_type_t content_type;
258   float gain;  // Normalized linear volume. 0=silence, 1=0dbfs...
259 } playback_track_metadata_t;
260 
261 /** Metadata of a record track for an out stream. */
262 typedef struct record_track_metadata {
263   audio_source_t source;
264   float gain;  // Normalized linear volume. 0=silence, 1=0dbfs...
265                // For record tracks originating from a software patch, the
266                // dest_device fields provide information about the downstream
267                // device.
268   audio_devices_t dest_device;
269   char dest_device_address[AUDIO_DEVICE_MAX_ADDRESS_LEN];
270 } record_track_metadata_t;
271 
/** Track list of a source: an element count plus the array it describes. */
struct source_metadata {
  /** Number of entries in `tracks`. */
  size_t track_count;
  /** Array of metadata of each track connected to this source. */
  struct playback_track_metadata* tracks;
};
typedef struct source_metadata source_metadata_t;
277 
/** Track list of a sink: an element count plus the array it describes. */
struct sink_metadata {
  /** Number of entries in `tracks`. */
  size_t track_count;
  /** Array of metadata of each track connected to this sink. */
  struct record_track_metadata* tracks;
};
typedef struct sink_metadata sink_metadata_t;
283 
284 /** Metadata of a playback track for an in stream. */
285 typedef struct playback_track_metadata_v7 {
286   struct playback_track_metadata base;
287   audio_channel_mask_t channel_mask;
288   char tags[AUDIO_ATTRIBUTES_TAGS_MAX_SIZE]; /* UTF8 */
289 } playback_track_metadata_v7_t;
290 
291 /** Metadata of a record track for an out stream. */
292 typedef struct record_track_metadata_v7 {
293   struct record_track_metadata base;
294   audio_channel_mask_t channel_mask;
295   char tags[AUDIO_ATTRIBUTES_TAGS_MAX_SIZE]; /* UTF8 */
296 } record_track_metadata_v7_t;
297 
/*
 * HAL version 3.2 and higher only.
 * Track list of a source whose entries use the v7 playback track layout.
 */
struct source_metadata_v7 {
  /** Number of entries in `tracks`. */
  size_t track_count;
  /** Array of metadata of each track connected to this source. */
  struct playback_track_metadata_v7* tracks;
};
typedef struct source_metadata_v7 source_metadata_v7_t;
304 
/*
 * HAL version 3.2 and higher only.
 * Track list of a sink whose entries use the v7 record track layout.
 */
struct sink_metadata_v7 {
  /** Number of entries in `tracks`. */
  size_t track_count;
  /** Array of metadata of each track connected to this sink. */
  struct record_track_metadata_v7* tracks;
};
typedef struct sink_metadata_v7 sink_metadata_v7_t;
311