1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.example.android.voiceinteractor;
18 
19 import static android.service.voice.AlwaysOnHotwordDetector.STATE_HARDWARE_UNAVAILABLE;
20 import static android.service.voice.AlwaysOnHotwordDetector.STATE_KEYPHRASE_ENROLLED;
21 import static android.service.voice.AlwaysOnHotwordDetector.STATE_KEYPHRASE_UNENROLLED;
22 
23 import android.content.ComponentName;
24 import android.content.Intent;
25 import android.media.AudioAttributes;
26 import android.media.AudioFormat;
27 import android.media.AudioRecord;
28 import android.media.MediaRecorder;
29 import android.os.Binder;
30 import android.os.Bundle;
31 import android.os.IBinder;
32 import android.os.Trace;
33 import android.service.voice.AlwaysOnHotwordDetector;
34 import android.service.voice.AlwaysOnHotwordDetector.EventPayload;
35 import android.service.voice.HotwordDetector;
36 import android.service.voice.HotwordRejectedResult;
37 import android.service.voice.SandboxedDetectionInitializer;
38 import android.service.voice.VisualQueryDetectionServiceFailure;
39 import android.service.voice.VisualQueryDetector;
40 import android.service.voice.VoiceInteractionService;
41 import android.util.Log;
42 
43 import androidx.annotation.NonNull;
44 
45 import java.time.Duration;
46 import java.util.Locale;
47 import java.util.concurrent.Executors;
48 
49 public class SampleVoiceInteractionService extends VoiceInteractionService {
50     public static final String DSP_MODEL_KEYPHRASE = "X Google";
51     private static final String TAG = "VIS";
52 
53     // AudioRecord config
54     private static final Duration AUDIO_RECORD_BUFFER_DURATION = Duration.ofSeconds(5);
55     private static final Duration AUDIO_READ_DURATION = Duration.ofSeconds(3);
56 
57     // DSP model config
58     private static final Locale DSP_MODEL_LOCALE = Locale.US;
59 
60     private final IBinder binder = new LocalBinder();
61 
62     HotwordDetector mHotwordDetector;
63     VisualQueryDetector mVisualQueryDetector;
64     Callback mHotwordDetectorCallback;
65     VisualQueryDetector.Callback mVisualQueryDetectorCallback;
66     Bundle mData = new Bundle();
67     AudioFormat mAudioFormat;
68     EventPayload mLastPayload;
69 
createAudioRecord(EventPayload eventPayload, int bytesPerSecond)70     private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond) {
71         int audioRecordBufferSize = getBufferSizeInBytes(bytesPerSecond,
72                 AUDIO_RECORD_BUFFER_DURATION.getSeconds());
73         Log.d(TAG, "creating AudioRecord: bytes=" + audioRecordBufferSize
74                 + ", lengthSeconds=" + (audioRecordBufferSize / bytesPerSecond));
75         return new AudioRecord.Builder()
76                 .setAudioAttributes(
77                         new AudioAttributes.Builder()
78                                 .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
79                                 .build())
80                 .setAudioFormat(eventPayload.getCaptureAudioFormat())
81                 .setBufferSizeInBytes(audioRecordBufferSize)
82                 .setSharedAudioEvent(eventPayload.getHotwordDetectedResult().getMediaSyncEvent())
83                 .build();
84     }
85 
getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds)86     private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) {
87         return (int) (bytesPerSecond * bufferLengthSeconds);
88     }
89 
90     @Override
onBind(Intent intent)91     public IBinder onBind(Intent intent) {
92         if ("local".equals(intent.getAction())) {
93             return binder;
94         }
95         return super.onBind(intent);
96     }
97 
98     @Override
onReady()99     public void onReady() {
100         super.onReady();
101         Log.i(TAG, "onReady");
102         mHotwordDetectorCallback = new Callback();
103         mVisualQueryDetectorCallback = new VisualQueryDetectorCallback();
104         mHotwordDetector = createAlwaysOnHotwordDetector(DSP_MODEL_KEYPHRASE,
105                         DSP_MODEL_LOCALE, null, null, mHotwordDetectorCallback);
106 
107     }
108 
109     @Override
onShutdown()110     public void onShutdown() {
111         super.onShutdown();
112         Log.i(TAG, "onShutdown");
113     }
114 
115     public class LocalBinder extends Binder {
getService()116         SampleVoiceInteractionService getService() {
117             // Return this instance of LocalService so clients can call public methods
118             return SampleVoiceInteractionService.this;
119         }
120     }
121 
122     class VisualQueryDetectorCallback implements VisualQueryDetector.Callback {
123             @Override
onQueryDetected(@onNull String partialQuery)124             public void onQueryDetected(@NonNull String partialQuery) {
125                 Log.i(TAG, "VQD partial query detected: "+ partialQuery);
126             }
127 
128             @Override
onQueryRejected()129             public void onQueryRejected() {
130                 Log.i(TAG, "VQD query rejected");
131             }
132 
133             @Override
onQueryFinished()134             public void onQueryFinished() {
135                 Log.i(TAG, "VQD query finished");
136             }
137 
138             @Override
onVisualQueryDetectionServiceInitialized(int status)139             public void onVisualQueryDetectionServiceInitialized(int status) {
140                 Log.i(TAG, "VQD init: "+ status);
141                 if (status == SandboxedDetectionInitializer.INITIALIZATION_STATUS_SUCCESS) {
142                     mVisualQueryDetector.startRecognition();
143                 }
144             }
145 
146             @Override
onVisualQueryDetectionServiceRestarted()147             public void onVisualQueryDetectionServiceRestarted() {
148                 Log.i(TAG, "VQD restarted");
149                 mVisualQueryDetector.startRecognition();
150             }
151 
152         @Override
onFailure( VisualQueryDetectionServiceFailure visualQueryDetectionServiceFailure)153         public void onFailure(
154                 VisualQueryDetectionServiceFailure visualQueryDetectionServiceFailure) {
155             Log.i(TAG, "VQD onFailure visualQueryDetectionServiceFailure: "
156                     + visualQueryDetectionServiceFailure);
157         }
158 
159         @Override
onUnknownFailure(String errorMessage)160         public void onUnknownFailure(String errorMessage) {
161             Log.i(TAG, "VQD onUnknownFailure errorMessage: " + errorMessage);
162         }
163         };
164 
165     class Callback extends AlwaysOnHotwordDetector.Callback {
166 
167         private boolean mAvailable = false;
168 
169         @Override
onAvailabilityChanged(int status)170         public void onAvailabilityChanged(int status) {
171             Log.i(TAG, "onAvailabilityChanged: " + status);
172             if (status == STATE_HARDWARE_UNAVAILABLE) {
173                 // adb shell dumpsys package com.example.android.voiceinteractor | grep HOTWO
174                 Log.w(
175                         TAG,
176                         "Hotword hardware unavailable. You may need to pre-grant "
177                                 + "CAPTURE_AUDIO_HOTWORD to this app, grant record audio to the app"
178                                 + "in settings, and/or change the keyphrase "
179                                 + "to one supported by the device's default assistant.");
180             }
181             if (status == STATE_KEYPHRASE_UNENROLLED) {
182                 Intent enrollIntent = null;
183                 enrollIntent = ((AlwaysOnHotwordDetector) mHotwordDetector).createEnrollIntent();
184                 if (enrollIntent == null) {
185                     Log.w(TAG, "No enroll intent found. Try enrolling the keyphrase using the"
186                             + " device's default assistant.");
187                     return;
188                 }
189                 ComponentName component = startForegroundService(enrollIntent);
190                 Log.i(TAG, "Start enroll intent: " + component);
191             }
192             if (status == STATE_KEYPHRASE_ENROLLED) {
193                 Log.i(TAG, "Keyphrase enrolled; ready to recognize.");
194                 mAvailable = true;
195             }
196         }
197 
198         @Override
onRejected(@onNull HotwordRejectedResult result)199         public void onRejected(@NonNull HotwordRejectedResult result) {
200             mHotwordDetector.startRecognition();
201         }
202 
203         @Override
onDetected(@onNull EventPayload eventPayload)204         public void onDetected(@NonNull EventPayload eventPayload) {
205             Trace.beginAsyncSection("VIS.onDetected", 0);
206             onDetected(eventPayload, false);
207             Trace.endAsyncSection("VIS.onDetected", 0);
208         }
209 
onDetected(@onNull EventPayload eventPayload, boolean generateSessionId)210         public void onDetected(@NonNull EventPayload eventPayload, boolean generateSessionId) {
211             Log.i(TAG, "onDetected: " + eventPayload);
212             Log.i(TAG, "minBufferSize: "
213                     + AudioRecord.getMinBufferSize(
214                     eventPayload.getCaptureAudioFormat().getSampleRate(),
215                     eventPayload.getCaptureAudioFormat().getChannelMask(),
216                     eventPayload.getCaptureAudioFormat().getEncoding()));
217 
218             int sampleRate = eventPayload.getCaptureAudioFormat().getSampleRate();
219             int bytesPerSecond =
220                     eventPayload.getCaptureAudioFormat().getFrameSizeInBytes() * sampleRate;
221 
222             Trace.beginAsyncSection("VIS.createAudioRecord", 1);
223 
224             // For Non-trusted:
225 //            Integer captureSession = 0;
226 //            try {
227 //                Method getCaptureSessionMethod = eventPayload.getClass().getMethod("getCaptureSession");
228 //                captureSession = (Integer) getCaptureSessionMethod.invoke(eventPayload);
229 //            } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
230 //                e.printStackTrace();
231 //            }
232 //            int sessionId = generateSessionId ?
233 //                    AudioManager.AUDIO_SESSION_ID_GENERATE : captureSession;
234 //            AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond, sessionId);
235 
236             AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond);
237             Trace.endAsyncSection("VIS.createAudioRecord", 1);
238             if (record.getState() != AudioRecord.STATE_INITIALIZED) {
239                 Trace.setCounter("VIS AudioRecord.getState",
240                         record.getState());
241                 Log.e(TAG, "Failed to init first AudioRecord.");
242                 mHotwordDetector.startRecognition();
243                 return;
244             }
245 
246             byte[] buffer = new byte[bytesPerSecond * (int) AUDIO_READ_DURATION.getSeconds()];
247             Trace.beginAsyncSection("VIS.startRecording", 1);
248             record.startRecording();
249             Trace.endAsyncSection("VIS.startRecording", 1);
250             Trace.beginAsyncSection("AudioUtils.read", 1);
251             int numBytes = AudioUtils.read(record, bytesPerSecond, AUDIO_READ_DURATION.getSeconds(),
252                     buffer);
253             Trace.endAsyncSection("AudioUtils.read", 1);
254 
255 //            try {
256 //                Thread.sleep(2000);
257 //            } catch (InterruptedException e) {
258 //                Thread.interrupted();
259 //                throw new RuntimeException(e);
260 //            }
261 
262 
263             record.stop();
264             record.release();
265 
266             Log.i(TAG, "numBytes=" + numBytes + " audioSeconds=" + numBytes * 1.0 / bytesPerSecond);
267             mData.putByteArray("1", buffer);
268             mAudioFormat = eventPayload.getCaptureAudioFormat();
269             mLastPayload = eventPayload;
270             mHotwordDetector.startRecognition();
271         }
272 
273         @Override
onError()274         public void onError() {
275             Log.i(TAG, "onError");
276             mHotwordDetector.startRecognition();
277         }
278 
279         @Override
onRecognitionPaused()280         public void onRecognitionPaused() {
281             Log.i(TAG, "onRecognitionPaused");
282         }
283 
284         @Override
onRecognitionResumed()285         public void onRecognitionResumed() {
286             Log.i(TAG, "onRecognitionResumed");
287         }
288 
289         @Override
onHotwordDetectionServiceInitialized(int status)290         public void onHotwordDetectionServiceInitialized(int status) {
291             Log.i(TAG, "onHotwordDetectionServiceInitialized: " + status
292                     + ". mAvailable=" + mAvailable);
293             if (mAvailable) {
294                 mHotwordDetector.startRecognition();
295             }
296             //TODO(b/265535257): Provide two services independent lifecycle.
297             mVisualQueryDetector = createVisualQueryDetector(null, null,
298                 Executors.newSingleThreadExecutor(), mVisualQueryDetectorCallback);
299         }
300     }
301 }
302