1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.example.android.voiceinteractor; 18 19 import static android.service.voice.AlwaysOnHotwordDetector.STATE_HARDWARE_UNAVAILABLE; 20 import static android.service.voice.AlwaysOnHotwordDetector.STATE_KEYPHRASE_ENROLLED; 21 import static android.service.voice.AlwaysOnHotwordDetector.STATE_KEYPHRASE_UNENROLLED; 22 23 import android.content.ComponentName; 24 import android.content.Intent; 25 import android.media.AudioAttributes; 26 import android.media.AudioFormat; 27 import android.media.AudioRecord; 28 import android.media.MediaRecorder; 29 import android.os.Binder; 30 import android.os.Bundle; 31 import android.os.IBinder; 32 import android.os.Trace; 33 import android.service.voice.AlwaysOnHotwordDetector; 34 import android.service.voice.AlwaysOnHotwordDetector.EventPayload; 35 import android.service.voice.HotwordDetector; 36 import android.service.voice.HotwordRejectedResult; 37 import android.service.voice.SandboxedDetectionInitializer; 38 import android.service.voice.VisualQueryDetectionServiceFailure; 39 import android.service.voice.VisualQueryDetector; 40 import android.service.voice.VoiceInteractionService; 41 import android.util.Log; 42 43 import androidx.annotation.NonNull; 44 45 import java.time.Duration; 46 import java.util.Locale; 47 import java.util.concurrent.Executors; 48 49 public class SampleVoiceInteractionService extends VoiceInteractionService { 50 public static final String DSP_MODEL_KEYPHRASE = "X Google"; 51 private static final String TAG = "VIS"; 52 53 // AudioRecord config 54 private static final Duration AUDIO_RECORD_BUFFER_DURATION = Duration.ofSeconds(5); 55 private static final Duration AUDIO_READ_DURATION = Duration.ofSeconds(3); 56 57 // DSP model config 58 private static final Locale DSP_MODEL_LOCALE = Locale.US; 59 60 private final IBinder binder = new LocalBinder(); 61 62 HotwordDetector mHotwordDetector; 63 VisualQueryDetector mVisualQueryDetector; 64 Callback mHotwordDetectorCallback; 65 VisualQueryDetector.Callback mVisualQueryDetectorCallback; 66 Bundle mData = new Bundle(); 67 AudioFormat mAudioFormat; 68 EventPayload mLastPayload; 69 createAudioRecord(EventPayload eventPayload, int bytesPerSecond)70 private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond) { 71 int audioRecordBufferSize = getBufferSizeInBytes(bytesPerSecond, 72 AUDIO_RECORD_BUFFER_DURATION.getSeconds()); 73 Log.d(TAG, "creating AudioRecord: bytes=" + audioRecordBufferSize 74 + ", lengthSeconds=" + (audioRecordBufferSize / bytesPerSecond)); 75 return new AudioRecord.Builder() 76 .setAudioAttributes( 77 new AudioAttributes.Builder() 78 .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) 79 .build()) 80 .setAudioFormat(eventPayload.getCaptureAudioFormat()) 81 .setBufferSizeInBytes(audioRecordBufferSize) 82 .setSharedAudioEvent(eventPayload.getHotwordDetectedResult().getMediaSyncEvent()) 83 .build(); 84 } 85 getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds)86 private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) { 87 return (int) (bytesPerSecond * bufferLengthSeconds); 88 } 89 90 @Override onBind(Intent intent)91 public IBinder onBind(Intent intent) { 92 if ("local".equals(intent.getAction())) { 93 return binder; 94 } 95 return super.onBind(intent); 96 } 97 98 @Override onReady()99 public void onReady() { 100 super.onReady(); 101 Log.i(TAG, "onReady"); 102 mHotwordDetectorCallback = new Callback(); 103 mVisualQueryDetectorCallback = new VisualQueryDetectorCallback(); 104 mHotwordDetector = createAlwaysOnHotwordDetector(DSP_MODEL_KEYPHRASE, 105 DSP_MODEL_LOCALE, null, null, mHotwordDetectorCallback); 106 107 } 108 109 @Override onShutdown()110 public void onShutdown() { 111 super.onShutdown(); 112 Log.i(TAG, "onShutdown"); 113 } 114 115 public class LocalBinder extends Binder { getService()116 SampleVoiceInteractionService getService() { 117 // Return this instance of LocalService so clients can call public methods 118 return SampleVoiceInteractionService.this; 119 } 120 } 121 122 class VisualQueryDetectorCallback implements VisualQueryDetector.Callback { 123 @Override onQueryDetected(@onNull String partialQuery)124 public void onQueryDetected(@NonNull String partialQuery) { 125 Log.i(TAG, "VQD partial query detected: "+ partialQuery); 126 } 127 128 @Override onQueryRejected()129 public void onQueryRejected() { 130 Log.i(TAG, "VQD query rejected"); 131 } 132 133 @Override onQueryFinished()134 public void onQueryFinished() { 135 Log.i(TAG, "VQD query finished"); 136 } 137 138 @Override onVisualQueryDetectionServiceInitialized(int status)139 public void onVisualQueryDetectionServiceInitialized(int status) { 140 Log.i(TAG, "VQD init: "+ status); 141 if (status == SandboxedDetectionInitializer.INITIALIZATION_STATUS_SUCCESS) { 142 mVisualQueryDetector.startRecognition(); 143 } 144 } 145 146 @Override onVisualQueryDetectionServiceRestarted()147 public void onVisualQueryDetectionServiceRestarted() { 148 Log.i(TAG, "VQD restarted"); 149 mVisualQueryDetector.startRecognition(); 150 } 151 152 @Override onFailure( VisualQueryDetectionServiceFailure visualQueryDetectionServiceFailure)153 public void onFailure( 154 VisualQueryDetectionServiceFailure visualQueryDetectionServiceFailure) { 155 Log.i(TAG, "VQD onFailure visualQueryDetectionServiceFailure: " 156 + visualQueryDetectionServiceFailure); 157 } 158 159 @Override onUnknownFailure(String errorMessage)160 public void onUnknownFailure(String errorMessage) { 161 Log.i(TAG, "VQD onUnknownFailure errorMessage: " + errorMessage); 162 } 163 }; 164 165 class Callback extends AlwaysOnHotwordDetector.Callback { 166 167 private boolean mAvailable = false; 168 169 @Override onAvailabilityChanged(int status)170 public void onAvailabilityChanged(int status) { 171 Log.i(TAG, "onAvailabilityChanged: " + status); 172 if (status == STATE_HARDWARE_UNAVAILABLE) { 173 // adb shell dumpsys package com.example.android.voiceinteractor | grep HOTWO 174 Log.w( 175 TAG, 176 "Hotword hardware unavailable. You may need to pre-grant " 177 + "CAPTURE_AUDIO_HOTWORD to this app, grant record audio to the app" 178 + "in settings, and/or change the keyphrase " 179 + "to one supported by the device's default assistant."); 180 } 181 if (status == STATE_KEYPHRASE_UNENROLLED) { 182 Intent enrollIntent = null; 183 enrollIntent = ((AlwaysOnHotwordDetector) mHotwordDetector).createEnrollIntent(); 184 if (enrollIntent == null) { 185 Log.w(TAG, "No enroll intent found. Try enrolling the keyphrase using the" 186 + " device's default assistant."); 187 return; 188 } 189 ComponentName component = startForegroundService(enrollIntent); 190 Log.i(TAG, "Start enroll intent: " + component); 191 } 192 if (status == STATE_KEYPHRASE_ENROLLED) { 193 Log.i(TAG, "Keyphrase enrolled; ready to recognize."); 194 mAvailable = true; 195 } 196 } 197 198 @Override onRejected(@onNull HotwordRejectedResult result)199 public void onRejected(@NonNull HotwordRejectedResult result) { 200 mHotwordDetector.startRecognition(); 201 } 202 203 @Override onDetected(@onNull EventPayload eventPayload)204 public void onDetected(@NonNull EventPayload eventPayload) { 205 Trace.beginAsyncSection("VIS.onDetected", 0); 206 onDetected(eventPayload, false); 207 Trace.endAsyncSection("VIS.onDetected", 0); 208 } 209 onDetected(@onNull EventPayload eventPayload, boolean generateSessionId)210 public void onDetected(@NonNull EventPayload eventPayload, boolean generateSessionId) { 211 Log.i(TAG, "onDetected: " + eventPayload); 212 Log.i(TAG, "minBufferSize: " 213 + AudioRecord.getMinBufferSize( 214 eventPayload.getCaptureAudioFormat().getSampleRate(), 215 eventPayload.getCaptureAudioFormat().getChannelMask(), 216 eventPayload.getCaptureAudioFormat().getEncoding())); 217 218 int sampleRate = eventPayload.getCaptureAudioFormat().getSampleRate(); 219 int bytesPerSecond = 220 eventPayload.getCaptureAudioFormat().getFrameSizeInBytes() * sampleRate; 221 222 Trace.beginAsyncSection("VIS.createAudioRecord", 1); 223 224 // For Non-trusted: 225 // Integer captureSession = 0; 226 // try { 227 // Method getCaptureSessionMethod = eventPayload.getClass().getMethod("getCaptureSession"); 228 // captureSession = (Integer) getCaptureSessionMethod.invoke(eventPayload); 229 // } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) { 230 // e.printStackTrace(); 231 // } 232 // int sessionId = generateSessionId ? 233 // AudioManager.AUDIO_SESSION_ID_GENERATE : captureSession; 234 // AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond, sessionId); 235 236 AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond); 237 Trace.endAsyncSection("VIS.createAudioRecord", 1); 238 if (record.getState() != AudioRecord.STATE_INITIALIZED) { 239 Trace.setCounter("VIS AudioRecord.getState", 240 record.getState()); 241 Log.e(TAG, "Failed to init first AudioRecord."); 242 mHotwordDetector.startRecognition(); 243 return; 244 } 245 246 byte[] buffer = new byte[bytesPerSecond * (int) AUDIO_READ_DURATION.getSeconds()]; 247 Trace.beginAsyncSection("VIS.startRecording", 1); 248 record.startRecording(); 249 Trace.endAsyncSection("VIS.startRecording", 1); 250 Trace.beginAsyncSection("AudioUtils.read", 1); 251 int numBytes = AudioUtils.read(record, bytesPerSecond, AUDIO_READ_DURATION.getSeconds(), 252 buffer); 253 Trace.endAsyncSection("AudioUtils.read", 1); 254 255 // try { 256 // Thread.sleep(2000); 257 // } catch (InterruptedException e) { 258 // Thread.interrupted(); 259 // throw new RuntimeException(e); 260 // } 261 262 263 record.stop(); 264 record.release(); 265 266 Log.i(TAG, "numBytes=" + numBytes + " audioSeconds=" + numBytes * 1.0 / bytesPerSecond); 267 mData.putByteArray("1", buffer); 268 mAudioFormat = eventPayload.getCaptureAudioFormat(); 269 mLastPayload = eventPayload; 270 mHotwordDetector.startRecognition(); 271 } 272 273 @Override onError()274 public void onError() { 275 Log.i(TAG, "onError"); 276 mHotwordDetector.startRecognition(); 277 } 278 279 @Override onRecognitionPaused()280 public void onRecognitionPaused() { 281 Log.i(TAG, "onRecognitionPaused"); 282 } 283 284 @Override onRecognitionResumed()285 public void onRecognitionResumed() { 286 Log.i(TAG, "onRecognitionResumed"); 287 } 288 289 @Override onHotwordDetectionServiceInitialized(int status)290 public void onHotwordDetectionServiceInitialized(int status) { 291 Log.i(TAG, "onHotwordDetectionServiceInitialized: " + status 292 + ". mAvailable=" + mAvailable); 293 if (mAvailable) { 294 mHotwordDetector.startRecognition(); 295 } 296 //TODO(b/265535257): Provide two services independent lifecycle. 297 mVisualQueryDetector = createVisualQueryDetector(null, null, 298 Executors.newSingleThreadExecutor(), mVisualQueryDetectorCallback); 299 } 300 } 301 } 302