1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.voicedialer;
18 
19 import android.app.Activity;
20 import android.content.Intent;
21 import android.speech.srec.MicrophoneInputStream;
22 import android.speech.srec.Recognizer;
23 import android.speech.srec.WaveHeader;
24 import android.util.Log;
25 import java.io.File;
26 import java.io.FileInputStream;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.util.ArrayList;
30 
31 /**
32  * This class is a framework for recognizing speech.  It must be extended to use.
 * The child class must implement setupGrammar and onRecognitionSuccess.
34  * A usage cycle is as follows:
35  * <ul>
36  * <li>Create with a reference to the {@link VoiceDialerActivity}.
37  * <li>Signal the user to start speaking with the Vibrator or beep.
38  * <li>Start audio input by creating a {@link MicrophoneInputStream}.
39  * <li>Create and configure a {@link Recognizer}.
40  * <li>Set up the grammar using setupGrammar.
41  * <li>Start the {@link Recognizer} running using data already being
42  * collected by the microphone.
43  * <li>Wait for the {@link Recognizer} to complete.
44  * <li>Process the results using onRecognitionSuccess, which will pass
 * a list of intents to the {@link RecognizerClient}.
46  * <li>Shut down and clean up.
47  * </ul>
48  * Notes:
49  * <ul>
 * <li>Audio may be read from a file.
51  * <li>A directory tree of audio files may be stepped through.
52  * <li>A contact list may be read from a file.
53  * <li>A {@link RecognizerLogger} may generate a set of log files from
54  * a recognition session.
55  * <li>A static instance of this class is held and reused by the
56  * {@link VoiceDialerActivity}, which saves setup time.
57  * </ul>
58  */
59 abstract public class RecognizerEngine {
60 
61     protected static final String TAG = "RecognizerEngine";
62 
63     protected static final String ACTION_RECOGNIZER_RESULT =
64             "com.android.voicedialer.ACTION_RECOGNIZER_RESULT";
65     public static final String SENTENCE_EXTRA = "sentence";
66     public static final String SEMANTIC_EXTRA = "semantic";
67 
68     protected final String SREC_DIR = Recognizer.getConfigDir(null);
69 
70     protected static final String OPEN_ENTRIES = "openentries.txt";
71 
72     protected static final int RESULT_LIMIT = 5;
73 
74     protected Activity mActivity;
75     protected Recognizer mSrec;
76     protected Recognizer.Grammar mSrecGrammar;
77     protected RecognizerLogger mLogger;
78     protected int mSampleRate;
79 
80     /**
81      * Constructor.
82      */
RecognizerEngine()83     public RecognizerEngine() {
84         mSampleRate = 0;
85     }
86 
setupGrammar()87     abstract protected void setupGrammar() throws IOException, InterruptedException;
88 
onRecognitionSuccess(RecognizerClient recognizerClient)89     abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient)
90             throws InterruptedException;
91 
92     /**
93      * Start the recognition process.
94      *
95      * <ul>
96      * <li>Create and start the microphone.
97      * <li>Create a Recognizer.
98      * <li>set up the grammar (implementation is in child class)
99      * <li>Start the Recognizer.
100      * <li>Feed the Recognizer audio until it provides a result.
101      * <li>Build a list of Intents corresponding to the results. (implementation
102      * is in child class)
103      * <li>Stop the microphone.
104      * <li>Stop the Recognizer.
105      * </ul>
106      *
107      * @param recognizerClient client to be given the results
108      * @param activity the Activity this recognition is being run from.
109      * @param micFile optional audio input from this file, or directory tree.
110      * @param sampleRate the same rate coming from the mic or micFile
111      */
recognize(RecognizerClient recognizerClient, Activity activity, File micFile, int sampleRate)112     public void recognize(RecognizerClient recognizerClient, Activity activity,
113             File micFile, int sampleRate) {
114         InputStream mic = null;
115         boolean recognizerStarted = false;
116         try {
117             mActivity = activity;
118             // set up logger
119             mLogger = null;
120             if (RecognizerLogger.isEnabled(mActivity)) {
121                 mLogger = new RecognizerLogger(mActivity);
122             }
123 
124             if (mSampleRate != sampleRate) {
125                 // sample rate has changed since we last used this recognizerEngine.
126                 // destroy the grammar and regenerate.
127                 if (mSrecGrammar != null) {
128                     mSrecGrammar.destroy();
129                 }
130                 mSrecGrammar = null;
131                 mSampleRate = sampleRate;
132             }
133 
134             // create a new recognizer
135             if (false) Log.d(TAG, "start new Recognizer");
136             if (mSrec == null) {
137                 String parFilePath = SREC_DIR + "/baseline11k.par";
138                 if (sampleRate == 8000) {
139                     parFilePath = SREC_DIR + "/baseline8k.par";
140                 }
141                 mSrec = new Recognizer(parFilePath);
142             }
143 
144             // start audio input
145             if (micFile != null) {
146                 if (false) Log.d(TAG, "using mic file");
147                 mic = new FileInputStream(micFile);
148                 WaveHeader hdr = new WaveHeader();
149                 hdr.read(mic);
150             } else {
151                 if (false) Log.d(TAG, "start new MicrophoneInputStream");
152                 mic = new MicrophoneInputStream(sampleRate, sampleRate * 15);
153             }
154 
155             // notify UI
156             recognizerClient.onMicrophoneStart(mic);
157 
158             // log audio if requested
159             if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate);
160 
161             setupGrammar();
162 
163             // start the recognition process
164             if (false) Log.d(TAG, "start mSrec.start");
165             mSrec.start();
166             recognizerStarted = true;
167 
168             // recognize
169             while (true) {
170                 if (Thread.interrupted()) throw new InterruptedException();
171                 int event = mSrec.advance();
172                 if (event != Recognizer.EVENT_INCOMPLETE &&
173                         event != Recognizer.EVENT_NEED_MORE_AUDIO) {
174                     Log.d(TAG, "start advance()=" +
175                             Recognizer.eventToString(event) +
176                             " avail " + mic.available());
177                 }
178                 switch (event) {
179                 case Recognizer.EVENT_INCOMPLETE:
180                 case Recognizer.EVENT_STARTED:
181                 case Recognizer.EVENT_START_OF_VOICING:
182                 case Recognizer.EVENT_END_OF_VOICING:
183                     continue;
184                 case Recognizer.EVENT_RECOGNITION_RESULT:
185                     onRecognitionSuccess(recognizerClient);
186                     break;
187                 case Recognizer.EVENT_NEED_MORE_AUDIO:
188                     mSrec.putAudio(mic);
189                     continue;
190                 default:
191                     Log.d(TAG, "unknown event " + event);
192                     recognizerClient.onRecognitionFailure(Recognizer.eventToString(event));
193                     break;
194                 }
195                 break;
196             }
197 
198         } catch (InterruptedException e) {
199             if (false) Log.d(TAG, "start interrupted " + e);
200             recognizerClient.onRecognitionError(e.toString());
201         } catch (IOException e) {
202             if (false) Log.d(TAG, "start new Srec failed " + e);
203             recognizerClient.onRecognitionError(e.toString());
204         } catch (Exception e) {
205             if (false) Log.d(TAG, "exception " + e);
206             recognizerClient.onRecognitionError(e.toString());
207         } finally {
208             if (false) Log.d(TAG, "start mSrec.stop");
209             if (mSrec != null && recognizerStarted) mSrec.stop();
210 
211             // stop microphone
212             try {
213                 if (mic != null) mic.close();
214             }
215             catch (IOException ex) {
216                 if (false) Log.d(TAG, "start - mic.close failed - " + ex);
217             }
218             mic = null;
219 
220             // close logger
221             try {
222                 if (mLogger != null) mLogger.close();
223             }
224             catch (IOException ex) {
225                 if (false) Log.d(TAG, "start - mLoggger.close failed - " + ex);
226             }
227             mLogger = null;
228         }
229         if (false) Log.d(TAG, "start bye");
230     }
231 
addIntent(ArrayList<Intent> intents, Intent intent)232     protected static void addIntent(ArrayList<Intent> intents, Intent intent) {
233         for (Intent in : intents) {
234             if (in.getAction() != null &&
235                     in.getAction().equals(intent.getAction()) &&
236                     in.getData() != null &&
237                     in.getData().equals(intent.getData())) {
238                 return;
239             }
240         }
241         intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK);
242         intents.add(intent);
243     }
244 }
245