1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ 17 #define TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ 18 19 #include <deque> 20 #include <unordered_set> 21 #include <vector> 22 23 #include "tensorflow/core/framework/tensor.h" 24 #include "tensorflow/core/platform/types.h" 25 26 namespace tensorflow { 27 28 // This class is designed to apply a very primitive decoding model on top of the 29 // instantaneous results from running an audio recognition model on a single 30 // window of samples. It applies smoothing over time so that noisy individual 31 // label scores are averaged, increasing the confidence that apparent matches 32 // are real. 33 // To use it, you should create a class object with the configuration you 34 // want, and then feed results from running a TensorFlow model into the 35 // processing method. The timestamp for each subsequent call should be 36 // increasing from the previous, since the class is designed to process a stream 37 // of data over time. 38 class RecognizeCommands { 39 public: 40 // labels should be a list of the strings associated with each one-hot score. 41 // The window duration controls the smoothing. Longer durations will give a 42 // higher confidence that the results are correct, but may miss some commands. 43 // The detection threshold has a similar effect, with high values increasing 44 // the precision at the cost of recall. The minimum count controls how many 45 // results need to be in the averaging window before it's seen as a reliable 46 // average. This prevents erroneous results when the averaging window is 47 // initially being populated for example. The suppression argument disables 48 // further recognitions for a set time after one has been triggered, which can 49 // help reduce spurious recognitions. 50 explicit RecognizeCommands(const std::vector<string>& labels, 51 int32 average_window_duration_ms = 1000, 52 float detection_threshold = 0.2, 53 int32 suppression_ms = 500, 54 int32 minimum_count = 3); 55 56 // Call this with the results of running a model on sample data. 57 Status ProcessLatestResults(const Tensor& latest_results, 58 const int64 current_time_ms, 59 string* found_command, float* score, 60 bool* is_new_command); 61 62 private: 63 // Configuration 64 std::vector<string> labels_; 65 int32 average_window_duration_ms_; 66 float detection_threshold_; 67 int32 suppression_ms_; 68 int32 minimum_count_; 69 70 // Working variables 71 std::deque<std::pair<int64, Tensor>> previous_results_; 72 string previous_top_label_; 73 int64 labels_count_; 74 int64 previous_top_label_time_; 75 }; 76 77 } // namespace tensorflow 78 79 #endif // TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_ 80