1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_NL_CLASSIFIER_H_
17 #define TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_NL_CLASSIFIER_H_
18 
19 #include <stddef.h>
20 #include <string.h>
21 
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include "absl/status/status.h"
27 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
28 #include "tensorflow/lite/c/common.h"
29 #include "tensorflow/lite/core/api/op_resolver.h"
30 #include "tensorflow/lite/kernels/register.h"
31 #include "tensorflow/lite/string_type.h"
32 #include "tensorflow_lite_support/cc/common.h"
33 #include "tensorflow_lite_support/cc/port/statusor.h"
34 #include "tensorflow_lite_support/cc/task/core/base_task_api.h"
35 #include "tensorflow_lite_support/cc/task/core/category.h"
36 #include "tensorflow_lite_support/cc/text/tokenizers/regex_tokenizer.h"
37 
38 namespace tflite {
39 namespace task {
40 namespace text {
41 namespace nlclassifier {
42 
43 // Options to identify input and output tensors of the model
struct NLClassifierOptions {
  // Positional identifiers of the tensors. Tensor names (below) are tried
  // first; the indices act as the fallback.
  int input_tensor_index{0};
  int output_score_tensor_index{0};
  // -1 is the default and means the model has no output label tensor. Labels
  // can instead be supplied by a label file attached to the output score
  // tensor's metadata.
  int output_label_tensor_index{-1};

  // Names of the tensors; these take priority over the indices above.
  std::string input_tensor_name{"INPUT"};
  std::string output_score_tensor_name{"OUTPUT_SCORE"};
  std::string output_label_tensor_name{"OUTPUT_LABEL"};
};
54 
55 // Classifier API for NLClassification tasks, categorizes string into different
56 // classes.
57 //
58 // The API expects a TFLite model with the following input/output tensor:
59 // Input tensor:
60 //   (kTfLiteString) - input of the model, accepts a string.
61 //      or
//   (kTfLiteInt32) - input of the model, accepts the tokenized
//   indices of a string input. A RegexTokenizer needs to be set up in the input
//   tensor's metadata.
65 // Output score tensor:
66 //   (kTfLiteUInt8/kTfLiteInt8/kTfLiteInt16/kTfLiteFloat32/
67 //    kTfLiteFloat64/kTfLiteBool)
68 //    - output scores for each class, if type is one of the Int types,
69 //      dequantize it to double, if type is kTfLiteBool, convert the values to
70 //      0.0 and 1.0 respectively
71 //    - can have an optional associated file in metadata for labels, the file
72 //      should be a plain text file with one label per line, the number of
73 //      labels should match the number of categories the model outputs.
74 // Output label tensor: optional
75 //   (kTfLiteString/kTfLiteInt32)
//    - output classname for each class, should be of the same length as the
//      scores. If this tensor is not present, the API uses score indices as
//      classnames.
79 //    - will be ignored if output score tensor already has an associated label
80 //      file.
81 //
82 // By default the API tries to find the input/output tensors with default
83 // configurations in NLClassifierOptions, with tensor name prioritized over
84 // tensor index. The option is configurable for different TFLite models.
85 class NLClassifier : public core::BaseTaskApi<std::vector<core::Category>,
86                                               const std::string&> {
87  public:
88   using BaseTaskApi::BaseTaskApi;
89 
90   // Creates a NLClassifier from TFLite model buffer.
91   static tflite::support::StatusOr<std::unique_ptr<NLClassifier>>
92   CreateFromBufferAndOptions(
93       const char* model_buffer_data, size_t model_buffer_size,
94       const NLClassifierOptions& options = {},
95       std::unique_ptr<tflite::OpResolver> resolver =
96           absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>());
97 
98   // Creates a NLClassifier from TFLite model file.
99   static tflite::support::StatusOr<std::unique_ptr<NLClassifier>>
100   CreateFromFileAndOptions(
101       const std::string& path_to_model, const NLClassifierOptions& options = {},
102       std::unique_ptr<tflite::OpResolver> resolver =
103           absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>());
104 
105   // Creates a NLClassifier from TFLite model file descriptor.
106   static tflite::support::StatusOr<std::unique_ptr<NLClassifier>>
107   CreateFromFdAndOptions(
108       int fd, const NLClassifierOptions& options = {},
109       std::unique_ptr<tflite::OpResolver> resolver =
110           absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>());
111 
112   // Performs classification on a string input, returns classified results.
113   std::vector<core::Category> Classify(const std::string& text);
114 
115  protected:
116   static constexpr int kOutputTensorIndex = 0;
117   static constexpr int kOutputTensorLabelFileIndex = 0;
118 
119   absl::Status Initialize(const NLClassifierOptions& options);
120   const NLClassifierOptions& GetOptions() const;
121 
122   // Try to extract attached label file from metadata and initialize
123   // labels_vector_, return error if metadata type is incorrect or no label file
124   // is attached in metadata.
125   absl::Status TrySetLabelFromMetadata(const TensorMetadata* metadata);
126 
127   // Pass through the input text into model's input tensor.
128   absl::Status Preprocess(const std::vector<TfLiteTensor*>& input_tensors,
129                           const std::string& input) override;
130 
131   // Extract model output and create results with output label tensor or label
132   // file attached in metadata. If no output label tensor or label file is
133   // found, use output score index as labels.
134   tflite::support::StatusOr<std::vector<core::Category>> Postprocess(
135       const std::vector<const TfLiteTensor*>& output_tensors,
136       const std::string& input) override;
137 
138   std::vector<core::Category> BuildResults(const TfLiteTensor* scores,
139                                            const TfLiteTensor* labels);
140 
141   // Gets the tensor from a vector of tensors by checking tensor name first and
142   // tensor index second, return nullptr if no tensor is found.
143   template <typename TensorType>
FindTensorWithNameOrIndex(const std::vector<TensorType * > & tensors,const flatbuffers::Vector<flatbuffers::Offset<TensorMetadata>> * metadata_array,const std::string & name,int index)144   static TensorType* FindTensorWithNameOrIndex(
145       const std::vector<TensorType*>& tensors,
146       const flatbuffers::Vector<flatbuffers::Offset<TensorMetadata>>*
147           metadata_array,
148       const std::string& name, int index) {
149     if (metadata_array != nullptr && metadata_array->size() == tensors.size()) {
150       for (int i = 0; i < metadata_array->size(); i++) {
151         if (strcmp(name.data(), metadata_array->Get(i)->name()->c_str()) == 0) {
152           return tensors[i];
153         }
154       }
155     }
156 
157     for (TensorType* tensor : tensors) {
158       if (tensor->name == name) {
159         return tensor;
160       }
161     }
162     return index >= 0 && index < tensors.size() ? tensors[index] : nullptr;
163   }
164 
165  private:
166   bool HasRegexTokenizerMetadata();
167   absl::Status SetupRegexTokenizer();
168 
169   NLClassifierOptions options_;
170   // labels vector initialized from output tensor's associated file, if one
171   // exists.
172   std::unique_ptr<std::vector<std::string>> labels_vector_;
173   std::unique_ptr<tflite::support::text::tokenizer::RegexTokenizer> tokenizer_;
174 };
175 
176 }  // namespace nlclassifier
177 }  // namespace text
178 }  // namespace task
179 }  // namespace tflite
180 
181 #endif  // TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_NL_CLASSIFIER_H_
182