1/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16syntax = "proto2";
17
18package tflite.evaluation;
19
20import "tensorflow/lite/tools/evaluation/proto/preprocessing_steps.proto";
21
22option cc_enable_arenas = true;
23option java_multiple_files = true;
24option java_package = "tflite.evaluation";
25
// Describes what an EvaluationStage executes. The parameters live in a oneof,
// so at most one of the member messages below is populated at a time.
//
// Next ID: 7
message ProcessSpecification {
  oneof params {
    // Image preprocessing.
    ImagePreprocessingParams image_preprocessing_params = 1;

    // Top-K accuracy evaluation.
    TopkAccuracyEvalParams topk_accuracy_eval_params = 2;

    // TFLite inference.
    TfliteInferenceParams tflite_inference_params = 3;

    // End-to-end evaluation of the image classification task.
    ImageClassificationParams image_classification_params = 4;

    // Average Precision computation for the object detection task.
    ObjectDetectionAveragePrecisionParams
        object_detection_average_precision_params = 5;

    // End-to-end evaluation of the object detection task.
    ObjectDetectionParams object_detection_params = 6;
  }
}
40
// Latency statistics, expressed in microseconds, accumulated over every
// EvaluationStage::Run() call made so far.
//
// Next ID: 7
message LatencyMetrics {
  // Latency of the most recent Run.
  optional int64 last_us = 1;

  // Largest latency seen for any Run.
  optional int64 max_us = 2;

  // Smallest latency seen for any Run.
  optional int64 min_us = 3;

  // Total of the latencies of all Runs.
  optional int64 sum_us = 4;

  // Mean latency over all Runs.
  optional double avg_us = 5;

  // Standard deviation of the latency over all Runs. Unlike avg_us, this is
  // an integer field, so a fractional number of microseconds cannot be stored.
  optional int64 std_deviation_us = 6;
}
59
// Summary statistics of an accuracy value gathered over multiple evaluation
// runs.
//
// Next ID: 5
message AccuracyMetrics {
  // Largest value seen for any Run.
  optional float max_value = 1;

  // Smallest value seen for any Run.
  optional float min_value = 2;

  // Mean value over all Runs. Declared as double, whereas the other fields of
  // this message are float.
  optional double avg_value = 3;

  // Standard deviation over all Runs.
  optional float std_deviation = 4;
}
73
// Holds metrics that are specific to a process; their content can vary with
// what an EvaluationStage does.
//
// Next ID: 8
message ProcessMetrics {
  // Total latency statistics (see LatencyMetrics).
  optional LatencyMetrics total_latency = 1;

  // Stage-specific metrics. As a oneof, at most one member is populated.
  oneof stage_metrics {
    // Metrics from top-K accuracy evaluation.
    TopkAccuracyEvalMetrics topk_accuracy_metrics = 2;

    // Metrics specific to TFLite inference.
    TfliteInferenceMetrics tflite_inference_metrics = 3;

    // Metrics from evaluation of the image classification task.
    ImageClassificationMetrics image_classification_metrics = 4;

    // Metrics comparing TFLite execution under 'test' and 'reference'
    // settings.
    InferenceProfilerMetrics inference_profiler_metrics = 5;

    // Average Precision metrics from the object detection task.
    ObjectDetectionAveragePrecisionMetrics
        object_detection_average_precision_metrics = 6;

    // Metrics from evaluation of the object detection task.
    ObjectDetectionMetrics object_detection_metrics = 7;
  }
}
91
// Configuration describing how images are preprocessed.
//
// Next ID: 3
message ImagePreprocessingParams {
  // Required. The preprocessing steps.
  repeated ImagePreprocessingStepParams steps = 1;

  // Output type; takes the same values as tflite::TfLiteType.
  // NOTE(review): this field is declared `required`, so proto2 treats a
  // message that lacks it as uninitialized. The other mandatory fields visible
  // in this file are declared `optional` with a "Required" comment instead.
  // The label is kept as-is because relaxing it can affect readers built
  // against the current schema.
  required int32 output_type = 2;
}
101
// Settings that control how TFLite inference is run.
//
// Next ID: 5
message TfliteInferenceParams {
  // Delegate choices for inference.
  // NOTE(review): presumably NONE means that no delegate is applied; confirm
  // against the code that consumes this message.
  enum Delegate {
    NONE = 0;
    NNAPI = 1;
    GPU = 2;
    HEXAGON = 3;
    XNNPACK = 4;
  }

  // Required. File path of the model.
  optional string model_file_path = 1;

  // Delegate to use. No explicit default is declared, so an unset field reads
  // back as the first enum value, NONE.
  optional Delegate delegate = 2;

  // How many threads the TFLite Interpreter may use.
  optional int32 num_threads = 3 [default = 1];

  // How many times the TFLite Interpreter is invoked for each input. Useful
  // for benchmarking cases in which extensive pre-processing might not be
  // needed for every input.
  optional int32 invocations_per_run = 4 [default = 1];
}
125
// Metrics that apply specifically to TFLite inference.
//
// Next ID: 2
message TfliteInferenceMetrics {
  // Count of interpreter invocations.
  optional int32 num_inferences = 1;
}
133
// Configuration for the evaluation of top-K accuracy.
//
// Next ID: 2
message TopkAccuracyEvalParams {
  // Required. The K in "top-K"; TopkAccuracyEvalMetrics::topk_accuracies is
  // documented as having |k| entries.
  optional int32 k = 1;
}
141
// Results of top-K accuracy evaluation.
//
// Next ID: 2
message TopkAccuracyEvalMetrics {
  // Holds |k| entries. Entry i is the fraction of samples whose correct label
  // appeared among the top (i + 1) model outputs.
  // For instance, topk_accuracies(1) is the fraction of samples for which the
  // model returned the correct label as either its first or its second
  // output.
  repeated float topk_accuracies = 1;
}
154
// Configuration for the end-to-end evaluation of the Image Classification
// task.
//
// Next ID: 3
message ImageClassificationParams {
  // Required.
  // The TfLite model is expected to have 1 input tensor and 1 output tensor:
  //   Input shape:  {1, image_height, image_width, 3}
  //   Output shape: {1, num_total_labels}
  optional TfliteInferenceParams inference_params = 1;

  // Optional.
  // Accuracy evaluation is skipped when this is not set.
  optional TopkAccuracyEvalParams topk_accuracy_eval_params = 2;
}
170
// Results of evaluating the image classification task.
//
// Next ID: 5
message ImageClassificationMetrics {
  // Latency statistics for pre-processing.
  optional LatencyMetrics pre_processing_latency = 1;

  // Latency statistics for inference.
  optional LatencyMetrics inference_latency = 2;

  // Metrics specific to TFLite inference.
  optional TfliteInferenceMetrics inference_metrics = 3;

  // Left unset when ImageClassificationParams did not populate
  // topk_accuracy_eval_params.
  optional TopkAccuracyEvalMetrics topk_accuracy_metrics = 4;
}
182
// Metrics obtained by comparing TFLite execution under two settings:
// 1. The 'test' setting: user-defined TfliteInferenceParams.
// 2. The 'reference' setting: default TfliteInferenceParams.
//
// Next ID: 4
message InferenceProfilerMetrics {
  // Latency statistics of single-thread CPU inference.
  optional LatencyMetrics reference_latency = 1;

  // Latency statistics for the TfliteInferenceParams being tested.
  optional LatencyMetrics test_latency = 2;

  // Given reference and test output vectors {R, T}, the error is:
  //   Mean([Abs(R[i] - T[i]) for i in num_elements])
  // output_errors[v] holds the statistics of that error value for the vth
  // output vector, over all Runs.
  repeated AccuracyMetrics output_errors = 3;
}
199
// Describes every object (either predicted or ground-truth) that an image
// contains.
//
// Next ID: 4
message ObjectDetectionResult {
  // A single object detected in an image.
  // Next ID: 4
  message ObjectInstance {
    // Bounding box of a detected object.
    // Next ID: 5
    message NormalizedBoundingBox {
      // Every boundary below is required.
      // Boundary values are expected to be normalized by the image dimensions,
      // which lets detections be evaluated independently of image size.
      // Example: normalized_top = top_boundary / image_height.
      optional float normalized_top = 1;
      optional float normalized_bottom = 2;
      optional float normalized_left = 3;
      optional float normalized_right = 4;
    }

    // Required. Class id of the object.
    optional int32 class_id = 1;

    // Required. Bounding box of the object.
    optional NormalizedBoundingBox bounding_box = 2;

    // Confidence in this prediction, a value in (0, 1.0].
    // Ground-truth data uses the default value of 1.0.
    optional float score = 3 [default = 1.0];
  }

  // The objects contained in the image.
  repeated ObjectInstance objects = 1;

  // File name of the image.
  optional string image_name = 2;

  // Unique identifier of the image.
  optional int64 image_id = 3;
}
236
// Holds the ground-truth object sets for all images of a COCO validation set.
//
// Next ID: 2
message ObjectDetectionGroundTruth {
  // Ground-truth objects; each ObjectDetectionResult describes the objects
  // contained in an image.
  repeated ObjectDetectionResult detection_results = 1;
}
244
// Configuration for computing Average Precision (AP) for the Object Detection
// task.
// For details, refer to: http://cocodataset.org/#detection-eval
//
// Next ID: 4
message ObjectDetectionAveragePrecisionParams {
  // Total number of object classes. The AP value returned for each IoU
  // threshold is averaged over all classes encountered in the predicted and
  // ground-truth sets.
  optional int32 num_classes = 1;

  // A predicted box matches a ground-truth box if and only if the IoU between
  // the two is larger than an IoU threshold.
  // AP is computed for every relevant {IoU threshold, class} combination and
  // the results are averaged to obtain mAP.
  // When left empty, evaluation covers all IoU thresholds in the range
  // 0.5:0.05:0.95 (min:increment:max).
  repeated float iou_thresholds = 2;

  // AP is the average of the maximum precision at (1 + num_recall_points)
  // recall levels. E.g., with num_recall_points equal to 10, the recall
  // levels are 0., 0.1, 0.2, ..., 0.9, 1.0.
  // Default: 100
  optional int32 num_recall_points = 3 [default = 100];
}
267
// Average Precision results for the Object Detection task.
//
// Next ID: 3
message ObjectDetectionAveragePrecisionMetrics {
  // The Average Precision value at one specific IoU threshold.
  // Next ID: 3
  message AveragePrecision {
    // IoU threshold this entry refers to.
    optional float iou_threshold = 1;

    // Average Precision at that threshold.
    optional float average_precision = 2;
  }

  // One entry per threshold in
  // ObjectDetectionAveragePrecisionParams::iou_thresholds; each entry is
  // averaged over all classes.
  repeated AveragePrecision individual_average_precisions = 1;

  // Mean of the Average Precision values over all IoU thresholds.
  optional float overall_mean_average_precision = 2;
}
286
// Configuration for the end-to-end evaluation of the Object Detection task.
//
// Next ID: 4
message ObjectDetectionParams {
  // Required.
  // The model's outputs should match those of a TFLite-compatible SSD model.
  // See:
  // https://www.tensorflow.org/lite/models/object_detection/overview#output
  // TODO(b/133772912): Generalize support for other types of object detection
  // models.
  optional TfliteInferenceParams inference_params = 1;

  // Optional. Used for matching ground-truth categories with the model
  // output.
  // The SSD Mobilenet V1 Model trained on COCO assumes that class 0 is the
  // background class in the label file and that class labels start from 1 to
  // number_of_classes+1. Hence the default value of 1.
  optional int32 class_offset = 2 [default = 1];

  // Parameters for the Average Precision computation.
  optional ObjectDetectionAveragePrecisionParams ap_params = 3;
}
306
// Results of evaluating the object detection task.
//
// Next ID: 5
message ObjectDetectionMetrics {
  // Latency statistics for pre-processing.
  optional LatencyMetrics pre_processing_latency = 1;

  // Latency statistics for inference.
  optional LatencyMetrics inference_latency = 2;

  // Metrics specific to TFLite inference.
  optional TfliteInferenceMetrics inference_metrics = 3;

  // Average Precision metrics for the task.
  optional ObjectDetectionAveragePrecisionMetrics average_precision_metrics = 4;
}
316