1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/tools/optimize/operator_property.h"
16 
17 #include "tensorflow/lite/schema/schema_generated.h"
18 #include "tensorflow/lite/schema/schema_utils.h"
19 
20 namespace tflite {
21 namespace optimize {
22 namespace operator_property {
23 
24 namespace {
GetOperatorVariant(const ModelT * model,int subgraph_index,int op_index)25 const OpVariant GetOperatorVariant(const ModelT* model, int subgraph_index,
26                                    int op_index) {
27   OpVariant op_variant;
28   OperatorT* op =
29       model->subgraphs.at(subgraph_index)->operators[op_index].get();
30   op_variant.op_code =
31       GetBuiltinCode(model->operator_codes[op->opcode_index].get());
32   if (op_variant.op_code == BuiltinOperator_LSTM ||
33       op_variant.op_code == BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM) {
34     if (op->inputs.size() == 5) {
35       // The 5 input ("basic") LSTM is not supported in this tooling (yet).
36       op_variant.is_quantizable = false;
37       return op_variant;
38     }
39     const int cell_to_output_weight_index = 11;
40     const int forget_layer_norm_coefficients_index = 21;
41     const int projection_weights_index = 16;
42     op_variant.use_projection = op->inputs[projection_weights_index] != -1;
43     op_variant.use_peephole = op->inputs[cell_to_output_weight_index] != -1;
44     if (op->inputs.size() == 20) {
45       op_variant.use_layer_norm = false;
46     } else {
47       op_variant.use_layer_norm =
48           op->inputs[forget_layer_norm_coefficients_index] != -1;
49     }
50   }
51   return op_variant;
52 }
53 }  // namespace
54 
GetOperatorProperty(const ModelT * model,int subgraph_index,int op_index)55 OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
56                                      int op_index) {
57   OpVariant op_variant = GetOperatorVariant(model, subgraph_index, op_index);
58   return GetOperatorProperty(op_variant);
59 }
60 
// Update the operation definitions in the TensorFlow Lite dialect accordingly
// whenever the kernel support level needs to be updated.
63 // LINT.IfChange
GetOperatorProperty(OpVariant op_variant)64 OperatorProperty GetOperatorProperty(OpVariant op_variant) {
65   BuiltinOperator op_code = op_variant.op_code;
66   OperatorProperty property;
67   switch (op_code) {
68     case BuiltinOperator_ABS:
69       property.inputs = {{0, {}}};
70       property.outputs = {{0, {}}};
71       property.version = 2;
72       property.restrict_same_input_output_scale = true;
73       break;
74     case BuiltinOperator_RSQRT:
75       property.inputs = {{0, {}}};
76       property.outputs = {{0, {}}};
77       property.version = 2;
78       break;
79     case BuiltinOperator_ADD:
80       property.inputs = {{0, {}}, {1, {}}};
81       property.outputs = {{0, {}}};
82       property.version = 2;
83       property.quantize_input_as_activations = true;
84       break;
85     case BuiltinOperator_ARG_MAX:
86       property.inputs = {{0, {}}};
87       // ArgMax has no quantizable output.
88       property.version = 2;
89       property.quantizable_int16 = false;
90       break;
91     case BuiltinOperator_AVERAGE_POOL_2D:
92       property.inputs = {{0, {}}};
93       property.outputs = {{0, {}}};
94       property.restrict_same_input_output_scale = true;
95       property.version = 2;
96       break;
97     case BuiltinOperator_BATCH_MATMUL: {
98       property.inputs = {{0, {}}, {1, {}}};
99       property.outputs = {{0, {}}};
100       property.version = 2;
101       property.quantize_input_as_activations = true;
102       break;
103     }
104     case BuiltinOperator_BATCH_TO_SPACE_ND:
105     case BuiltinOperator_SPACE_TO_BATCH_ND:
106     case BuiltinOperator_SPACE_TO_DEPTH:
107       // We skip inputs 1 and 2 since they aren't real valued (they are shapes).
108       property.inputs = {{0, {}}};
109       property.outputs = {{0, {}}};
110       property.restrict_same_input_output_scale = true;
111       property.version = 2;
112       property.quantizable_int16 = false;
113       break;
114     case BuiltinOperator_BROADCAST_TO:
115       property.inputs = {{0, {}}};
116       property.outputs = {{0, {}}};
117       property.restrict_same_input_output_scale = true;
118       property.version = 3;
119       break;
120     case BuiltinOperator_DEPTH_TO_SPACE:
121       property.inputs = {{0, {}}};
122       property.outputs = {{0, {}}};
123       property.restrict_same_input_output_scale = true;
124       property.version = 2;
125       property.quantizable_int16 = false;
126       break;
127     case BuiltinOperator_SPLIT:
128       // We skip input 0 since it is the split dim which is not real valued.
129       property.inputs = {{1, {}}};
130       property.arbitrary_outputs = true;
131       property.restrict_same_input_output_scale = true;
132       property.version = 2;
133       break;
134     case BuiltinOperator_SPLIT_V:
135       property.inputs = {{0, {}}};
136       property.arbitrary_outputs = true;
137       property.restrict_same_input_output_scale = true;
138       property.version = 2;
139       break;
140     case BuiltinOperator_CONCATENATION:
141       property.arbitrary_inputs = true;
142       property.outputs = {{0, {}}};
143       property.restrict_same_input_output_scale = true;
144       property.version = 2;
145       break;
146     case BuiltinOperator_CONV_2D: {
147       TensorProperty tensor_property;
148       tensor_property.per_axis = true;
149       tensor_property.per_axis_index = 0;
150       tensor_property.symmetric = true;
151       property.inputs = {{0, {}}, {1, tensor_property}};
152       property.outputs = {{0, {}}};
153       property.biases = {2};
154       property.version = 3;
155       break;
156     }
157     case BuiltinOperator_TRANSPOSE_CONV: {
158       TensorProperty tensor_property;
159       tensor_property.per_axis = true;
160       tensor_property.per_axis_index = 0;
161       tensor_property.symmetric = true;
162       property.inputs = {{2, {}}, {1, tensor_property}};
163       property.outputs = {{0, {}}};
164       property.biases = {3};
165       property.version = 3;
166       break;
167     }
168     case BuiltinOperator_DEPTHWISE_CONV_2D: {
169       TensorProperty tensor_property;
170       tensor_property.per_axis = true;
171       tensor_property.per_axis_index = 3;
172       tensor_property.symmetric = true;
173       property.inputs = {
174           {0, {}},
175           {1, tensor_property},
176       };
177       property.outputs = {{0, {}}};
178       property.biases = {2};
179       property.version = 3;
180       break;
181     }
182     case BuiltinOperator_EQUAL:
183     case BuiltinOperator_NOT_EQUAL:
184     case BuiltinOperator_GREATER:
185     case BuiltinOperator_GREATER_EQUAL:
186     case BuiltinOperator_LESS:
187     case BuiltinOperator_LESS_EQUAL:
188       property.inputs = {{0, {}}, {1, {}}};
189       // Comparisons have no quantizable outputs.
190       property.version = 2;
191       property.quantizable_int16 = false;
192       break;
193     case BuiltinOperator_EXPAND_DIMS:
194       // We skip input 1 as it is not real valued (it's the index of axis) and
195       // hence does not need to be quantized.
196       property.inputs = {{0, {}}};
197       property.outputs = {{0, {}}};
198       property.version = 1;
199       break;
200     case BuiltinOperator_FILL: {
201       property.inputs = {{1, {}}};
202       property.outputs = {{0, {}}};
203       property.restrict_same_input_output_scale = true;
204       property.version = 3;
205       break;
206     }
207     case BuiltinOperator_FULLY_CONNECTED: {
208       TensorProperty tensor_property;
209       tensor_property.symmetric = true;
210       property.inputs = {{0, {}}, {1, tensor_property}};
211       property.outputs = {{0, {}}};
212       property.biases = {2};
213       property.version = 4;
214       break;
215     }
216     case BuiltinOperator_GATHER:
217       property.inputs = {{0, {}}};
218       property.outputs = {{0, {}}};
219       property.restrict_same_input_output_scale = true;
220       property.quantize_input_as_activations = true;
221       property.version = 2;
222       break;
223     case BuiltinOperator_GATHER_ND:
224       property.inputs = {{0, {}}};
225       property.outputs = {{0, {}}};
226       property.restrict_same_input_output_scale = true;
227       property.version = 3;
228       break;
229     case BuiltinOperator_HARD_SWISH: {
230       property.inputs = {{0, {}}};
231       property.outputs = {{0, {}}};
232       property.version = 1;
233       property.quantizable_int16 = false;
234       break;
235     }
236     case BuiltinOperator_LOG_SOFTMAX: {
237       property.inputs = {{0, {}}};
238       // LogSoftmax requires output with 16/256 as scale and 127 as zero point.
239       TensorProperty tensor_property;
240       tensor_property.restriction = true;
241       tensor_property.restricted_value_int8 = {16.0f / 256.0f, 127};
242       property.outputs = {{0, tensor_property}};
243       property.version = 2;
244       property.quantizable_int16 = false;
245       break;
246     }
247     case BuiltinOperator_LOGISTIC: {
248       property.inputs = {{0, {}}};
249       // Logistic requires output with 1/256 as scale and -128 as zero point.
250       TensorProperty tensor_property;
251       tensor_property.restriction = true;
252       tensor_property.restricted_value_int8 = {1 / 256.0f, -128};
253       tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
254       property.outputs = {{0, tensor_property}};
255       property.version = 2;
256       break;
257     }
258     case BuiltinOperator_LSTM:
259     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
260       if (!op_variant.is_quantizable) {
        // Early exit for the 5-input LSTM.
        // It is not supported in this tooling yet.
263         property.quantizable = false;
264         break;
265       }
266       // TODO(jianlijianli): extend LSTM op spec to include input, bias etc.
267       // LSTM needs 5 intermediate tensors. This agrees with the fully quantized
268       // kernels in lstm_eval.cc
269       if (op_variant.use_layer_norm && op_variant.use_projection &&
270           op_variant.use_peephole) {
271         static const float alpha = static_cast<float>(std::pow(2, -10));
272         TensorProperty tensor_property_9;
273         tensor_property_9.number_of_bits = 16;
274         tensor_property_9.symmetric = true;
275         TensorProperty tensor_property_12;
276         tensor_property_12.use_derived_scale = true;
277         tensor_property_12.number_of_bits = 32;
278         tensor_property_12.derived_scale = {{20}, {}, {alpha}};
279         TensorProperty tensor_property_13;
280         tensor_property_13.use_derived_scale = true;
281         tensor_property_13.number_of_bits = 32;
282         tensor_property_13.derived_scale = {{21}, {}, {alpha}};
283         TensorProperty tensor_property_14;
284         tensor_property_14.use_derived_scale = true;
285         tensor_property_14.number_of_bits = 32;
286         tensor_property_14.derived_scale = {{22}, {}, {alpha}};
287         TensorProperty tensor_property_15;
288         tensor_property_15.use_derived_scale = true;
289         tensor_property_15.number_of_bits = 32;
290         tensor_property_15.derived_scale = {{23}, {}, {alpha}};
291         TensorProperty tensor_property_17;
292         tensor_property_17.use_derived_scale = true;
293         tensor_property_17.number_of_bits = 32;
294         tensor_property_17.derived_scale = {{16}, {4}, {}};
295         TensorProperty tensor_property_19;
296         tensor_property_19.extend_to_power_of_two = true;
297         tensor_property_19.number_of_bits = 16;
298         tensor_property_19.state_tensor = true;
299         tensor_property_19.symmetric = true;
300         TensorProperty tensor_property_20;
301         tensor_property_20.number_of_bits = 16;
302         tensor_property_20.symmetric = true;
303 
304         property.inputs = {
305             {0, {}},
306             {1, {}},
307             {2, {}},
308             {3, {}},
309             {4, {}},
310             {5, {}},
311             {6, {}},
312             {7, {}},
313             {8, {}},
314             {9, tensor_property_9},
315             {10, tensor_property_9},
316             {11, tensor_property_9},
317             {16, {}},
318             {19, tensor_property_19},
319             {20, tensor_property_20},
320             {21, tensor_property_20},
321             {22, tensor_property_20},
322             {23, tensor_property_20},
323             {12, tensor_property_12},
324             {13, tensor_property_13},
325             {14, tensor_property_14},
326             {15, tensor_property_15},
327             {17, tensor_property_17},
328         };
329         property.outputs = {{0, {}}};
330         property.intermediates = {
331             {0, tensor_property_20},
332             {1, tensor_property_20},
333             {2, tensor_property_20},
334             {3, tensor_property_20},
335             {4, {}},
336         };
337         property.restrict_scale = {{18, 0}};
338         property.version = 2;
339       }
340       if (op_variant.use_layer_norm && op_variant.use_projection &&
341           !op_variant.use_peephole) {
342         static const float alpha = static_cast<float>(std::pow(2, -10));
343 
344         TensorProperty tensor_property_12;
345         tensor_property_12.use_derived_scale = true;
346         tensor_property_12.number_of_bits = 32;
347         tensor_property_12.derived_scale = {{20}, {}, {alpha}};
348         TensorProperty tensor_property_13;
349         tensor_property_13.use_derived_scale = true;
350         tensor_property_13.number_of_bits = 32;
351         tensor_property_13.derived_scale = {{21}, {}, {alpha}};
352         TensorProperty tensor_property_14;
353         tensor_property_14.use_derived_scale = true;
354         tensor_property_14.number_of_bits = 32;
355         tensor_property_14.derived_scale = {{22}, {}, {alpha}};
356         TensorProperty tensor_property_15;
357         tensor_property_15.use_derived_scale = true;
358         tensor_property_15.number_of_bits = 32;
359         tensor_property_15.derived_scale = {{23}, {}, {alpha}};
360         TensorProperty tensor_property_17;
361         tensor_property_17.use_derived_scale = true;
362         tensor_property_17.number_of_bits = 32;
363         tensor_property_17.derived_scale = {{16}, {4}, {}};
364         TensorProperty tensor_property_19;
365         tensor_property_19.extend_to_power_of_two = true;
366         tensor_property_19.number_of_bits = 16;
367         tensor_property_19.state_tensor = true;
368         tensor_property_19.symmetric = true;
369         TensorProperty tensor_property_20;
370         tensor_property_20.number_of_bits = 16;
371         tensor_property_20.symmetric = true;
372 
373         property.inputs = {
374             {0, {}},
375             {1, {}},
376             {2, {}},
377             {3, {}},
378             {4, {}},
379             {5, {}},
380             {6, {}},
381             {7, {}},
382             {8, {}},
383             {16, {}},
384             {19, tensor_property_19},
385             {20, tensor_property_20},
386             {21, tensor_property_20},
387             {22, tensor_property_20},
388             {23, tensor_property_20},
389             {12, tensor_property_12},
390             {13, tensor_property_13},
391             {14, tensor_property_14},
392             {15, tensor_property_15},
393             {17, tensor_property_17},
394         };
395         property.outputs = {{0, {}}};
396         property.intermediates = {
397             {0, tensor_property_20},
398             {1, tensor_property_20},
399             {2, tensor_property_20},
400             {3, tensor_property_20},
401             {4, {}},
402         };
403         property.restrict_scale = {{18, 0}};
404         property.version = 2;
405       }
406       if (op_variant.use_layer_norm && !op_variant.use_projection &&
407           op_variant.use_peephole) {
408         static const float alpha = static_cast<float>(std::pow(2, -10));
409         TensorProperty tensor_property_9;
410         tensor_property_9.number_of_bits = 16;
411         tensor_property_9.symmetric = true;
412         TensorProperty tensor_property_12;
413         tensor_property_12.use_derived_scale = true;
414         tensor_property_12.number_of_bits = 32;
415         tensor_property_12.derived_scale = {{20}, {}, {alpha}};
416         TensorProperty tensor_property_13;
417         tensor_property_13.use_derived_scale = true;
418         tensor_property_13.number_of_bits = 32;
419         tensor_property_13.derived_scale = {{21}, {}, {alpha}};
420         TensorProperty tensor_property_14;
421         tensor_property_14.use_derived_scale = true;
422         tensor_property_14.number_of_bits = 32;
423         tensor_property_14.derived_scale = {{22}, {}, {alpha}};
424         TensorProperty tensor_property_15;
425         tensor_property_15.use_derived_scale = true;
426         tensor_property_15.number_of_bits = 32;
427         tensor_property_15.derived_scale = {{23}, {}, {alpha}};
428         TensorProperty tensor_property_19;
429         tensor_property_19.extend_to_power_of_two = true;
430         tensor_property_19.number_of_bits = 16;
431         tensor_property_19.state_tensor = true;
432         tensor_property_19.symmetric = true;
433         TensorProperty tensor_property_20;
434         tensor_property_20.number_of_bits = 16;
435         tensor_property_20.symmetric = true;
436 
437         property.inputs = {
438             {0, {}},
439             {1, {}},
440             {2, {}},
441             {3, {}},
442             {4, {}},
443             {5, {}},
444             {6, {}},
445             {7, {}},
446             {8, {}},
447             {9, tensor_property_9},
448             {10, tensor_property_9},
449             {11, tensor_property_9},
450             {19, tensor_property_19},
451             {20, tensor_property_20},
452             {21, tensor_property_20},
453             {22, tensor_property_20},
454             {23, tensor_property_20},
455             {12, tensor_property_12},
456             {13, tensor_property_13},
457             {14, tensor_property_14},
458             {15, tensor_property_15},
459         };
460         property.outputs = {{0, {}}};
461         property.intermediates = {
462             {0, tensor_property_20},
463             {1, tensor_property_20},
464             {2, tensor_property_20},
465             {3, tensor_property_20},
466             // Without projection, hidden state (4), output (0) and input
467             // activation state (18) are the same except that the very first
468             // inference of input activation is not captured in hidden and
469             // output.
470             // This is not an issue because this intermediate tensor is not used
471             // in the kernel and its quantization parameters are ignored.
472             {4, {}},
473         };
474         property.restrict_scale = {{18, 0}};
475         property.version = 2;
476       }
477       if (op_variant.use_layer_norm && !op_variant.use_projection &&
478           !op_variant.use_peephole) {
479         static const float alpha = static_cast<float>(std::pow(2, -10));
480         TensorProperty tensor_property_12;
481         tensor_property_12.use_derived_scale = true;
482         tensor_property_12.number_of_bits = 32;
483         tensor_property_12.derived_scale = {{20}, {}, {alpha}};
484         TensorProperty tensor_property_13;
485         tensor_property_13.use_derived_scale = true;
486         tensor_property_13.number_of_bits = 32;
487         tensor_property_13.derived_scale = {{21}, {}, {alpha}};
488         TensorProperty tensor_property_14;
489         tensor_property_14.use_derived_scale = true;
490         tensor_property_14.number_of_bits = 32;
491         tensor_property_14.derived_scale = {{22}, {}, {alpha}};
492         TensorProperty tensor_property_15;
493         tensor_property_15.use_derived_scale = true;
494         tensor_property_15.number_of_bits = 32;
495         tensor_property_15.derived_scale = {{23}, {}, {alpha}};
496         TensorProperty tensor_property_19;
497         tensor_property_19.extend_to_power_of_two = true;
498         tensor_property_19.number_of_bits = 16;
499         tensor_property_19.state_tensor = true;
500         tensor_property_19.symmetric = true;
501         TensorProperty tensor_property_20;
502         tensor_property_20.number_of_bits = 16;
503         tensor_property_20.symmetric = true;
504 
505         property.inputs = {
506             {0, {}},
507             {1, {}},
508             {2, {}},
509             {3, {}},
510             {4, {}},
511             {5, {}},
512             {6, {}},
513             {7, {}},
514             {8, {}},
515             {19, tensor_property_19},
516             {20, tensor_property_20},
517             {21, tensor_property_20},
518             {22, tensor_property_20},
519             {23, tensor_property_20},
520             {12, tensor_property_12},
521             {13, tensor_property_13},
522             {14, tensor_property_14},
523             {15, tensor_property_15},
524         };
525         property.outputs = {{0, {}}};
526         property.intermediates = {
527             {0, tensor_property_20},
528             {1, tensor_property_20},
529             {2, tensor_property_20},
530             {3, tensor_property_20},
531             // Without projection, hidden state (4), output (0) and input
532             // activation state (18) are the same except that the very first
533             // inference of input activation is not captured in hidden and
534             // output.
535             // This is not an issue because this intermediate tensor is not used
536             // in the kernel and its quantization parameters are ignored.
537             {4, {}},
538         };
539         property.restrict_scale = {{18, 0}};
540         property.version = 2;
541       }
542       if (!op_variant.use_layer_norm && op_variant.use_projection &&
543           op_variant.use_peephole) {
544         TensorProperty tensor_property_9;
545         tensor_property_9.number_of_bits = 16;
546         tensor_property_9.symmetric = true;
547         // Without layer norm, we choose to quantize bias with the scale of
548         // input and its corresponding weight. The other choice will
549         // be to use the scale of recurrent and its corresponding weight but we
550         // choose to use the smaller scale, which means higher resolution.
551         TensorProperty tensor_property_12;
552         tensor_property_12.use_derived_scale = true;
553         tensor_property_12.number_of_bits = 32;
554         tensor_property_12.derived_scale = {{0, 1}, {}, {}};
555         TensorProperty tensor_property_13;
556         tensor_property_13.use_derived_scale = true;
557         tensor_property_13.number_of_bits = 32;
558         tensor_property_13.derived_scale = {{0, 2}, {}, {}};
559         TensorProperty tensor_property_14;
560         tensor_property_14.use_derived_scale = true;
561         tensor_property_14.number_of_bits = 32;
562         tensor_property_14.derived_scale = {{0, 3}, {}, {}};
563         TensorProperty tensor_property_15;
564         tensor_property_15.use_derived_scale = true;
565         tensor_property_15.number_of_bits = 32;
566         tensor_property_15.derived_scale = {{0, 4}, {}, {}};
567         TensorProperty tensor_property_17;
568         tensor_property_17.use_derived_scale = true;
569         tensor_property_17.number_of_bits = 32;
570         tensor_property_17.derived_scale = {{16}, {4}, {}};
571         TensorProperty tensor_property_19;
572         tensor_property_19.extend_to_power_of_two = true;
573         tensor_property_19.number_of_bits = 16;
574         tensor_property_19.state_tensor = true;
575         tensor_property_19.symmetric = true;
576 
577         property.inputs = {
578             {0, {}},
579             {1, {}},
580             {2, {}},
581             {3, {}},
582             {4, {}},
583             {5, {}},
584             {6, {}},
585             {7, {}},
586             {8, {}},
587             {9, tensor_property_9},
588             {10, tensor_property_9},
589             {11, tensor_property_9},
590             {16, {}},
591             {19, tensor_property_19},
592             {12, tensor_property_12},
593             {13, tensor_property_13},
594             {14, tensor_property_14},
595             {15, tensor_property_15},
596             {17, tensor_property_17},
597         };
598         property.outputs = {{0, {}}};
599         property.intermediates = {
600             // Without layer normalization, intermediate tensors 0, 1, 2, 3 are
601             // not used and their quantization parameters are ignored.
602             {0, {}},
603             {1, {}},
604             {2, {}},
605             {3, {}},
606             // Hidden state is quantized as usual.
607             {4, {}},
608         };
609         property.restrict_scale = {{18, 0}};
610         property.version = 2;
611       }
612       if (!op_variant.use_layer_norm && op_variant.use_projection &&
613           !op_variant.use_peephole) {
614         // Without layer norm, we choose to quantize bias with the scale of
615         // input and its corresponding weight. The other choice will
616         // be to use the scale of recurrent and its corresponding weight but we
617         // choose to use the smaller scale, which means higher resolution.
618         TensorProperty tensor_property_12;
619         tensor_property_12.use_derived_scale = true;
620         tensor_property_12.number_of_bits = 32;
621         tensor_property_12.derived_scale = {{0, 1}, {}, {}};
622         TensorProperty tensor_property_13;
623         tensor_property_13.use_derived_scale = true;
624         tensor_property_13.number_of_bits = 32;
625         tensor_property_13.derived_scale = {{0, 2}, {}, {}};
626         TensorProperty tensor_property_14;
627         tensor_property_14.use_derived_scale = true;
628         tensor_property_14.number_of_bits = 32;
629         tensor_property_14.derived_scale = {{0, 3}, {}, {}};
630         TensorProperty tensor_property_15;
631         tensor_property_15.use_derived_scale = true;
632         tensor_property_15.number_of_bits = 32;
633         tensor_property_15.derived_scale = {{0, 4}, {}, {}};
634         TensorProperty tensor_property_17;
635         tensor_property_17.use_derived_scale = true;
636         tensor_property_17.number_of_bits = 32;
637         tensor_property_17.derived_scale = {{16}, {4}, {}};
638         TensorProperty tensor_property_19;
639         tensor_property_19.extend_to_power_of_two = true;
640         tensor_property_19.number_of_bits = 16;
641         tensor_property_19.state_tensor = true;
642         tensor_property_19.symmetric = true;
643 
644         property.inputs = {
645             {0, {}},
646             {1, {}},
647             {2, {}},
648             {3, {}},
649             {4, {}},
650             {5, {}},
651             {6, {}},
652             {7, {}},
653             {8, {}},
654             {16, {}},
655             {19, tensor_property_19},
656             {12, tensor_property_12},
657             {13, tensor_property_13},
658             {14, tensor_property_14},
659             {15, tensor_property_15},
660             {17, tensor_property_17},
661         };
662         property.outputs = {{0, {}}};
663         property.intermediates = {
664             // Without layer normalization, intermediate tensors 0, 1, 2, 3 are
665             // not used and their quantization parameters are ignored.
666             {0, {}},
667             {1, {}},
668             {2, {}},
669             {3, {}},
670             // Hidden state is quantized as usual.
671             {4, {}},
672         };
673         property.restrict_scale = {{18, 0}};
674         property.version = 2;
675       }
676       if (!op_variant.use_layer_norm && !op_variant.use_projection &&
677           op_variant.use_peephole) {
678         TensorProperty tensor_property_9;
679         tensor_property_9.number_of_bits = 16;
680         tensor_property_9.symmetric = true;
681         // Without layer norm, we choose to quantize bias with the scale of
682         // input and its corresponding weight. The other choice will
683         // be to use the scale of recurrent and its corresponding weight but we
684         // choose to use the smaller scale, which means higher resolution.
685         TensorProperty tensor_property_12;
686         tensor_property_12.use_derived_scale = true;
687         tensor_property_12.number_of_bits = 32;
688         tensor_property_12.derived_scale = {{0, 1}, {}, {}};
689         TensorProperty tensor_property_13;
690         tensor_property_13.use_derived_scale = true;
691         tensor_property_13.number_of_bits = 32;
692         tensor_property_13.derived_scale = {{0, 2}, {}, {}};
693         TensorProperty tensor_property_14;
694         tensor_property_14.use_derived_scale = true;
695         tensor_property_14.number_of_bits = 32;
696         tensor_property_14.derived_scale = {{0, 3}, {}, {}};
697         TensorProperty tensor_property_15;
698         tensor_property_15.use_derived_scale = true;
699         tensor_property_15.number_of_bits = 32;
700         tensor_property_15.derived_scale = {{0, 4}, {}, {}};
701         TensorProperty tensor_property_19;
702         tensor_property_19.extend_to_power_of_two = true;
703         tensor_property_19.number_of_bits = 16;
704         tensor_property_19.state_tensor = true;
705         tensor_property_19.symmetric = true;
706 
707         property.inputs = {
708             {0, {}},
709             {1, {}},
710             {2, {}},
711             {3, {}},
712             {4, {}},
713             {5, {}},
714             {6, {}},
715             {7, {}},
716             {8, {}},
717             {9, tensor_property_9},
718             {10, tensor_property_9},
719             {11, tensor_property_9},
720             {19, tensor_property_19},
721             {12, tensor_property_12},
722             {13, tensor_property_13},
723             {14, tensor_property_14},
724             {15, tensor_property_15},
725         };
726         property.outputs = {{0, {}}};
727         property.intermediates = {
728             // Without layer normalization, intermediate tensors 0, 1, 2, 3 are
729             // not used and their quantization parameters are ignored.
730             {0, {}},
731             {1, {}},
732             {2, {}},
733             {3, {}},
734             // Without projection, hidden state (4), output (0) and input
735             // activation state (18) are the same except that the very first
736             // inference of input activation is not captured in hidden and
737             // output.
738             // This is not an issue because this intermediate tensor is not used
739             // in the kernel and its quantization parameters are ignored.
740             {4, {}},
741         };
742         property.restrict_scale = {{18, 0}};
743         property.version = 2;
744       }
745       if (!op_variant.use_layer_norm && !op_variant.use_projection &&
746           !op_variant.use_peephole) {
747         // Without layer norm, we choose to quantize bias with the scale of
748         // input and its corresponding weight. The other choice would be to
749         // use the scale of recurrent and its corresponding weight, but we
750         // choose to use the smaller scale, which means higher resolution.
751         TensorProperty tensor_property_12;
752         tensor_property_12.use_derived_scale = true;
753         tensor_property_12.number_of_bits = 32;
754         tensor_property_12.derived_scale = {{0, 1}, {}, {}};
755         TensorProperty tensor_property_13;
756         tensor_property_13.use_derived_scale = true;
757         tensor_property_13.number_of_bits = 32;
758         tensor_property_13.derived_scale = {{0, 2}, {}, {}};
759         TensorProperty tensor_property_14;
760         tensor_property_14.use_derived_scale = true;
761         tensor_property_14.number_of_bits = 32;
762         tensor_property_14.derived_scale = {{0, 3}, {}, {}};
763         TensorProperty tensor_property_15;
764         tensor_property_15.use_derived_scale = true;
765         tensor_property_15.number_of_bits = 32;
766         tensor_property_15.derived_scale = {{0, 4}, {}, {}};
767         TensorProperty tensor_property_19;
768         tensor_property_19.extend_to_power_of_two = true;
769         tensor_property_19.number_of_bits = 16;
770         tensor_property_19.state_tensor = true;
771         tensor_property_19.symmetric = true;
772 
773         property.inputs = {
774             {0, {}},
775             {1, {}},
776             {2, {}},
777             {3, {}},
778             {4, {}},
779             {5, {}},
780             {6, {}},
781             {7, {}},
782             {8, {}},
783             {19, tensor_property_19},
784             {12, tensor_property_12},
785             {13, tensor_property_13},
786             {14, tensor_property_14},
787             {15, tensor_property_15},
788         };
789         property.outputs = {{0, {}}};
790         property.intermediates = {
791             // Without layer normalization, intermediate tensors 0, 1, 2, 3 are
792             // not used and their quantization parameters are ignored.
793             {0, {}},
794             {1, {}},
795             {2, {}},
796             {3, {}},
797             // Without projection, hidden state (4), output (0) and input
798             // activation state (18) are the same except that the very first
799             // inference of input activation is not captured in hidden and
800             // output.
801             // This is not an issue because this intermediate tensor is not used
802             // in the kernel and its quantization parameters are ignored.
803             {4, {}},
804         };
805         property.restrict_scale = {{18, 0}};
806         property.version = 2;
807       }
808       property.quantizable_int16 = false;
809       break;
810     }
811     case BuiltinOperator_L2_NORMALIZATION: {
812       property.inputs = {{0, {}}};
813       // L2 Norm requires output with 1/128 as scale and 0 as zero point.
814       TensorProperty tensor_property;
815       tensor_property.restriction = true;
816       tensor_property.restricted_value_int8 = {1 / 128.0f, 0};
817       property.outputs = {{0, tensor_property}};
818       property.version = 2;
819       property.quantizable_int16 = false;
820       break;
821     }
822     case BuiltinOperator_MAX_POOL_2D:
823       property.inputs = {{0, {}}};
824       property.outputs = {{0, {}}};
825       property.restrict_same_input_output_scale = true;
826       property.version = 2;
827       break;
828     case BuiltinOperator_MAXIMUM:
829       property.arbitrary_inputs = true;
830       property.outputs = {{0, {}}};
831       property.restrict_same_input_output_scale = true;
832       property.quantize_input_as_activations = true;
833       property.version = 2;
834       break;
835     case BuiltinOperator_MEAN:
836       property.inputs = {{0, {}}};
837       property.outputs = {{0, {}}};
838       property.version = 2;
839       break;
840     case BuiltinOperator_MINIMUM:
841       property.arbitrary_inputs = true;
842       property.outputs = {{0, {}}};
843       property.restrict_same_input_output_scale = true;
844       property.quantize_input_as_activations = true;
845       property.version = 2;
846       break;
847     case BuiltinOperator_MUL:
848       property.inputs = {{0, {}}, {1, {}}};
849       property.outputs = {{0, {}}};
850       property.quantize_input_as_activations = true;
851       property.version = 2;
852       break;
853     case BuiltinOperator_PACK:
854       property.arbitrary_inputs = true;
855       property.outputs = {{0, {}}};
856       property.restrict_same_input_output_scale = true;
857       property.restrict_same_input_output_scale = true;
858       property.version = 2;
859       break;
860     case BuiltinOperator_PAD:
861     case BuiltinOperator_PADV2:
862       property.inputs = {{0, {}}};
863       property.outputs = {{0, {}}};
864       property.restrict_same_input_output_scale = true;
865       property.version = 2;
866       break;
867     case BuiltinOperator_QUANTIZE:
868       property.inputs = {{0, {}}};
869       property.outputs = {{0, {}}};
870       property.version = 2;
871       break;
872     case BuiltinOperator_PRELU:
873       property.inputs = {{0, {}}, {1, {}}};
874       property.outputs = {{0, {}}};
875       property.restrict_same_input_output_scale = false;
876       property.version = 1;
877       property.quantizable_int16 = false;
878       break;
879     case BuiltinOperator_LEAKY_RELU:
880       property.inputs = {{0, {}}};
881       property.outputs = {{0, {}}};
882       property.version = 2;
883       break;
884     case BuiltinOperator_RELU:
885     case BuiltinOperator_RELU6:
886       property.inputs = {{0, {}}};
887       property.outputs = {{0, {}}};
888       property.version = 2;
889       break;
890     case BuiltinOperator_RELU_N1_TO_1:
891       property.inputs = {{0, {}}};
892       property.outputs = {{0, {}}};
893       property.version = 1;
894       property.quantizable_int16 = false;
895       break;
896     case BuiltinOperator_RESHAPE:
897       property.inputs = {{0, {}}};
898       property.outputs = {{0, {}}};
899       property.restrict_same_input_output_scale = true;
900       property.version = 1;
901       break;
902     case BuiltinOperator_RESIZE_BILINEAR:
903     case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
904       property.inputs = {{0, {}}};
905       property.outputs = {{0, {}}};
906       property.restrict_same_input_output_scale = true;
907       property.version = 2;
908       break;
909     case BuiltinOperator_REVERSE_V2:
910       property.inputs = {{0, {}}};
911       property.outputs = {{0, {}}};
912       property.restrict_same_input_output_scale = true;
913       property.version = 3;
914       break;
915     case BuiltinOperator_SELECT:
916       property.inputs = {{1, {}}, {2, {}}};
917       property.outputs = {{0, {}}};
918       property.restrict_same_input_output_scale = true;
919       property.version = 1;
920       break;
921     case BuiltinOperator_SHAPE:
922       property.inputs = {{0, {}}};
923       // Shape has no quantizable output.
924       property.version = 1;
925       break;
926     case BuiltinOperator_SLICE:
927       // We skip inputs 1 and 2 since they aren't real valued (they are the
928       // index and size).
929       property.inputs = {{0, {}}};
930       property.outputs = {{0, {}}};
931       property.restrict_same_input_output_scale = true;
932       property.version = 2;
933       break;
934     case BuiltinOperator_SQUEEZE:
935       property.inputs = {{0, {}}};
936       property.outputs = {{0, {}}};
937       property.restrict_same_input_output_scale = true;
938       property.version = 1;
939       break;
940     case BuiltinOperator_SOFTMAX: {
941       property.inputs = {{0, {}}};
942       // Softmax requires output with 1/256 as scale and -128 as zero point.
943       TensorProperty tensor_property;
944       tensor_property.restriction = true;
945       tensor_property.restricted_value_int8 = {1 / 256.0f, -128};
946       tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
947       property.outputs = {{0, tensor_property}};
948       property.version = 2;
949       break;
950     }
951     case BuiltinOperator_STRIDED_SLICE:
952       property.inputs = {{0, {}}};
953       property.outputs = {{0, {}}};
954       property.restrict_same_input_output_scale = true;
955       property.version = 2;
956       break;
957     case BuiltinOperator_SQUARED_DIFFERENCE:
958     case BuiltinOperator_SUB:
959       property.inputs = {{0, {}}, {1, {}}};
960       property.outputs = {{0, {}}};
961       property.version = 2;
962       property.quantize_input_as_activations = true;
963       break;
964     case BuiltinOperator_SUM:
965       property.inputs = {{0, {}}};
966       property.outputs = {{0, {}}};
967       property.version = 2;
968       break;
969     case BuiltinOperator_TANH: {
970       property.inputs = {{0, {}}};
971       // Tanh requires output with 1/128 as scale and 0 as zero point.
972       TensorProperty tensor_property;
973       tensor_property.restriction = true;
974       tensor_property.restricted_value_int8 = {1 / 128.0f, 0};
975       tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
976       property.outputs = {{0, tensor_property}};
977       property.version = 2;
978       break;
979     }
980     case BuiltinOperator_SVDF: {
981       TensorProperty tensor_property_time;
982       // Only 10 bits are needed because 6 bits are reserved for the reduce
983       // operation after element-wise multiplication between state and time
984       // weights.
985       tensor_property_time.number_of_bits = 10;
986       TensorProperty tensor_property_bias;
987       tensor_property_bias.use_derived_scale = true;
988       tensor_property_bias.number_of_bits = 32;
989       tensor_property_bias.derived_scale = {{2, 4}, {}, {}};
990       TensorProperty tensor_property_state;
991       tensor_property_state.number_of_bits = 16;
992       tensor_property_state.state_tensor = true;
993 
994       property.inputs = {{0, {}},
995                          {1, {}},
996                          {2, tensor_property_time},
997                          {4, tensor_property_state},
998                          {3, tensor_property_bias}};
999       property.outputs = {{0, {}}};
1000       property.version = 3;
1001       property.quantizable_int16 = false;
1002       break;
1003     }
1004     case BuiltinOperator_TRANSPOSE:
1005       property.inputs = {{0, {}}};
1006       property.outputs = {{0, {}}};
1007       property.restrict_same_input_output_scale = true;
1008       property.version = 2;
1009       break;
1010     case BuiltinOperator_UNPACK:
1011       property.inputs = {{0, {}}};
1012       property.arbitrary_outputs = true;
1013       property.restrict_same_input_output_scale = true;
1014       property.version = 1;
1015       break;
1016     case BuiltinOperator_MIRROR_PAD:
1017       property.inputs = {{0, {}}};
1018       property.outputs = {{0, {}}};
1019       property.restrict_same_input_output_scale = true;
1020       property.version = 2;
1021       property.quantizable_int16 = false;
1022       break;
1023     case BuiltinOperator_REDUCE_MAX:
1024     case BuiltinOperator_REDUCE_MIN:
1025       property.inputs = {{0, {}}};
1026       property.outputs = {{0, {}}};
1027       property.restrict_same_input_output_scale = true;
1028       property.version = 2;
1029       break;
1030     case BuiltinOperator_WHERE:
1031       property.inputs = {{0, {}}};
1032       property.outputs = {{0, {}}};
1033       property.version = 1;
1034       break;
1035     default:
1036       // No quantized implementation exists for this operation.
1037       property.quantizable = false;
1038       property.quantizable_int16 = false;
1039   }
1040   return property;
1041 }  // NOLINT(readability/fn_size)
1042 // LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_ops.td)
1043 
1044 }  // namespace operator_property
1045 }  // namespace optimize
1046 }  // namespace tflite
1047