1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Operations"
18
19 #include <algorithm>
20 #include <cfloat>
21 #include <cmath>
22 #include <vector>
23
24 #include "CpuOperationUtils.h"
25 #include "HalInterfaces.h"
26 #include "OperationResolver.h"
27 #include "OperationsUtils.h"
28 #include "Tracing.h"
29
30 namespace android {
31 namespace nn {
32 namespace heatmap_max_keypoint {
33
// Name used for this operation in log messages and at registration.
constexpr char kOperationName[] = "HEATMAP_MAX_KEYPOINT";

// Input operands, by index. The layout scalar is a bool; when it is true the
// heatmap is converted from NCHW to NHWC before the kernel runs.
constexpr uint32_t kNumInputs = 3;
constexpr uint32_t kHeatmapTensor = 0;
constexpr uint32_t kBoxesTensor = 1;
constexpr uint32_t kLayoutScalar = 2;

// Output operands, by index.
constexpr uint32_t kNumOutputs = 2;
constexpr uint32_t kOutputScoreTensor = 0;
constexpr uint32_t kOutputKeypointTensor = 1;
44
45 namespace {
46
47 using namespace hal;
48
// Refines a heatmap peak to sub-cell precision.
//
// The bicubic upscaling result is approximated by a Taylor expansion around the
// peak, truncated after the quadratic term:
//     D(x) = D - b'x + 1/2 * x'Ax
// where D = grid[1][1] is the score at the expansion center, b is the negated
// gradient, A is the Hessian, and x is the correction applied to the peak
// position.
//
// grid:       3x3 neighbourhood of the peak; the second index is the one whose
//             correction is written to delta[0], the first index the one written
//             to delta[1].
// delta:      in/out, two floats. Overwritten with the correction x, rescaled so
//             that neither component exceeds 1.5 in magnitude.
// deltaScore: in/out. Overwritten with D(x) evaluated at the (clipped) correction.
// fpAtol, fpRtol: absolute / relative tolerance of the singularity test.
//
// When the Hessian is considered singular the function returns without writing
// to delta or deltaScore, so callers must pre-initialise both.
static void solveForDelta(const float grid[3][3], float* delta, float* deltaScore,
                          float fpAtol = 1e-5f, float fpRtol = 1e-5f) {
    // Negated central-difference gradient at the center.
    const float negGrad0 = -(grid[1][2] - grid[1][0]) / 2.0f;
    const float negGrad1 = -(grid[2][1] - grid[0][1]) / 2.0f;

    // Symmetric Hessian at the center (finite differences).
    const float hess00 = grid[1][0] - 2.0f * grid[1][1] + grid[1][2];
    const float hess01 = (grid[2][2] - grid[2][0] - grid[0][2] + grid[0][0]) / 4.0f;
    const float hess10 = hess01;
    const float hess11 = grid[0][1] - 2.0f * grid[1][1] + grid[2][1];

    // Determinant of the 2x2 Hessian; bail out if it is too close to zero to invert.
    const float diagProduct = hess00 * hess11;
    const float offDiagProduct = hess01 * hess10;
    const float determinant = diagProduct - offDiagProduct;
    if (std::abs(determinant) < (fpAtol + fpRtol * diagProduct)) {
        return;
    }

    // Solve A * x = b with the closed-form 2x2 inverse.
    delta[0] = (hess11 * negGrad0 - hess01 * negGrad1) / determinant;
    delta[1] = (hess00 * negGrad1 - hess10 * negGrad0) / determinant;

    // Keep the correction within 3/2 of a cell, preserving its direction.
    if (std::abs(delta[0]) > 1.5f || std::abs(delta[1]) > 1.5f) {
        const float shrink = 1.5f / std::max(std::abs(delta[0]), std::abs(delta[1]));
        delta[0] *= shrink;
        delta[1] *= shrink;
    }

    // Evaluate the quadratic model at the final correction.
    *deltaScore = grid[1][1] - negGrad0 * delta[0] - negGrad1 * delta[1] +
                  ((hess00 * delta[0] + hess01 * delta[1]) * delta[0] +
                   (hess10 * delta[0] + hess11 * delta[1]) * delta[1]) /
                          2.0f;
}
87
heatmapMaxKeypointFloat32Nhwc(const float * heatmap,const Shape & heatmapShape,const float * boxes,const Shape & boxesShape,float * outputScoreData,const Shape & outputScoreShape,float * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)88 inline bool heatmapMaxKeypointFloat32Nhwc(const float* heatmap, const Shape& heatmapShape,
89 const float* boxes, const Shape& boxesShape,
90 float* outputScoreData, const Shape& outputScoreShape,
91 float* outputKeypointData,
92 const Shape& outputKeypointShape, float fpAtol,
93 float fpRtol) {
94 NNTRACE_TRANS("HeatmapMaxKeypoint");
95
96 uint32_t numBoxes = getSizeOfDimension(heatmapShape, 0);
97 uint32_t heatmapSize = getSizeOfDimension(heatmapShape, 1);
98 uint32_t numKeypoints = getSizeOfDimension(heatmapShape, 3);
99 uint32_t boxInfoLength = getSizeOfDimension(boxesShape, 1);
100
101 const float* heatmapBase = heatmap;
102 const float* boxInfoBase = boxes;
103 float* outputScoreBase = outputScoreData;
104 float* outputKeypointBase = outputKeypointData;
105 for (uint32_t i = 0; i < numBoxes; i++) {
106 NN_RET_CHECK_LE(boxInfoBase[0], boxInfoBase[2]);
107 NN_RET_CHECK_LE(boxInfoBase[1], boxInfoBase[3]);
108 for (uint32_t j = 0; j < numKeypoints; j++) {
109 // find max score and its index
110 uint32_t maxIndex = 0;
111 float maxScore = -FLT_MAX;
112 for (uint32_t k = 0; k < heatmapSize * heatmapSize; k++) {
113 float val = heatmapBase[k * numKeypoints + j];
114 if (maxScore < val) {
115 maxScore = val;
116 maxIndex = k;
117 }
118 }
119
120 uint32_t maxIndexWidth = maxIndex % heatmapSize;
121 uint32_t maxIndexHeight = maxIndex / heatmapSize;
122
123 // get local 3x3 grid
124 float localGrid[3][3];
125 for (int32_t dh = -1; dh <= 1; dh++) {
126 for (int32_t dw = -1; dw <= 1; dw++) {
127 // cast uint32_t to int32_t
128 int32_t h = static_cast<int32_t>(maxIndexHeight) + dh;
129 int32_t w = static_cast<int32_t>(maxIndexWidth) + dw;
130
131 // use mirroring for out of bound indexing
132 // need to ensure heatmapSize >= 2
133 h = h < 0 ? 1 : (h >= heatmapSize ? heatmapSize - 2 : h);
134 w = w < 0 ? 1 : (w >= heatmapSize ? heatmapSize - 2 : w);
135
136 uint32_t heatmapIndex = static_cast<uint32_t>(h) * heatmapSize * numKeypoints +
137 static_cast<uint32_t>(w) * numKeypoints + j;
138 localGrid[dh + 1][dw + 1] = heatmapBase[heatmapIndex];
139 }
140 }
141
142 float delta[2] = {0.0f, 0.0f}, deltaScore = maxScore;
143 solveForDelta(localGrid, delta, &deltaScore, fpAtol, fpRtol);
144
145 float wRoiStart = boxInfoBase[0];
146 float hRoiStart = boxInfoBase[1];
147 float wRoiEnd = boxInfoBase[2];
148 float hRoiEnd = boxInfoBase[3];
149 float roiWidth = wRoiEnd - wRoiStart;
150 float roiHeight = hRoiEnd - hRoiStart;
151 float wRelativePos = (static_cast<float>(maxIndexWidth) + delta[0] + 0.5f) /
152 static_cast<float>(heatmapSize);
153 float hRelativePos = (static_cast<float>(maxIndexHeight) + delta[1] + 0.5f) /
154 static_cast<float>(heatmapSize);
155 *outputScoreBase++ = deltaScore;
156 outputKeypointBase[0] = wRelativePos * roiWidth + wRoiStart;
157 outputKeypointBase[1] = hRelativePos * roiHeight + hRoiStart;
158 outputKeypointBase += 2;
159 }
160 boxInfoBase += boxInfoLength;
161 heatmapBase += heatmapSize * heatmapSize * numKeypoints;
162 }
163
164 return true;
165 }
166
heatmapMaxKeypointFloat32(const float * heatmap,const Shape & heatmapShape,const float * boxes,const Shape & boxesShape,bool layout,float * outputScoreData,const Shape & outputScoreShape,float * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)167 inline bool heatmapMaxKeypointFloat32(const float* heatmap, const Shape& heatmapShape,
168 const float* boxes, const Shape& boxesShape, bool layout,
169 float* outputScoreData, const Shape& outputScoreShape,
170 float* outputKeypointData, const Shape& outputKeypointShape,
171 float fpAtol, float fpRtol) {
172 std::vector<float> heatmap_nhwc;
173 Shape heatmapShape_nhwc;
174 if (layout) {
175 NN_RET_CHECK(convertNchwToNhwc(heatmap, heatmapShape, &heatmap_nhwc, &heatmapShape_nhwc));
176 }
177 const float* heatmap_tmp = layout ? heatmap_nhwc.data() : heatmap;
178 const Shape& heatmapShape_tmp = layout ? heatmapShape_nhwc : heatmapShape;
179 return heatmapMaxKeypointFloat32Nhwc(heatmap_tmp, heatmapShape_tmp, boxes, boxesShape,
180 outputScoreData, outputScoreShape, outputKeypointData,
181 outputKeypointShape, fpAtol, fpRtol);
182 }
183
heatmapMaxKeypointQuant(const uint8_t * heatmap,const Shape & heatmapShape,const uint16_t * boxes,const Shape & boxesShape,bool layout,uint8_t * outputScoreData,const Shape & outputScoreShape,uint16_t * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)184 inline bool heatmapMaxKeypointQuant(const uint8_t* heatmap, const Shape& heatmapShape,
185 const uint16_t* boxes, const Shape& boxesShape, bool layout,
186 uint8_t* outputScoreData, const Shape& outputScoreShape,
187 uint16_t* outputKeypointData, const Shape& outputKeypointShape,
188 float fpAtol, float fpRtol) {
189 std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
190 convertQuantToFloat32(heatmap, heatmapShape.scale, heatmapShape.offset, &heatmap_float32);
191 std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
192 convertQuantToFloat32(boxes, boxesShape.scale, boxesShape.offset, &boxes_float32);
193 std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
194 std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
195 NN_RET_CHECK(heatmapMaxKeypointFloat32(
196 heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
197 outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
198 outputKeypointShape, fpAtol, fpRtol));
199 convertFloat32ToQuant(outputScore_float32, outputScoreShape.scale, outputScoreShape.offset,
200 outputScoreData);
201 convertFloat32ToQuant(outputKeypoint_float32, outputKeypointShape.scale,
202 outputKeypointShape.offset, outputKeypointData);
203 return true;
204 }
205
heatmapMaxKeypointQuant(const int8_t * heatmap,const Shape & heatmapShape,const uint16_t * boxes,const Shape & boxesShape,bool layout,int8_t * outputScoreData,const Shape & outputScoreShape,uint16_t * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)206 inline bool heatmapMaxKeypointQuant(const int8_t* heatmap, const Shape& heatmapShape,
207 const uint16_t* boxes, const Shape& boxesShape, bool layout,
208 int8_t* outputScoreData, const Shape& outputScoreShape,
209 uint16_t* outputKeypointData, const Shape& outputKeypointShape,
210 float fpAtol, float fpRtol) {
211 std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
212 convertQuantToFloat32(heatmap, heatmapShape.scale, heatmapShape.offset, &heatmap_float32);
213 std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
214 convertQuantToFloat32(boxes, boxesShape.scale, boxesShape.offset, &boxes_float32);
215 std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
216 std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
217 NN_RET_CHECK(heatmapMaxKeypointFloat32(
218 heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
219 outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
220 outputKeypointShape, fpAtol, fpRtol));
221 convertFloat32ToQuant(outputScore_float32, outputScoreShape.scale, outputScoreShape.offset,
222 outputScoreData);
223 convertFloat32ToQuant(outputKeypoint_float32, outputKeypointShape.scale,
224 outputKeypointShape.offset, outputKeypointData);
225 return true;
226 }
227
228 } // namespace
229
validate(const IOperationValidationContext * context)230 bool validate(const IOperationValidationContext* context) {
231 NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
232 NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
233 std::vector<OperandType> inExpectedTypes;
234 std::vector<OperandType> outExpectedTypes;
235 auto inputType = context->getInputType(kHeatmapTensor);
236 auto minSupportedHalVersion = HalVersion::V1_2;
237 if (inputType == OperandType::TENSOR_FLOAT32 || inputType == OperandType::TENSOR_FLOAT16) {
238 inExpectedTypes = {inputType, inputType, OperandType::BOOL};
239 outExpectedTypes = {inputType, inputType};
240 } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
241 inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM,
242 OperandType::BOOL};
243 outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM};
244 } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
245 inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
246 OperandType::TENSOR_QUANT16_ASYMM, OperandType::BOOL};
247 outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
248 OperandType::TENSOR_QUANT16_ASYMM};
249 minSupportedHalVersion = HalVersion::V1_3;
250 } else {
251 LOG(ERROR) << "Unsupported input tensor type for operation " << kOperationName;
252 return false;
253 }
254 NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
255 NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
256 return validateHalVersion(context, minSupportedHalVersion);
257 }
258
prepare(IOperationExecutionContext * context)259 bool prepare(IOperationExecutionContext* context) {
260 bool layout = context->getInputValue<bool>(kLayoutScalar);
261 Shape heatmapShape = context->getInputShape(kHeatmapTensor);
262 Shape boxesShape = context->getInputShape(kBoxesTensor);
263 NN_RET_CHECK_EQ(getNumberOfDimensions(heatmapShape), 4);
264 NN_RET_CHECK_EQ(getNumberOfDimensions(boxesShape), 2);
265
266 uint32_t numBoxes = getSizeOfDimension(heatmapShape, 0);
267 uint32_t heatmapSize = getSizeOfDimension(heatmapShape, 2);
268 uint32_t numKeypoints = getSizeOfDimension(heatmapShape, layout ? 1 : 3);
269 uint32_t boxInfoLength = getSizeOfDimension(boxesShape, 1);
270 NN_RET_CHECK_EQ(getSizeOfDimension(heatmapShape, layout ? 3 : 1), heatmapSize);
271 NN_RET_CHECK_GE(heatmapSize, 2);
272 NN_RET_CHECK_EQ(getSizeOfDimension(boxesShape, 0), numBoxes);
273 NN_RET_CHECK_EQ(boxInfoLength, 4);
274
275 if (heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
276 heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
277 NN_RET_CHECK_EQ(boxesShape.scale, 0.125f);
278 NN_RET_CHECK_EQ(boxesShape.offset, 0);
279 }
280
281 Shape outputScore = context->getOutputShape(kOutputScoreTensor);
282 outputScore.type = heatmapShape.type;
283 outputScore.dimensions = {numBoxes, numKeypoints};
284 NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScore));
285
286 Shape outputKeypoint = context->getOutputShape(kOutputKeypointTensor);
287 outputKeypoint.type = boxesShape.type;
288 outputKeypoint.dimensions = {numBoxes, numKeypoints, 2};
289 outputKeypoint.offset = 0;
290 outputKeypoint.scale = 0.f;
291 if (heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
292 heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
293 outputKeypoint.scale = 0.125f;
294 }
295 NN_RET_CHECK(context->setOutputShape(kOutputKeypointTensor, outputKeypoint));
296 return true;
297 }
298
execute(IOperationExecutionContext * context)299 bool execute(IOperationExecutionContext* context) {
300 bool layout = context->getInputValue<bool>(kLayoutScalar);
301 switch (context->getInputType(kHeatmapTensor)) {
302 case OperandType::TENSOR_FLOAT16: {
303 const auto heatmap = context->getInputBuffer<_Float16>(kHeatmapTensor);
304 const auto heatmapShape = context->getInputShape(kHeatmapTensor);
305 const auto boxes = context->getInputBuffer<_Float16>(kBoxesTensor);
306 const auto boxesShape = context->getInputShape(kBoxesTensor);
307 auto outputScoreData = context->getOutputBuffer<_Float16>(kOutputScoreTensor);
308 const auto outputScoreShape = context->getOutputShape(kOutputScoreTensor);
309 auto outputKeypointData = context->getOutputBuffer<_Float16>(kOutputKeypointTensor);
310 const auto outputKeypointShape = context->getOutputShape(kOutputKeypointTensor);
311 std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
312 convertFloat16ToFloat32(heatmap, &heatmap_float32);
313 std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
314 convertFloat16ToFloat32(boxes, &boxes_float32);
315 std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
316 std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
317 NN_RET_CHECK(heatmapMaxKeypointFloat32(
318 heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
319 outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
320 outputKeypointShape, 1e-3f, 1e-3f));
321 convertFloat32ToFloat16(outputScore_float32, outputScoreData);
322 convertFloat32ToFloat16(outputKeypoint_float32, outputKeypointData);
323 return true;
324 }
325 case OperandType::TENSOR_FLOAT32: {
326 return heatmapMaxKeypointFloat32(context->getInputBuffer<float>(kHeatmapTensor),
327 context->getInputShape(kHeatmapTensor),
328 context->getInputBuffer<float>(kBoxesTensor),
329 context->getInputShape(kBoxesTensor), layout,
330 context->getOutputBuffer<float>(kOutputScoreTensor),
331 context->getOutputShape(kOutputScoreTensor),
332 context->getOutputBuffer<float>(kOutputKeypointTensor),
333 context->getOutputShape(kOutputKeypointTensor), 1e-5f,
334 1e-5f);
335 }
336 case OperandType::TENSOR_QUANT8_ASYMM: {
337 return heatmapMaxKeypointQuant(
338 context->getInputBuffer<uint8_t>(kHeatmapTensor),
339 context->getInputShape(kHeatmapTensor),
340 context->getInputBuffer<uint16_t>(kBoxesTensor),
341 context->getInputShape(kBoxesTensor), layout,
342 context->getOutputBuffer<uint8_t>(kOutputScoreTensor),
343 context->getOutputShape(kOutputScoreTensor),
344 context->getOutputBuffer<uint16_t>(kOutputKeypointTensor),
345 context->getOutputShape(kOutputKeypointTensor), 1e-5f, 1e-5f);
346 }
347 case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
348 return heatmapMaxKeypointQuant(
349 context->getInputBuffer<int8_t>(kHeatmapTensor),
350 context->getInputShape(kHeatmapTensor),
351 context->getInputBuffer<uint16_t>(kBoxesTensor),
352 context->getInputShape(kBoxesTensor), layout,
353 context->getOutputBuffer<int8_t>(kOutputScoreTensor),
354 context->getOutputShape(kOutputScoreTensor),
355 context->getOutputBuffer<uint16_t>(kOutputKeypointTensor),
356 context->getOutputShape(kOutputKeypointTensor), 1e-5f, 1e-5f);
357 }
358 default:
359 NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
360 }
361 }
362
363 } // namespace heatmap_max_keypoint
364
365 NN_REGISTER_OPERATION(HEATMAP_MAX_KEYPOINT, heatmap_max_keypoint::kOperationName,
366 heatmap_max_keypoint::validate, heatmap_max_keypoint::prepare,
367 heatmap_max_keypoint::execute);
368
369 } // namespace nn
370 } // namespace android
371