1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
18 
#include <math.h>

#include <ostream>

#include "tensorflow/examples/android/jni/object_tracking/geom.h"
22 
23 namespace tf_tracking {
24 
// Identifiers recording where a tracked keypoint came from. The numeric
// values are arbitrary labels; they are spelled out explicitly so they stay
// stable if enumerators are ever reordered.
enum KeypointType {
  // Keypoint with no specific origin recorded.
  KEYPOINT_TYPE_DEFAULT = 0,

  // Presumably a keypoint found by the FAST detector -- TODO confirm against
  // the keypoint detector implementation.
  KEYPOINT_TYPE_FAST = 1,

  // NOTE(review): exact meaning of "interest" is not visible in this header.
  KEYPOINT_TYPE_INTEREST = 2
};
31 
// Struct that can be used to more richly store the results of a detection
// than a single number, while still maintaining comparability.
//
// At present it wraps a single double. Ordering comparisons (<, >, <=, >=)
// are provided as free functions following this struct.
struct MatchScore {
  // Creates a score of 0.0.
  MatchScore() : value(0.0) {}

  // Creates a score holding `val`. Explicit so that plain numbers are never
  // silently converted into scores.
  explicit MatchScore(double val) : value(val) {}

  // The raw score.
  double value;

  // Adds `rhs` to this score in place and returns this score.
  MatchScore& operator+=(const MatchScore& rhs) {
    value += rhs.value;
    return *this;
  }

  // Returns the sum of the two scores as a new object. Neither operand is
  // modified, so this is usable with const scores (such as the constants
  // declared later in this header). Use operator+= to accumulate in place.
  MatchScore operator+(const MatchScore& rhs) const {
    return MatchScore(value + rhs.value);
  }

  // Writes the raw score value to `stream`.
  friend std::ostream& operator<<(std::ostream& stream,
                                  const MatchScore& detection) {
    stream << detection.value;
    return stream;
  }
};
51 inline bool operator< (const MatchScore& cC1, const MatchScore& cC2) {
52     return cC1.value < cC2.value;
53 }
54 inline bool operator> (const MatchScore& cC1, const MatchScore& cC2) {
55     return cC1.value > cC2.value;
56 }
57 inline bool operator>= (const MatchScore& cC1, const MatchScore& cC2) {
58     return cC1.value >= cC2.value;
59 }
60 inline bool operator<= (const MatchScore& cC1, const MatchScore& cC2) {
61     return cC1.value <= cC2.value;
62 }
63 
// Fixed seed used for all random number generators.
static const int kRandomNumberSeed = 11111;

// TODO(andrewharp): Move as many of these settings as possible into a settings
// object which can be passed in from Java at runtime.

// Whether or not to use ESM instead of LK flow.
// NOTE(review): presumably ESM = Efficient Second-order Minimization and
// LK = Lucas-Kanade; confirm against the flow implementation.
static const bool kUseEsm = false;

// This constant gets added to the diagonal of the Hessian
// before solving for translation in 2dof ESM.
// It ensures better behavior especially in the absence of
// strong texture.
static const int kEsmRegularizer = 20;

// Do we want to brightness-normalize each keypoint patch when we compute
// its flow using ESM?
static const bool kDoBrightnessNormalize = true;

// Whether or not to use fixed-point interpolated pixel lookups in optical flow.
// Note: this is a preprocessor macro, so it is not scoped to the tf_tracking
// namespace and is visible to every file that includes this header.
#define USE_FIXED_POINT_FLOW 1

// Whether to normalize keypoint windows for intensity in LK optical flow.
// This is a define for now because it helps keep the code streamlined.
// Note: like USE_FIXED_POINT_FLOW, this macro is not namespace-scoped, and
// its short name can collide with other identifiers in including files.
#define NORMALIZE 1
89 
// Number of keypoints to store per frame.
static const int kMaxKeypoints = 76;

// Upper bound on temporary keypoints used during keypoint detection.
// NOTE(review): presumably the size of the candidate pool before it is
// narrowed down to kMaxKeypoints -- confirm against the keypoint detector.
static const int kMaxTempKeypoints = 1024;

// Number of floats each keypoint takes up when exporting to an array.
static const int kKeypointStep = 7;

// Number of frame deltas to keep around in the circular queue.
static const int kNumFrames = 512;

// Number of iterations to do tracking on each keypoint at each pyramid level.
static const int kNumIterations = 3;

// The number of bins (on a side) to divide each bin from the previous
// cache level into.  Higher numbers will decrease performance by increasing
// cache misses, but mean that cache hits are more locally relevant.
static const int kCacheBranchFactor = 2;

// Number of levels to put in the cache.
// Each level of the cache is a square grid of bins, length:
// branch_factor^(level - 1) on each side.
//
// This may be greater than kNumPyramidLevels. Setting it to 0 means no
// caching is enabled.
static const int kNumCacheLevels = 3;

// The level at which the cache pyramid gets cut off and replaced by a matrix
// transform if such a matrix has been provided to the cache.
static const int kCacheCutoff = 1;

// Number of pyramid levels. Referenced by the documentation of
// kNumCacheLevels and kMinNumPyramidLevelsToUseForAdjustment.
static const int kNumPyramidLevels = 4;

// Keypoint count limit associated with an object's area.
// NOTE(review): the previous comment described this as "the minimum number of
// keypoints needed in an object's area", which contradicts the "Max" in the
// identifier -- confirm which bound the usage sites actually enforce.
static const int kMaxKeypointsForObject = 16;

// Minimum number of pyramid levels to use after getting cached value.
// This allows fine-scale adjustment from the cached value, which is taken
// from the center of the corresponding top cache level box.
// Can be [0, kNumPyramidLevels).
static const int kMinNumPyramidLevelsToUseForAdjustment = 1;

// Window size to integrate over to find local image derivative.
// This acts as a half-width: kFlowArraySize below uses a full window of
// (2 * kFlowIntegrationWindowSize + 1) pixels per side.
static const int kFlowIntegrationWindowSize = 3;

// Total area of integration windows, i.e. the number of pixels in one
// (2 * size + 1) x (2 * size + 1) window (7 x 7 = 49 with the current value).
static const int kFlowArraySize =
    (2 * kFlowIntegrationWindowSize + 1) * (2 * kFlowIntegrationWindowSize + 1);

// Error that's considered good enough to early abort tracking.
static const float kTrackingAbortThreshold = 0.03f;

// Maximum number of deviations a keypoint-correspondence delta can be from the
// weighted average before being thrown out for region-based queries.
static const float kNumDeviations = 2.0f;

// The length of the allowed delta between the forward and the backward
// flow deltas in terms of the length of the forward flow vector.
static const float kMaxForwardBackwardErrorAllowed = 0.5f;

// Threshold for pixels to be considered different.
static const int kFastDiffAmount = 10;

// How far from edge of frame to stop looking for FAST keypoints.
static const int kFastBorderBuffer = 10;

// Determines if non-detected arbitrary keypoints should be added to regions.
// This will help if no keypoints have been detected in the region yet.
static const bool kAddArbitraryKeypoints = true;

// How many arbitrary keypoints to add along each axis as candidates for each
// region?
static const int kNumToAddAsCandidates = 1;

// In terms of region dimensions, how closely can we place keypoints
// next to each other?
static const float kClosestPercent = 0.6f;

// How many FAST qualifying pixels must be connected to a pixel for it to be
// considered a candidate keypoint for Harris filtering.
static const int kMinNumConnectedForFastKeypoint = 8;

// Size of the window to integrate over for Harris filtering.
// Compare to kFlowIntegrationWindowSize.
// NOTE(review): presumably also a half-width like kFlowIntegrationWindowSize;
// confirm against the Harris filter implementation.
static const int kHarrisWindowSize = 2;
176 
177 
// DETECTOR PARAMETERS

// Before relocalizing, make sure the new proposed position is better than
// the existing position by a small amount to prevent thrashing.
static const MatchScore kMatchScoreBuffer(0.01f);

// Minimum score a tracked object can have and still be considered a match.
// TODO(andrewharp): Make this a per detector thing.
static const MatchScore kMinimumMatchScore(0.5f);

// NOTE(review): undocumented in the original. Judging by the name, the
// minimum correlation required for tracking -- confirm at the usage site.
static const float kMinimumCorrelationForTracking = 0.4f;

// NOTE(review): undocumented in the original. Judging by the name, a match
// score tied to terminating immediately; the direction of the comparison is
// not visible in this header -- confirm at the usage site.
static const MatchScore kMatchScoreForImmediateTermination(0.0f);

// Run the detector every N frames.
static const int kDetectEveryNFrames = 4;

// How many features does each feature_set contain?
static const int kFeaturesPerFeatureSet = 10;

// The number of FeatureSets managed by the object detector.
// More FeatureSets can increase recall at the cost of performance.
static const int kNumFeatureSets = 7;

// How many FeatureSets must respond affirmatively for a candidate descriptor
// and position to be given more thorough attention?
static const int kNumFeatureSetsForCandidate = 2;

// How large the thumbnails used for correlation validation are.  Used for both
// width and height.
static const int kNormalizedThumbnailSize = 11;

// The area of intersection divided by union for the bounding boxes that tells
// if this tracking has slipped enough to invalidate all unlocked examples.
static const float kPositionOverlapThreshold = 0.6f;

// The number of detection failures allowed before an object goes invisible.
// Tracking will still occur, so if it is actually still being tracked and
// comes back into a detectable position, it's likely to be found.
static const int kMaxNumDetectionFailures = 4;
218 
219 
// Minimum square size to scan with sliding window.
static const float kScanMinSquareSize = 16.0f;

// Maximum square size to scan with sliding window.
static const float kScanMaxSquareSize = 64.0f;

// Scale difference for consecutive scans of the sliding window
// (sqrt(2), roughly 1.414).
static const float kScanScaleFactor = sqrtf(2.0f);

// Step size for sliding window.
static const int kScanStepSize = 10;


// How tightly to pack the descriptor boxes for confirmed exemplars
// (1 / sqrt(2), roughly 0.707).
static const float kLockedScaleFactor = 1 / sqrtf(2.0f);

// How tightly to pack the descriptor boxes for unconfirmed exemplars.
static const float kUnlockedScaleFactor = 1 / 2.0f;

// How tightly the boxes to scan centered at the last known position will be
// packed. Evaluates to the same value as kLockedScaleFactor.
static const float kLastKnownPositionScaleFactor = 1.0f / sqrtf(2.0f);

// The bounds on how close a new object example must be to existing object
// examples for detection to be valid.
static const float kMinCorrelationForNewExample = 0.75f;
static const float kMaxCorrelationForNewExample = 0.99f;


// The number of safe tries an exemplar has after being created before
// missed detections count against it.
static const int kFreeTries = 5;

// A false positive is worth this many missed detections.
static const int kFalsePositivePenalty = 5;
255 
256 struct ObjectDetectorConfig {
257   const Size image_size;
258 
ObjectDetectorConfigObjectDetectorConfig259   explicit ObjectDetectorConfig(const Size& image_size)
260       : image_size(image_size) {}
261   virtual ~ObjectDetectorConfig() = default;
262 };
263 
264 struct KeypointDetectorConfig {
265   const Size image_size;
266 
267   bool detect_skin;
268 
KeypointDetectorConfigKeypointDetectorConfig269   explicit KeypointDetectorConfig(const Size& image_size)
270       : image_size(image_size),
271         detect_skin(false) {}
272 };
273 
274 
275 struct OpticalFlowConfig {
276   const Size image_size;
277 
OpticalFlowConfigOpticalFlowConfig278   explicit OpticalFlowConfig(const Size& image_size)
279       : image_size(image_size) {}
280 };
281 
282 struct TrackerConfig {
283   const Size image_size;
284   KeypointDetectorConfig keypoint_detector_config;
285   OpticalFlowConfig flow_config;
286   bool always_track;
287 
288   float object_box_scale_factor_for_features;
289 
TrackerConfigTrackerConfig290   explicit TrackerConfig(const Size& image_size)
291       : image_size(image_size),
292         keypoint_detector_config(image_size),
293         flow_config(image_size),
294         always_track(false),
295         object_box_scale_factor_for_features(1.0f) {}
296 };
297 
298 }  // namespace tf_tracking
299 
300 #endif  // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
301