// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#import "CameraExampleViewController.h"
#import <AssertMacros.h>
#import <AssetsLibrary/AssetsLibrary.h>
#import <CoreImage/CoreImage.h>
#import <ImageIO/ImageIO.h>

#include <sys/time.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/string_util.h"
#if TFLITE_USE_GPU_DELEGATE
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#endif

#define LOG(x) std::cerr
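// Note: this macro ignores its severity argument and writes everything to
// std::cerr; in particular, LOG(FATAL) only prints and does not abort.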

namespace {

// If you have your own model, change this to its file name, and make sure
// you've added the file to your app resources too.
#if TFLITE_USE_GPU_DELEGATE
// The GPU delegate currently supports only float models.
NSString* model_file_name = @"mobilenet_v1_1.0_224";
#else
NSString* model_file_name = @"mobilenet_quant_v1_224";
#endif
NSString* model_file_type = @"tflite";
// If you have your own model, point this to the labels file.
NSString* labels_file_name = @"labels";
NSString* labels_file_type = @"txt";

// These dimensions need to match those the model was trained with.
const int wanted_input_width = 224;
const int wanted_input_height = 224;
const int wanted_input_channels = 3;
const float input_mean = 127.5f;
const float input_std = 127.5f;
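// With mean and std both 127.5, a uint8 pixel p maps to (p - 127.5) / 127.5,
// normalizing [0, 255] into [-1, 1] for the float model.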
const std::string input_layer_name = "input";
const std::string output_layer_name = "softmax1";
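// These layer names are informational only; the code below accesses tensors by
// index via interpreter->inputs() and interpreter->outputs().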

NSString* FilePathForResourceName(NSString* name, NSString* extension) {
  NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension];
  if (file_path == nil) {
    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String]
               << "' in bundle.";
  }
  return file_path;
}

void LoadLabels(NSString* file_name, NSString* file_type, std::vector<std::string>* label_strings) {
  NSString* labels_path = FilePathForResourceName(file_name, file_type);
  if (!labels_path) {
    LOG(ERROR) << "Failed to find labels file '" << [file_name UTF8String] << "."
               << [file_type UTF8String] << "' in bundle.";
    return;
  }
  std::ifstream t;
  t.open([labels_path UTF8String]);
  std::string line;
  // Read one label per line; looping on getline avoids appending a spurious
  // empty label at end-of-file.
  while (std::getline(t, line)) {
    label_strings->push_back(line);
  }
  t.close();
}

// Returns the top N confidence values over threshold in the provided vector,
// sorted by confidence in descending order.
void GetTopN(
    const float* prediction, const int prediction_size, const int num_results,
    const float threshold, std::vector<std::pair<float, int> >* top_results) {
  // Will contain top N results in ascending order.
  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int> >,
                      std::greater<std::pair<float, int> > >
      top_result_pq;

  for (int i = 0; i < prediction_size; ++i) {
    const float value = prediction[i];
    // Only add it if it beats the threshold and has a chance at being in
    // the top N.
    if (value < threshold) {
      continue;
    }

    top_result_pq.push(std::pair<float, int>(value, i));

    // If at capacity, kick the smallest value out.
    if (top_result_pq.size() > static_cast<size_t>(num_results)) {
      top_result_pq.pop();
    }
  }

  // Copy to output vector and reverse into descending order.
  while (!top_result_pq.empty()) {
    top_results->push_back(top_result_pq.top());
    top_result_pq.pop();
  }
  std::reverse(top_results->begin(), top_results->end());
}

// Preprocesses the input image into the TFLite interpreter's input buffer for a
// float model: nearest-neighbor scaling with per-channel normalization to
// [-1, 1]. Note: in_x is computed from y and in_y from x, so the sampled image
// is transposed; this relies on wanted_input_width == wanted_input_height.
void ProcessInputWithFloatModel(
    uint8_t* input, float* buffer, int image_width, int image_height, int image_channels) {
  for (int y = 0; y < wanted_input_height; ++y) {
    float* out_row = buffer + (y * wanted_input_width * wanted_input_channels);
    for (int x = 0; x < wanted_input_width; ++x) {
      const int in_x = (y * image_width) / wanted_input_width;
      const int in_y = (x * image_height) / wanted_input_height;
      uint8_t* input_pixel =
          input + (in_y * image_width * image_channels) + (in_x * image_channels);
      float* out_pixel = out_row + (x * wanted_input_channels);
      for (int c = 0; c < wanted_input_channels; ++c) {
        out_pixel[c] = (input_pixel[c] - input_mean) / input_std;
      }
    }
  }
}

// Preprocesses the input image into the TFLite interpreter's input buffer for a
// quantized model: nearest-neighbor scaling with the uint8 pixel values passed
// through unchanged. The same x/y transpose as in ProcessInputWithFloatModel
// applies here.
void ProcessInputWithQuantizedModel(
    uint8_t* input, uint8_t* output, int image_width, int image_height, int image_channels) {
  for (int y = 0; y < wanted_input_height; ++y) {
    uint8_t* out_row = output + (y * wanted_input_width * wanted_input_channels);
    for (int x = 0; x < wanted_input_width; ++x) {
      const int in_x = (y * image_width) / wanted_input_width;
      const int in_y = (x * image_height) / wanted_input_height;
      uint8_t* in_pixel = input + (in_y * image_width * image_channels) + (in_x * image_channels);
      uint8_t* out_pixel = out_row + (x * wanted_input_channels);
      for (int c = 0; c < wanted_input_channels; ++c) {
        out_pixel[c] = in_pixel[c];
      }
    }
  }
}

}  // namespace

@interface CameraExampleViewController (InternalMethods)
- (void)setupAVCapture;
- (void)teardownAVCapture;
@end

@implementation CameraExampleViewController {
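  // Lifetime note: the FlatBufferModel must outlive the interpreter built from
  // it, and the GPU delegate (when enabled) must outlive the interpreter as
  // well; the delegate is released in dealloc.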
  std::unique_ptr<tflite::FlatBufferModel> model;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  TfLiteDelegate* delegate;
}

- (void)setupAVCapture {
  NSError* error = nil;

  session = [AVCaptureSession new];
  if ([[UIDevice currentDevice] userInterfaceIdiom] == UIUserInterfaceIdiomPhone)
    [session setSessionPreset:AVCaptureSessionPreset640x480];
  else
    [session setSessionPreset:AVCaptureSessionPresetPhoto];

  AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
  AVCaptureDeviceInput* deviceInput =
      [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];

  if (error != nil) {
183    NSLog(@"Failed to initialize AVCaptureDeviceInput. Note: This app doesn't work with simulator");
    assert(NO);
  }

  if ([session canAddInput:deviceInput]) [session addInput:deviceInput];

  videoDataOutput = [AVCaptureVideoDataOutput new];

  NSDictionary* rgbOutputSettings =
      [NSDictionary dictionaryWithObject:[NSNumber numberWithInt:kCMPixelFormat_32BGRA]
                                  forKey:(id)kCVPixelBufferPixelFormatTypeKey];
  [videoDataOutput setVideoSettings:rgbOutputSettings];
  [videoDataOutput setAlwaysDiscardsLateVideoFrames:YES];
  videoDataOutputQueue = dispatch_queue_create("VideoDataOutputQueue", DISPATCH_QUEUE_SERIAL);
  [videoDataOutput setSampleBufferDelegate:self queue:videoDataOutputQueue];

  if ([session canAddOutput:videoDataOutput]) [session addOutput:videoDataOutput];
  [[videoDataOutput connectionWithMediaType:AVMediaTypeVideo] setEnabled:YES];

  previewLayer = [[AVCaptureVideoPreviewLayer alloc] initWithSession:session];
  [previewLayer setBackgroundColor:[[UIColor blackColor] CGColor]];
  [previewLayer setVideoGravity:AVLayerVideoGravityResizeAspect];
  CALayer* rootLayer = [previewView layer];
  [rootLayer setMasksToBounds:YES];
  [previewLayer setFrame:[rootLayer bounds]];
  [rootLayer addSublayer:previewLayer];
  [session startRunning];

  if (error) {
    NSString* title = [NSString stringWithFormat:@"Failed with error %d", (int)[error code]];
    UIAlertController* alertController =
        [UIAlertController alertControllerWithTitle:title
                                            message:[error localizedDescription]
                                     preferredStyle:UIAlertControllerStyleAlert];
    UIAlertAction* dismiss =
        [UIAlertAction actionWithTitle:@"Dismiss" style:UIAlertActionStyleDefault handler:nil];
    [alertController addAction:dismiss];
    [self presentViewController:alertController animated:YES completion:nil];
    [self teardownAVCapture];
  }
}

- (void)teardownAVCapture {
  [previewLayer removeFromSuperlayer];
}

- (AVCaptureVideoOrientation)avOrientationForDeviceOrientation:
    (UIDeviceOrientation)deviceOrientation {
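  // UIDeviceOrientation and AVCaptureVideoOrientation share raw values for the
  // portrait cases, but the landscape cases are mirrored (rotating the device
  // left turns the captured content right), hence the explicit swap below.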
  AVCaptureVideoOrientation result = (AVCaptureVideoOrientation)(deviceOrientation);
  if (deviceOrientation == UIDeviceOrientationLandscapeLeft)
    result = AVCaptureVideoOrientationLandscapeRight;
  else if (deviceOrientation == UIDeviceOrientationLandscapeRight)
    result = AVCaptureVideoOrientationLandscapeLeft;
  return result;
}

- (IBAction)takePicture:(id)sender {
  if ([session isRunning]) {
    [session stopRunning];
    [sender setTitle:@"Continue" forState:UIControlStateNormal];

    flashView = [[UIView alloc] initWithFrame:[previewView frame]];
    [flashView setBackgroundColor:[UIColor whiteColor]];
    [flashView setAlpha:0.f];
    [[[self view] window] addSubview:flashView];

    [UIView animateWithDuration:.2f
        animations:^{
          [flashView setAlpha:1.f];
        }
        completion:^(BOOL finished) {
          [UIView animateWithDuration:.2f
              animations:^{
                [flashView setAlpha:0.f];
              }
              completion:^(BOOL finished) {
                [flashView removeFromSuperview];
                flashView = nil;
              }];
        }];

  } else {
    [session startRunning];
    [sender setTitle:@"Freeze Frame" forState:UIControlStateNormal];
  }
}

- (void)captureOutput:(AVCaptureOutput*)captureOutput
    didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
           fromConnection:(AVCaptureConnection*)connection {
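  // Inference runs synchronously on the serial video queue; frames arriving
  // while one is still being processed are dropped, because
  // alwaysDiscardsLateVideoFrames is set in setupAVCapture.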
  CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
  CFRetain(pixelBuffer);
  [self runModelOnFrame:pixelBuffer];
  CFRelease(pixelBuffer);
}

- (void)runModelOnFrame:(CVPixelBufferRef)pixelBuffer {
  assert(pixelBuffer != NULL);

  OSType sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
  assert(sourcePixelFormat == kCVPixelFormatType_32ARGB ||
         sourcePixelFormat == kCVPixelFormatType_32BGRA);

  const int sourceRowBytes = (int)CVPixelBufferGetBytesPerRow(pixelBuffer);
  const int image_width = (int)CVPixelBufferGetWidth(pixelBuffer);
  const int fullHeight = (int)CVPixelBufferGetHeight(pixelBuffer);

  CVPixelBufferLockFlags unlockFlags = kNilOptions;
  CVPixelBufferLockBaseAddress(pixelBuffer, unlockFlags);

  unsigned char* sourceBaseAddr = (unsigned char*)(CVPixelBufferGetBaseAddress(pixelBuffer));
  int image_height;
  unsigned char* sourceStartAddr;
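  // Center-crop the frame to a square: keep the full width and trim equal
  // margins from the top and bottom when the buffer is taller than it is wide.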
  if (fullHeight <= image_width) {
    image_height = fullHeight;
    sourceStartAddr = sourceBaseAddr;
  } else {
    image_height = image_width;
    const int marginY = ((fullHeight - image_width) / 2);
    sourceStartAddr = (sourceBaseAddr + (marginY * sourceRowBytes));
  }
  const int image_channels = 4;
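  // The capture output is 32BGRA, so each pixel arrives as B, G, R, A; only the
  // first wanted_input_channels bytes are copied, and this demo does not
  // reorder them into RGB.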
  assert(image_channels >= wanted_input_channels);
  uint8_t* in = sourceStartAddr;

  int input = interpreter->inputs()[0];
  TfLiteTensor* input_tensor = interpreter->tensor(input);

  bool is_quantized;
  switch (input_tensor->type) {
    case kTfLiteFloat32:
      is_quantized = false;
      break;
    case kTfLiteUInt8:
      is_quantized = true;
      break;
    default:
      NSLog(@"Input data type is not supported by this demo app.");
      return;
  }

  if (is_quantized) {
    uint8_t* out = interpreter->typed_tensor<uint8_t>(input);
    ProcessInputWithQuantizedModel(in, out, image_width, image_height, image_channels);
  } else {
    float* out = interpreter->typed_tensor<float>(input);
    ProcessInputWithFloatModel(in, out, image_width, image_height, image_channels);
  }

  double start = [[NSDate new] timeIntervalSince1970];
  if (interpreter->Invoke() != kTfLiteOk) {
    LOG(FATAL) << "Failed to invoke!";
  }
  double end = [[NSDate new] timeIntervalSince1970];
  total_latency += (end - start);
  total_count += 1;
  NSLog(@"Time: %.4lf, avg: %.4lf, count: %d", end - start, total_latency / total_count,
        total_count);

  // Read the output size from the output tensor.
  const int output_tensor_index = interpreter->outputs()[0];
  TfLiteTensor* output_tensor = interpreter->tensor(output_tensor_index);
  TfLiteIntArray* output_dims = output_tensor->dims;
  if (output_dims->size != 2 || output_dims->data[0] != 1) {
    LOG(FATAL) << "Output of the model is in an invalid format.";
  }
  const int output_size = output_dims->data[1];

  const int kNumResults = 5;
  const float kThreshold = 0.1f;

  std::vector<std::pair<float, int> > top_results;

  if (is_quantized) {
    uint8_t* quantized_output = interpreter->typed_output_tensor<uint8_t>(0);
    // Dequantize with the output tensor's quantization parameters:
    // real_value = scale * (quantized_value - zero_point).
    int32_t zero_point = output_tensor->params.zero_point;
    float scale = output_tensor->params.scale;
    std::vector<float> output(output_size);
    for (int i = 0; i < output_size; ++i) {
      output[i] = (quantized_output[i] - zero_point) * scale;
    }
    GetTopN(output.data(), output_size, kNumResults, kThreshold, &top_results);
  } else {
    float* output = interpreter->typed_output_tensor<float>(0);
    GetTopN(output, output_size, kNumResults, kThreshold, &top_results);
  }

  NSMutableDictionary* newValues = [NSMutableDictionary dictionary];
  for (const auto& result : top_results) {
    const float confidence = result.first;
    const int index = result.second;
    NSString* labelObject = [NSString stringWithUTF8String:labels[index].c_str()];
    NSNumber* valueObject = [NSNumber numberWithFloat:confidence];
    [newValues setObject:valueObject forKey:labelObject];
  }
  dispatch_async(dispatch_get_main_queue(), ^(void) {
    [self setPredictionValues:newValues];
  });

  CVPixelBufferUnlockBaseAddress(pixelBuffer, unlockFlags);
}

- (void)dealloc {
#if TFLITE_USE_GPU_DELEGATE
  if (delegate) {
    DeleteGpuDelegate(delegate);
  }
#endif
  [self teardownAVCapture];
}

- (void)didReceiveMemoryWarning {
  [super didReceiveMemoryWarning];
}

- (void)viewDidLoad {
  [super viewDidLoad];
  labelLayers = [[NSMutableArray alloc] init];
  oldPredictionValues = [[NSMutableDictionary alloc] init];

  NSString* graph_path = FilePathForResourceName(model_file_name, model_file_type);
  model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]);
  if (!model) {
    LOG(FATAL) << "Failed to mmap model " << graph_path;
  }
  LOG(INFO) << "Loaded model " << graph_path;
  model->error_reporter();
  LOG(INFO) << "resolved reporter";

  LoadLabels(labels_file_name, labels_file_type, &labels);

  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter) {
    LOG(FATAL) << "Failed to construct interpreter";
  }

#if TFLITE_USE_GPU_DELEGATE
  GpuDelegateOptions options;
  options.allow_precision_loss = true;
  options.wait_type = GpuDelegateOptions::WaitType::kActive;
  delegate = NewGpuDelegate(&options);
  if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
    LOG(FATAL) << "Failed to apply GPU delegate.";
  }
#endif

  // Explicitly resize the input tensor.
  {
    int input = interpreter->inputs()[0];
    std::vector<int> sizes = {1, 224, 224, 3};
    interpreter->ResizeInputTensor(input, sizes);
  }
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    LOG(FATAL) << "Failed to allocate tensors!";
  }

  [self setupAVCapture];
}

- (void)viewDidUnload {
  [super viewDidUnload];
}

- (void)viewWillAppear:(BOOL)animated {
  [super viewWillAppear:animated];
}

- (void)viewDidAppear:(BOOL)animated {
  [super viewDidAppear:animated];
}

- (void)viewWillDisappear:(BOOL)animated {
  [super viewWillDisappear:animated];
}

- (void)viewDidDisappear:(BOOL)animated {
  [super viewDidDisappear:animated];
}

- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation {
  return (interfaceOrientation == UIInterfaceOrientationPortrait);
}

- (BOOL)prefersStatusBarHidden {
  return YES;
}

- (void)setPredictionValues:(NSDictionary*)newValues {
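  // Smooth the raw per-frame predictions with an exponential moving average:
  // old values decay by decayValue each frame and new evidence is blended in
  // scaled by updateValue, so the displayed labels change gradually.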
  const float decayValue = 0.75f;
  const float updateValue = 0.25f;
  const float minimumThreshold = 0.01f;

  NSMutableDictionary* decayedPredictionValues = [[NSMutableDictionary alloc] init];
  for (NSString* label in oldPredictionValues) {
    NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label];
    const float oldPredictionValue = [oldPredictionValueObject floatValue];
    const float decayedPredictionValue = (oldPredictionValue * decayValue);
    if (decayedPredictionValue > minimumThreshold) {
      NSNumber* decayedPredictionValueObject = [NSNumber numberWithFloat:decayedPredictionValue];
      [decayedPredictionValues setObject:decayedPredictionValueObject forKey:label];
    }
  }
  oldPredictionValues = decayedPredictionValues;

  for (NSString* label in newValues) {
    NSNumber* newPredictionValueObject = [newValues objectForKey:label];
    NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label];
    if (!oldPredictionValueObject) {
      oldPredictionValueObject = [NSNumber numberWithFloat:0.0f];
    }
    const float newPredictionValue = [newPredictionValueObject floatValue];
    const float oldPredictionValue = [oldPredictionValueObject floatValue];
    const float updatedPredictionValue = (oldPredictionValue + (newPredictionValue * updateValue));
    NSNumber* updatedPredictionValueObject = [NSNumber numberWithFloat:updatedPredictionValue];
    [oldPredictionValues setObject:updatedPredictionValueObject forKey:label];
  }
  NSMutableArray* candidateLabels = [NSMutableArray array];
  for (NSString* label in oldPredictionValues) {
    NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label];
    const float oldPredictionValue = [oldPredictionValueObject floatValue];
    if (oldPredictionValue > 0.05f) {
      NSDictionary* entry = @{@"label" : label, @"value" : oldPredictionValueObject};
      [candidateLabels addObject:entry];
    }
  }
  NSSortDescriptor* sort = [NSSortDescriptor sortDescriptorWithKey:@"value" ascending:NO];
  NSArray* sortedLabels =
      [candidateLabels sortedArrayUsingDescriptors:[NSArray arrayWithObject:sort]];

  const float leftMargin = 10.0f;
  const float topMargin = 10.0f;

  const float valueWidth = 48.0f;
  const float valueHeight = 18.0f;

  const float labelWidth = 246.0f;
  const float labelHeight = 18.0f;

  const float labelMarginX = 5.0f;
  const float labelMarginY = 5.0f;

  [self removeAllLabelLayers];

  int labelCount = 0;
  for (NSDictionary* entry in sortedLabels) {
    NSString* label = [entry objectForKey:@"label"];
    NSNumber* valueObject = [entry objectForKey:@"value"];
    const float value = [valueObject floatValue];
    const float originY = topMargin + ((labelHeight + labelMarginY) * labelCount);
    const int valuePercentage = (int)roundf(value * 100.0f);

    const float valueOriginX = leftMargin;
    NSString* valueText = [NSString stringWithFormat:@"%d%%", valuePercentage];

    [self addLabelLayerWithText:valueText
                        originX:valueOriginX
                        originY:originY
                          width:valueWidth
                         height:valueHeight
                      alignment:kCAAlignmentRight];

    const float labelOriginX = (leftMargin + valueWidth + labelMarginX);

    [self addLabelLayerWithText:[label capitalizedString]
                        originX:labelOriginX
                        originY:originY
                          width:labelWidth
                         height:labelHeight
                      alignment:kCAAlignmentLeft];

    labelCount += 1;
    if (labelCount > 4) {
      break;
    }
  }
}

- (void)removeAllLabelLayers {
  for (CATextLayer* layer in labelLayers) {
    [layer removeFromSuperlayer];
  }
  [labelLayers removeAllObjects];
}

- (void)addLabelLayerWithText:(NSString*)text
                      originX:(float)originX
                      originY:(float)originY
                        width:(float)width
                       height:(float)height
                    alignment:(NSString*)alignment {
  CFTypeRef font = (CFTypeRef) @"Menlo-Regular";
  const float fontSize = 12.0;
  const float marginSizeX = 5.0f;
  const float marginSizeY = 2.0f;

  const CGRect backgroundBounds = CGRectMake(originX, originY, width, height);
  const CGRect textBounds = CGRectMake((originX + marginSizeX), (originY + marginSizeY),
                                       (width - (marginSizeX * 2)), (height - (marginSizeY * 2)));

  CATextLayer* background = [CATextLayer layer];
  [background setBackgroundColor:[UIColor blackColor].CGColor];
  [background setOpacity:0.5f];
  [background setFrame:backgroundBounds];
  background.cornerRadius = 5.0f;

  [[self.view layer] addSublayer:background];
  [labelLayers addObject:background];

  CATextLayer* layer = [CATextLayer layer];
  [layer setForegroundColor:[UIColor whiteColor].CGColor];
  [layer setFrame:textBounds];
  [layer setAlignmentMode:alignment];
  [layer setWrapped:YES];
  [layer setFont:font];
  [layer setFontSize:fontSize];
  layer.contentsScale = [[UIScreen mainScreen] scale];
  [layer setString:text];

  [[self.view layer] addSublayer:layer];
  [labelLayers addObject:layer];
}

@end