1 // Copyright 2015 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 ////////////////////////////////////////////////////////////////////////////////
16 //
// This file implements the image type recognition algorithm. The functions
// that check each individual image type are based on comparisons of magic
// numbers or signature strings. Other checks (e.g. endianness, the general
// TIFF magic number "42", etc.) may also be used in some of those functions
// to make the type recognition more stable. Those checks are designed
// according to the format specifications and our own experiments. Notice that
// the magic numbers and signature strings may have different binary values
// depending on the endianness.
25 #include "src/image_type_recognition/image_type_recognition_lite.h"
26 
27 #include <algorithm>
28 #include <cassert>
29 #include <string>
30 #include <vector>
31 
32 #include "src/binary_parse/range_checked_byte_ptr.h"
33 
34 namespace piex {
35 namespace image_type_recognition {
36 namespace {
37 
38 using std::string;
39 using binary_parse::MemoryStatus;
40 using binary_parse::RangeCheckedBytePtr;
41 
42 // Base class for checking image type. For each image type, one should create an
43 // inherited class and do the implementation.
44 class TypeChecker {
45  public:
46   // Comparing function, whihc is used for sorting.
Compare(const TypeChecker * a,const TypeChecker * b)47   static bool Compare(const TypeChecker* a, const TypeChecker* b) {
48     assert(a);
49     assert(b);
50     return a->RequestedSize() < b->RequestedSize();
51   }
52 
~TypeChecker()53   virtual ~TypeChecker() {}
54 
55   // Returns the type of current checker.
56   virtual RawImageTypes Type() const = 0;
57 
58   // Returns the requested data size (in bytes) for current checker. The checker
59   // guarantees that it will not read more than this size.
60   virtual size_t RequestedSize() const = 0;
61 
62   // Checks if source data belongs to current checker type.
63   virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0;
64 
65  protected:
66   // Limits the source length to the RequestedSize(), using it guarantees that
67   // we will not read more than this size from the source.
LimitSource(const RangeCheckedBytePtr & source) const68   RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const {
69     return source.pointerToSubArray(0 /* pos */, RequestedSize());
70   }
71 };
72 
73 // Check if the uint16 value at (source + offset) is equal to the target value.
CheckUInt16Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned short target_value)74 bool CheckUInt16Value(const RangeCheckedBytePtr& source,
75                       const size_t source_offset, const bool use_big_endian,
76                       const unsigned short target_value) {  // NOLINT
77   MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
78   const unsigned short value = binary_parse::Get16u(  // NOLINT
79       source + source_offset, use_big_endian, &status);
80   if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
81     return false;
82   }
83   return (target_value == value);
84 }
85 
86 // Check if the uint32 value at (source + offset) is equal to the target value.
CheckUInt32Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned int target_value)87 bool CheckUInt32Value(const RangeCheckedBytePtr& source,
88                       const size_t source_offset, const bool use_big_endian,
89                       const unsigned int target_value) {
90   MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
91   const unsigned int value =
92       binary_parse::Get32u(source + source_offset, use_big_endian, &status);
93   if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
94     return false;
95   }
96   return (target_value == value);
97 }
98 
99 // Determine the endianness. The return value is NOT the endianness indicator,
100 // it's just that this function was successful.
DetermineEndianness(const RangeCheckedBytePtr & source,bool * is_big_endian)101 bool DetermineEndianness(const RangeCheckedBytePtr& source,
102                          bool* is_big_endian) {
103   if (source.remainingLength() < 2) {
104     return false;
105   }
106 
107   if (source[0] == 0x49 && source[1] == 0x49) {
108     *is_big_endian = false;
109   } else if (source[0] == 0x4D && source[1] == 0x4D) {
110     *is_big_endian = true;
111   } else {
112     return false;
113   }
114   return true;
115 }
116 
117 // Check if signature string can match to the same length string start from
118 // (source + offset). The signature string will be used as longer magic number
119 // series.
IsSignatureMatched(const RangeCheckedBytePtr & source,const size_t source_offset,const string & signature)120 bool IsSignatureMatched(const RangeCheckedBytePtr& source,
121                         const size_t source_offset, const string& signature) {
122   return source.substr(source_offset, signature.size()) == signature;
123 }
124 
125 // Check if signature is found in [source + offset, source + offset + range].
IsSignatureFound(const RangeCheckedBytePtr & source,const size_t search_offset,const size_t search_range,const string & signature,size_t * first_matched)126 bool IsSignatureFound(const RangeCheckedBytePtr& source,
127                       const size_t search_offset, const size_t search_range,
128                       const string& signature, size_t* first_matched) {
129   if (source.remainingLength() < search_offset + search_range) {
130     return false;
131   }
132 
133   // The index must be in range [offset, offset + range - sizeof(signature)], so
134   // that it can guarantee that it will not read outside of range.
135   for (size_t i = search_offset;
136        i < search_offset + search_range - signature.size(); ++i) {
137     if (IsSignatureMatched(source, i, signature)) {
138       if (first_matched) {
139         *first_matched = i;
140       }
141       return true;
142     }
143   }
144   return false;
145 }
146 
147 // Sony RAW format.
148 class ArwTypeChecker : public TypeChecker {
149  public:
Type() const150   virtual RawImageTypes Type() const { return kArwImage; }
151 
RequestedSize() const152   virtual size_t RequestedSize() const { return 5000; }
153 
154   // Check multiple points:
155   // 1. valid endianness at the beginning of the file;
156   // 2. correct tiff magic number at the (offset == 8) position of the file;
157   // 3. signature "SONY" in first requested bytes;
158   // 4. correct signature for (section + version) in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const159   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
160     RangeCheckedBytePtr limited_source = LimitSource(source);
161 
162     bool use_big_endian;
163     if (!DetermineEndianness(limited_source, &use_big_endian)) {
164       return false;
165     }
166 
167     const unsigned short kTiffMagic = 0x2A;  // NOLINT
168     const unsigned int kTiffOffset = 8;
169     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
170                           kTiffMagic) ||
171         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
172                           kTiffOffset)) {
173       return false;
174     }
175 
176     // Search for kSignatureSony in first requested bytes
177     const string kSignatureSony("SONY");
178     if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
179                           kSignatureSony, NULL)) {
180       return false;
181     }
182 
183     // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first
184     // requested bytes
185     const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8);
186     const int kSignatureVersionsSize = 5;
187     const string kSignatureVersions[kSignatureVersionsSize] = {
188         string("\x02\x00", 2),  // ARW 1.0
189         string("\x03\x00", 2),  // ARW 2.0
190         string("\x03\x01", 2),  // ARW 2.1
191         string("\x03\x02", 2),  // ARW 2.2
192         string("\x03\x03", 2),  // ARW 2.3
193     };
194     bool matched = false;
195     for (int i = 0; i < kSignatureVersionsSize; ++i) {
196       matched = matched || IsSignatureFound(
197                                limited_source, 0 /* offset */, RequestedSize(),
198                                kSignatureSection + kSignatureVersions[i], NULL);
199     }
200     return matched;
201   }
202 };
203 
204 // Canon RAW (CR2 extension).
205 class Cr2TypeChecker : public TypeChecker {
206  public:
Type() const207   virtual RawImageTypes Type() const { return kCr2Image; }
208 
RequestedSize() const209   virtual size_t RequestedSize() const { return 16; }
210 
211   // Check multiple points:
212   // 1. valid endianness at the beginning of the file;
213   // 2. magic number "42" at the (offset == 2) position of the file;
214   // 3. signature "CR2" at the (offset == 8) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const215   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
216     RangeCheckedBytePtr limited_source = LimitSource(source);
217 
218     bool use_big_endian;
219     if (!DetermineEndianness(limited_source, &use_big_endian)) {
220       return false;
221     }
222 
223     const unsigned short kTag = 42;  // NOLINT
224     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
225                           kTag)) {
226       return false;
227     }
228 
229     const string kSignature("CR\2\0", 4);
230     return IsSignatureMatched(limited_source, 8 /* offset */, kSignature);
231   }
232 };
233 
234 // Canon RAW (CRW extension).
235 class CrwTypeChecker : public TypeChecker {
236  public:
Type() const237   virtual RawImageTypes Type() const { return kCrwImage; }
238 
RequestedSize() const239   virtual size_t RequestedSize() const { return 14; }
240 
241   // Check only the signature at the (offset == 6) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const242   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
243     RangeCheckedBytePtr limited_source = LimitSource(source);
244 
245     bool use_big_endian;
246     if (!DetermineEndianness(limited_source, &use_big_endian)) {
247       return false;
248     }
249 
250     string signature;
251     if (use_big_endian) {
252       signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8);
253     } else {
254       signature = string("HEAPCCDR");
255     }
256     return IsSignatureMatched(limited_source, 6 /* offset */, signature);
257   }
258 };
259 
260 // Kodak RAW.
261 class DcrTypeChecker : public TypeChecker {
262  public:
Type() const263   virtual RawImageTypes Type() const { return kDcrImage; }
264 
RequestedSize() const265   virtual size_t RequestedSize() const { return 5000; }
266 
267   // Check two different cases, only need to fulfill one of the two:
268   // 1. signature at the (offset == 16) position of the file;
269   // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the
270   // first requested bytes of the file.
IsMyType(const RangeCheckedBytePtr & source) const271   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
272     RangeCheckedBytePtr limited_source = LimitSource(source);
273 
274     bool use_big_endian;
275     if (!DetermineEndianness(limited_source, &use_big_endian)) {
276       return false;
277     }
278 
279     // Case 1: has signature
280     const string kSignature(
281         "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16);
282     if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) {
283       return true;
284     }
285 
286     // Case 2: search for tags in first requested bytes
287     string kIfdTags[2];
288     if (use_big_endian) {
289       kIfdTags[0] = string("\x03\xe9\x00\x02", 4);  // OriginalFileName
290       kIfdTags[1] = string("\x0c\xe5\x00\x02", 4);  // FirmwareVersion
291     } else {
292       kIfdTags[0] = string("\xe9\x03\x02\x00", 4);  // OriginalFileName
293       kIfdTags[1] = string("\xe5\x0c\x02\x00", 4);  // FirmwareVersion
294     }
295     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
296                             kIfdTags[0], NULL) &&
297            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
298                             kIfdTags[1], NULL);
299   }
300 };
301 
302 // Digital Negative RAW.
303 class DngTypeChecker : public TypeChecker {
304  public:
Type() const305   virtual RawImageTypes Type() const { return kDngImage; }
306 
RequestedSize() const307   virtual size_t RequestedSize() const { return 1024; }
308 
309   // Check multiple points:
310   // 1. valid endianness at the beginning of the file;
311   // 2. at least two dng specific tags in the first requested bytes of the
312   // file
IsMyType(const RangeCheckedBytePtr & source) const313   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
314     RangeCheckedBytePtr limited_source = LimitSource(source);
315 
316     bool use_big_endian;
317     if (!DetermineEndianness(limited_source, &use_big_endian)) {
318       return false;
319     }
320 
321     // Search tags in first requested bytes and verify the order of them.
322     const int kTagsCount = 5;
323     string dng_tags[kTagsCount];
324     if (use_big_endian) {
325       dng_tags[0] =
326           string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8);  // tag: 50706
327       dng_tags[1] =
328           string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8);  // tag: 50707
329       dng_tags[2] = string("\xc6\x14\x00\x02", 4);        // tag: 50708
330       dng_tags[3] = string("\xc6\x20", 2);                // tag: 50720
331       dng_tags[4] =
332           string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8);  // tag: 50733
333     } else {
334       dng_tags[0] =
335           string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50706
336       dng_tags[1] =
337           string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50707
338       dng_tags[2] = string("\x14\xc6\x02\x00", 4);        // tag: 50708
339       dng_tags[3] = string("\x20\xc6", 2);                // tag: 50720
340       dng_tags[4] =
341           string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8);  // tag: 50733
342     }
343     int tags_found = 0;
344     for (int i = 0; i < kTagsCount; ++i) {
345       if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
346                            dng_tags[i], NULL)) {
347         tags_found++;
348       }
349     }
350     return tags_found >= 2;
351   }
352 };
353 
354 // Kodak RAW.
355 class KdcTypeChecker : public TypeChecker {
356  public:
Type() const357   virtual RawImageTypes Type() const { return kKdcImage; }
358 
RequestedSize() const359   virtual size_t RequestedSize() const { return 5000; }
360 
361   // Check two points:
362   // 1. valid endianness at the beginning of the file;
363   // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const364   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
365     RangeCheckedBytePtr limited_source = LimitSource(source);
366 
367     bool use_big_endian;
368     if (!DetermineEndianness(limited_source, &use_big_endian)) {
369       return false;
370     }
371 
372     // Search in first requested bytes
373     const size_t kIfdTagsSize = 2;
374     string kIfdTags[kIfdTagsSize];
375     if (use_big_endian) {
376       kIfdTags[0] = string("\xfa\x0d\x00\x01", 4);  // WhiteBalance
377       kIfdTags[1] = string("\xfa\x00\x00\x02", 4);  // SerialNumber
378     } else {
379       kIfdTags[0] = string("\x0d\xfa\x01\x00", 4);  // WhiteBalance
380       kIfdTags[1] = string("\x00\xfa\x02\x00", 4);  // SerialNumber
381     }
382 
383     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
384                             kIfdTags[0], NULL) &&
385            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
386                             kIfdTags[1], NULL);
387   }
388 };
389 
390 // Leaf RAW.
391 class MosTypeChecker : public TypeChecker {
392  public:
Type() const393   virtual RawImageTypes Type() const { return kMosImage; }
394 
RequestedSize() const395   virtual size_t RequestedSize() const { return 5000; }
396 
397   // Check two points:
398   // 1. valid endianness at the beginning of the file;
399   // 2. signature "PKTS    " in the first requested bytes. Note the
400   // "whitespace". It's important as they are special binary values.
IsMyType(const RangeCheckedBytePtr & source) const401   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
402     RangeCheckedBytePtr limited_source = LimitSource(source);
403 
404     bool use_big_endian;
405     if (!DetermineEndianness(source, &use_big_endian)) {
406       return false;
407     }
408 
409     // Search kSignaturePKTS in first requested bytes
410     const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8);
411     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
412                             kSignaturePKTS, NULL);
413   }
414 };
415 
416 // Minolta RAW.
417 class MrwTypeChecker : public TypeChecker {
418  public:
Type() const419   virtual RawImageTypes Type() const { return kMrwImage; }
420 
RequestedSize() const421   virtual size_t RequestedSize() const { return 4; }
422 
423   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const424   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
425     // Limits the source length to the RequestedSize(), using it guarantees that
426     // we will not read more than this size from the source.
427     RangeCheckedBytePtr limited_source =
428         source.pointerToSubArray(0 /* pos */, RequestedSize());
429 
430     const string kSignature("\0MRM", 4);
431     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
432   }
433 };
434 
435 // Check if the file contains a NRW signature "NRW   " in the first requested
436 // bytes. Note the "whitespace". It's important as they are special binary
437 // values.
438 const size_t kRequestedSizeForNrwSignature = 4000;
ContainsNrwSignature(const RangeCheckedBytePtr & source)439 bool ContainsNrwSignature(const RangeCheckedBytePtr& source) {
440   // Search for kSignatureNrw.
441   const string kSignatureNrw("NRW\x20\x20\x20", 6);
442   return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature,
443                           kSignatureNrw, NULL);
444 }
445 
446 // Checks if the file contains the signatures for Nikon formats:
447 // * the general Nikon singature "NIKON" string.
448 // * the ReferenceBlackWhite tag.
449 const size_t kRequestedSizeForNikonSignatures = 4000;
ContainsNikonSignatures(const RangeCheckedBytePtr & source,const bool use_big_endian)450 bool ContainsNikonSignatures(const RangeCheckedBytePtr& source,
451                              const bool use_big_endian) {
452   const string kSignatureNikon("NIKON");
453   const string kReferenceBlackWhiteTag = use_big_endian
454                                              ? string("\x02\x14\x00\x05", 4)
455                                              : string("\x14\x02\x05\x00", 4);
456   const std::vector<string> kSignatures = {kSignatureNikon,
457                                            kReferenceBlackWhiteTag};
458   for (auto const& signature : kSignatures) {
459     if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures,
460                           signature, NULL)) {
461       return false;
462     }
463   }
464   return true;
465 }
466 
467 // Nikon RAW (NEF extension).
468 class NefTypeChecker : public TypeChecker {
469  public:
Type() const470   virtual RawImageTypes Type() const { return kNefImage; }
471 
RequestedSize() const472   virtual size_t RequestedSize() const {
473     return std::max(kRequestedSizeForNikonSignatures,
474                     kRequestedSizeForNrwSignature);
475   }
476 
477   // Check multiple points:
478   // 1. valid endianness at the beginning of the file;
479   // 2. magic number at the (offset == 2) position of the file;
480   // 3. the signature "NIKON" in the requested bytes of the file;
481   // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
482   // 5. does not contain the NRW signature. We may also check a special
483   // signature "RAW   " similar to the NRW case, but we got issues in some
484   // special images that the signature locates in the middle of the file, and it
485   // costs too  long time to check;
IsMyType(const RangeCheckedBytePtr & source) const486   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
487     RangeCheckedBytePtr limited_source = LimitSource(source);
488 
489     bool use_big_endian;
490     if (!DetermineEndianness(limited_source, &use_big_endian)) {
491       return false;
492     }
493 
494     const unsigned short kTiffMagic = 0x2A;  // NOLINT
495     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
496                           kTiffMagic)) {
497       return false;
498     }
499 
500     return ContainsNikonSignatures(limited_source, use_big_endian) &&
501            !ContainsNrwSignature(limited_source);  // not NRW
502   }
503 };
504 
505 // Nikon RAW (NRW extension).
506 class NrwTypeChecker : public TypeChecker {
507  public:
Type() const508   virtual RawImageTypes Type() const { return kNrwImage; }
509 
RequestedSize() const510   virtual size_t RequestedSize() const {
511     return std::max(kRequestedSizeForNikonSignatures,
512                     kRequestedSizeForNrwSignature);
513   }
514 
515   // Check multiple points:
516   // 1. valid endianness at the beginning of the file;
517   // 2. magic numbers at the (offset == 2 and offset == 4) positions of the
518   // file;
519   // 3. the signature "NIKON" in the first requested bytes of the file;
520   // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
521   // 5. contains the NRW signature;
IsMyType(const RangeCheckedBytePtr & source) const522   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
523     RangeCheckedBytePtr limited_source = LimitSource(source);
524 
525     bool use_big_endian;
526     if (!DetermineEndianness(limited_source, &use_big_endian)) {
527       return false;
528     }
529 
530     const unsigned short kTiffMagic = 0x2A;  // NOLINT
531     const unsigned int kTiffOffset = 8;
532     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
533                           kTiffMagic) ||
534         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
535                           kTiffOffset)) {
536       return false;
537     }
538 
539     return ContainsNikonSignatures(limited_source, use_big_endian) &&
540            ContainsNrwSignature(limited_source);
541   }
542 };
543 
544 // Olympus RAW.
545 class OrfTypeChecker : public TypeChecker {
546  public:
Type() const547   virtual RawImageTypes Type() const { return kOrfImage; }
548 
RequestedSize() const549   virtual size_t RequestedSize() const { return 3000; }
550 
551   // Check multiple points:
552   // 1. valid endianness at the beginning of the file;
553   // 2. tag at the (offset == 2) position of the file;
554   // 3. signature "OLYMP" in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const555   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
556     RangeCheckedBytePtr limited_source = LimitSource(source);
557 
558     bool use_big_endian;
559     if (!DetermineEndianness(limited_source, &use_big_endian)) {
560       return false;
561     }
562 
563     const size_t kTagSize = 2;
564     const unsigned short kTag[kTagSize] = {0x4F52, 0x5352};  // NOLINT
565     if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
566                            kTag[0]) ||
567           CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
568                            kTag[1]))) {
569       return false;
570     }
571 
572     // Search for kSignatureOlymp in first requested bytes
573     const string kSignatureOlymp("OLYMP");
574     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
575                             kSignatureOlymp, NULL);
576   }
577 };
578 
579 // Pentax RAW.
580 class PefTypeChecker : public TypeChecker {
581  public:
Type() const582   virtual RawImageTypes Type() const { return kPefImage; }
583 
RequestedSize() const584   virtual size_t RequestedSize() const { return 1280; }
585 
586   // Check multiple points:
587   // 1. valid big endianness at the beginning of the file;
588   // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
589   // 3. signature "AOC   " or "PENTAX  " in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const590   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
591     RangeCheckedBytePtr limited_source = LimitSource(source);
592 
593     bool use_big_endian;
594     if (!DetermineEndianness(limited_source, &use_big_endian)) {
595       return false;
596     }
597 
598     const unsigned short kTiffMagic = 0x2A;  // NOLINT
599     const unsigned int kTiffOffset = 8;
600     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
601                           kTiffMagic) ||
602         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
603                           kTiffOffset)) {
604       return false;
605     }
606 
607     // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes
608     const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6);
609     const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8);
610     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
611                             kSignatureAOC, NULL) ||
612            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
613                             kSignaturePENTAX, NULL);
614   }
615 };
616 
617 // Apple format.
618 class QtkTypeChecker : public TypeChecker {
619  public:
Type() const620   virtual RawImageTypes Type() const { return kQtkImage; }
621 
RequestedSize() const622   virtual size_t RequestedSize() const { return 8; }
623 
624   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const625   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
626     RangeCheckedBytePtr limited_source = LimitSource(source);
627 
628     const size_t kSignatureSize = 2;
629     const string kSignature[kSignatureSize] = {
630         string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8),
631     };
632     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) ||
633            IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]);
634   }
635 };
636 
637 // Fuji RAW.
638 class RafTypeChecker : public TypeChecker {
639  public:
Type() const640   virtual RawImageTypes Type() const { return kRafImage; }
641 
RequestedSize() const642   virtual size_t RequestedSize() const { return 8; }
643 
644   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const645   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
646     RangeCheckedBytePtr limited_source = LimitSource(source);
647 
648     const string kSignature("FUJIFILM");
649     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
650   }
651 };
652 
653 // Contax N RAW.
654 class RawContaxNTypeChecker : public TypeChecker {
655  public:
Type() const656   virtual RawImageTypes Type() const { return kRawContaxNImage; }
657 
RequestedSize() const658   virtual size_t RequestedSize() const { return 36; }
659 
660   // Check only the signature at the (offset == 25) position of the
661   // file.
IsMyType(const RangeCheckedBytePtr & source) const662   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
663     RangeCheckedBytePtr limited_source = LimitSource(source);
664 
665     const string kSignature("ARECOYK");
666     return IsSignatureMatched(limited_source, 25, kSignature);
667   }
668 };
669 
670 // Panasonic RAW.
671 class Rw2TypeChecker : public TypeChecker {
672  public:
Type() const673   virtual RawImageTypes Type() const { return kRw2Image; }
674 
RequestedSize() const675   virtual size_t RequestedSize() const { return 4; }
676 
677   // Check two points: 1. valid endianness at the beginning of the
678   // file; 2. tag at the (offset == 2) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const679   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
680     RangeCheckedBytePtr limited_source = LimitSource(source);
681 
682     bool use_big_endian;
683     if (!DetermineEndianness(source, &use_big_endian)) {
684       return false;
685     }
686 
687     const unsigned short kTag = 0x55;  // NOLINT
688     return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
689                             kTag);
690   }
691 };
692 
693 // Samsung RAW.
694 class SrwTypeChecker : public TypeChecker {
695  public:
Type() const696   virtual RawImageTypes Type() const { return kSrwImage; }
697 
RequestedSize() const698   virtual size_t RequestedSize() const { return 256; }
699 
700   // Check multiple points:
701   // 1. valid big endianness at the beginning of the file;
702   // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
703   // 3. the signature "SAMSUNG" in the requested bytes of the file;
IsMyType(const RangeCheckedBytePtr & source) const704   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
705     RangeCheckedBytePtr limited_source = LimitSource(source);
706 
707     bool use_big_endian;
708     if (!DetermineEndianness(source, &use_big_endian)) {
709       return false;
710     }
711 
712     const unsigned short kTiffMagic = 0x2A;  // NOLINT
713     const unsigned int kTiffOffset = 8;
714     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
715                           kTiffMagic) ||
716         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
717                           kTiffOffset)) {
718       return false;
719     }
720 
721     const string kSignature("SAMSUNG");
722     if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) {
723       return false;
724     }
725     return true;
726   }
727 };
728 
729 // Sigma / Polaroid RAW.
730 class X3fTypeChecker : public TypeChecker {
731  public:
Type() const732   virtual RawImageTypes Type() const { return kX3fImage; }
733 
RequestedSize() const734   virtual size_t RequestedSize() const { return 4; }
735 
736   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const737   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
738     RangeCheckedBytePtr limited_source = LimitSource(source);
739 
740     const string kSignature("FOVb", 4);
741     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
742   }
743 };
744 
745 // This class contains the list of all type checkers. One should used this list
746 // as a whole to execute the image type recognition.
747 class TypeCheckerList {
748  public:
TypeCheckerList()749   TypeCheckerList() {
750     // Add all supported RAW type checkers here.
751     checkers_.push_back(new ArwTypeChecker());
752     checkers_.push_back(new Cr2TypeChecker());
753     checkers_.push_back(new CrwTypeChecker());
754     checkers_.push_back(new DcrTypeChecker());
755     checkers_.push_back(new DngTypeChecker());
756     checkers_.push_back(new KdcTypeChecker());
757     checkers_.push_back(new MosTypeChecker());
758     checkers_.push_back(new MrwTypeChecker());
759     checkers_.push_back(new NefTypeChecker());
760     checkers_.push_back(new NrwTypeChecker());
761     checkers_.push_back(new OrfTypeChecker());
762     checkers_.push_back(new PefTypeChecker());
763     checkers_.push_back(new QtkTypeChecker());
764     checkers_.push_back(new RafTypeChecker());
765     checkers_.push_back(new RawContaxNTypeChecker());
766     checkers_.push_back(new Rw2TypeChecker());
767     checkers_.push_back(new SrwTypeChecker());
768     checkers_.push_back(new X3fTypeChecker());
769 
770     // Sort the checkers by the ascending RequestedSize() to get better
771     // performance when checking type.
772     std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare);
773   }
774 
~TypeCheckerList()775   ~TypeCheckerList() {
776     for (size_t i = 0; i < checkers_.size(); ++i) {
777       delete checkers_[i];
778       checkers_[i] = NULL;
779     }
780   }
781 
782   // Returns the type of source data. If it can not be identified, returns
783   // kNonRawImage.
GetType(const RangeCheckedBytePtr & source) const784   RawImageTypes GetType(const RangeCheckedBytePtr& source) const {
785     for (size_t i = 0; i < checkers_.size(); ++i) {
786       if (checkers_[i]->IsMyType(source)) {
787         return checkers_[i]->Type();
788       }
789     }
790     return kNonRawImage;
791   }
792 
793   // Returns the maximum size of requested size of data for identifying image
794   // type using this class. The class guarantees that it will not read more than
795   // this size.
RequestedSize() const796   size_t RequestedSize() const {
797     assert(!checkers_.empty());
798     // The checkers_ is ascending sorted. The last element is the maximum.
799     return checkers_.back()->RequestedSize();
800   }
801 
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)802   bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
803     const TypeChecker* type_checker = GetTypeCheckerForType(type);
804     if (type_checker) {
805       return type_checker->IsMyType(source);
806     } else {
807       return false;
808     }
809   }
810 
RequestedSizeForType(const RawImageTypes type)811   size_t RequestedSizeForType(const RawImageTypes type) {
812     const TypeChecker* type_checker = GetTypeCheckerForType(type);
813     if (type_checker) {
814       return type_checker->RequestedSize();
815     } else {
816       return 0;
817     }
818   }
819 
820  private:
GetTypeCheckerForType(const RawImageTypes type)821   const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) {
822     for (const auto* type_checker : checkers_) {
823       if (type_checker->Type() == type) {
824         return type_checker;
825       }
826     }
827     return nullptr;
828   }
829 
830   std::vector<TypeChecker*> checkers_;
831 };
832 
833 }  // namespace
834 
IsRaw(const RawImageTypes type)835 bool IsRaw(const RawImageTypes type) {
836   switch (type) {
837     // Non-RAW-image type
838     case kNonRawImage: {
839       return false;
840     }
841 
842     // Raw image types
843     case kArwImage:
844     case kCr2Image:
845     case kCrwImage:
846     case kDcrImage:
847     case kDngImage:
848     case kKdcImage:
849     case kMosImage:
850     case kMrwImage:
851     case kNefImage:
852     case kNrwImage:
853     case kOrfImage:
854     case kPefImage:
855     case kQtkImage:
856     case kRafImage:
857     case kRawContaxNImage:
858     case kRw2Image:
859     case kSrwImage:
860     case kX3fImage: {
861       return true;
862     }
863 
864     default: {
865       // Unsupported type!
866       assert(false);
867     }
868   }
869   return false;
870 }
871 
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)872 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
873   return TypeCheckerList().IsOfType(source, type);
874 }
875 
RecognizeRawImageTypeLite(const RangeCheckedBytePtr & source)876 RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) {
877   return TypeCheckerList().GetType(source);
878 }
879 
GetNumberOfBytesForIsRawLite()880 size_t GetNumberOfBytesForIsRawLite() {
881   return TypeCheckerList().RequestedSize();
882 }
883 
GetNumberOfBytesForIsOfType(const RawImageTypes type)884 size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) {
885   return TypeCheckerList().RequestedSizeForType(type);
886 }
887 
IsRawLite(const RangeCheckedBytePtr & source)888 bool IsRawLite(const RangeCheckedBytePtr& source) {
889   return IsRaw(RecognizeRawImageTypeLite(source));
890 }
891 
892 }  // namespace image_type_recognition
893 }  // namespace piex
894