1 /* 2 ********************************************************************** 3 * Copyright (C) 2005-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #ifndef __CSR2022_H 9 #define __CSR2022_H 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_CONVERSION 14 15 #include "csrecog.h" 16 17 U_NAMESPACE_BEGIN 18 19 class CharsetMatch; 20 21 /** 22 * class CharsetRecog_2022 part of the ICU charset detection imlementation. 23 * This is a superclass for the individual detectors for 24 * each of the detectable members of the ISO 2022 family 25 * of encodings. 26 * 27 * The separate classes are nested within this class. 28 * 29 * @internal 30 */ 31 class CharsetRecog_2022 : public CharsetRecognizer 32 { 33 34 public: 35 virtual ~CharsetRecog_2022() = 0; 36 37 protected: 38 39 /** 40 * Matching function shared among the 2022 detectors JP, CN and KR 41 * Counts up the number of legal an unrecognized escape sequences in 42 * the sample of text, and computes a score based on the total number & 43 * the proportion that fit the encoding. 44 * 45 * 46 * @param text the byte buffer containing text to analyse 47 * @param textLen the size of the text in the byte. 48 * @param escapeSequences the byte escape sequences to test for. 49 * @return match quality, in the range of 0-100. 50 */ 51 int32_t match_2022(const uint8_t *text, 52 int32_t textLen, 53 const uint8_t escapeSequences[][5], 54 int32_t escapeSequences_length) const; 55 56 }; 57 58 class CharsetRecog_2022JP :public CharsetRecog_2022 59 { 60 public: 61 virtual ~CharsetRecog_2022JP(); 62 63 const char *getName() const; 64 65 UBool match(InputText *textIn, CharsetMatch *results) const; 66 }; 67 68 #if !UCONFIG_ONLY_HTML_CONVERSION 69 class CharsetRecog_2022KR :public CharsetRecog_2022 { 70 public: 71 virtual ~CharsetRecog_2022KR(); 72 73 const char *getName() const; 74 75 UBool match(InputText *textIn, CharsetMatch *results) const; 76 77 }; 78 79 class CharsetRecog_2022CN :public CharsetRecog_2022 80 { 81 public: 82 virtual ~CharsetRecog_2022CN(); 83 84 const char* getName() const; 85 86 UBool match(InputText *textIn, CharsetMatch *results) const; 87 }; 88 #endif 89 90 U_NAMESPACE_END 91 92 #endif 93 #endif /* __CSR2022_H */ 94