1 /*
2  **********************************************************************
3  *   Copyright (C) 2005-2008, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  */
7 
8 #ifndef __INPUTEXT_H
9 #define __INPUTEXT_H
10 
11 /**
12  * \file
13  * \internal
14  *
15  * This is an internal header for the Character Set Detection code. The
16  * name is probably too generic...
17  */
18 
19 
20 #include "unicode/uobject.h"
21 
22 #if !UCONFIG_NO_CONVERSION
23 
24 U_NAMESPACE_BEGIN
25 
26 class InputText : public UMemory
27 {
28     // Prevent copying
29     InputText(const InputText &);
30 public:
31     InputText(UErrorCode &status);
32     ~InputText();
33 
34     void setText(const char *in, int32_t len);
35     void setDeclaredEncoding(const char *encoding, int32_t len);
36     UBool isSet() const;
37     void MungeInput(UBool fStripTags);
38 
39     // The text to be checked.  Markup will have been
40     //   removed if appropriate.
41     uint8_t    *fInputBytes;
42     int32_t     fInputLen;          // Length of the byte data in fInputBytes.
43     // byte frequency statistics for the input text.
44     //   Value is percent, not absolute.
45     //   Value is rounded up, so zero really means zero occurences.
46     int16_t  *fByteStats;
47     UBool     fC1Bytes;          // True if any bytes in the range 0x80 - 0x9F are in the input;false by default
48     char     *fDeclaredEncoding;
49 
50     const uint8_t           *fRawInput;     // Original, untouched input bytes.
51     //  If user gave us a byte array, this is it.
52     //  If user gave us a stream, it's read to a
53     //   buffer here.
54     int32_t                  fRawLength;    // Length of data in fRawInput array.
55 
56 };
57 
58 U_NAMESPACE_END
59 
60 #endif
61 #endif /* __INPUTEXT_H */
62