1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: bytestriebuilder.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010sep25 12 * created by: Markus W. Scherer 13 */ 14 15 /** 16 * \file 17 * \brief C++ API: Builder for icu::BytesTrie 18 */ 19 20 #ifndef __BYTESTRIEBUILDER_H__ 21 #define __BYTESTRIEBUILDER_H__ 22 23 #include "unicode/utypes.h" 24 #include "unicode/bytestrie.h" 25 #include "unicode/stringpiece.h" 26 #include "unicode/stringtriebuilder.h" 27 28 U_NAMESPACE_BEGIN 29 30 class BytesTrieElement; 31 class CharString; 32 33 /** 34 * Builder class for BytesTrie. 35 * 36 * This class is not intended for public subclassing. 37 * @stable ICU 4.8 38 */ 39 class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 40 public: 41 /** 42 * Constructs an empty builder. 43 * @param errorCode Standard ICU error code. 44 * @stable ICU 4.8 45 */ 46 BytesTrieBuilder(UErrorCode &errorCode); 47 48 /** 49 * Destructor. 50 * @stable ICU 4.8 51 */ 52 virtual ~BytesTrieBuilder(); 53 54 /** 55 * Adds a (byte sequence, value) pair. 56 * The byte sequence must be unique. 57 * The bytes will be copied; the builder does not keep 58 * a reference to the input StringPiece or its data(). 59 * @param s The input byte sequence. 60 * @param value The value associated with this byte sequence. 61 * @param errorCode Standard ICU error code. Its input value must 62 * pass the U_SUCCESS() test, or else the function returns 63 * immediately. Check for U_FAILURE() on output or use with 64 * function chaining. (See User Guide for details.) 65 * @return *this 66 * @stable ICU 4.8 67 */ 68 BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); 69 70 /** 71 * Builds a BytesTrie for the add()ed data. 72 * Once built, no further data can be add()ed until clear() is called. 73 * 74 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 75 * must have been add()ed. 76 * 77 * This method passes ownership of the builder's internal result array to the new trie object. 78 * Another call to any build() variant will re-serialize the trie. 79 * After clear() has been called, a new array will be used as well. 80 * @param buildOption Build option, see UStringTrieBuildOption. 81 * @param errorCode Standard ICU error code. Its input value must 82 * pass the U_SUCCESS() test, or else the function returns 83 * immediately. Check for U_FAILURE() on output or use with 84 * function chaining. (See User Guide for details.) 85 * @return A new BytesTrie for the add()ed data. 86 * @stable ICU 4.8 87 */ 88 BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 89 90 /** 91 * Builds a BytesTrie for the add()ed data and byte-serializes it. 92 * Once built, no further data can be add()ed until clear() is called. 93 * 94 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 95 * must have been add()ed. 96 * 97 * Multiple calls to buildStringPiece() return StringPieces referring to the 98 * builder's same byte array, without rebuilding. 99 * If buildStringPiece() is called after build(), the trie will be 100 * re-serialized into a new array. 101 * If build() is called after buildStringPiece(), the trie object will become 102 * the owner of the previously returned array. 103 * After clear() has been called, a new array will be used as well. 104 * @param buildOption Build option, see UStringTrieBuildOption. 105 * @param errorCode Standard ICU error code. Its input value must 106 * pass the U_SUCCESS() test, or else the function returns 107 * immediately. Check for U_FAILURE() on output or use with 108 * function chaining. (See User Guide for details.) 109 * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 110 * @stable ICU 4.8 111 */ 112 StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 113 114 /** 115 * Removes all (byte sequence, value) pairs. 116 * New data can then be add()ed and a new trie can be built. 117 * @return *this 118 * @stable ICU 4.8 119 */ 120 BytesTrieBuilder &clear(); 121 122 private: 123 BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor 124 BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator 125 126 void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 127 128 virtual int32_t getElementStringLength(int32_t i) const; 129 virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; 130 virtual int32_t getElementValue(int32_t i) const; 131 132 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; 133 134 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; 135 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; 136 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; 137 matchNodesCanHaveValues()138 virtual UBool matchNodesCanHaveValues() const { return FALSE; } 139 getMaxBranchLinearSubNodeLength()140 virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } getMinLinearMatch()141 virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } getMaxLinearMatchLength()142 virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } 143 144 /** 145 * @internal 146 */ 147 class BTLinearMatchNode : public LinearMatchNode { 148 public: 149 BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 150 virtual UBool operator==(const Node &other) const; 151 virtual void write(StringTrieBuilder &builder); 152 private: 153 const char *s; 154 }; 155 156 virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 157 Node *nextNode) const; 158 159 UBool ensureCapacity(int32_t length); 160 virtual int32_t write(int32_t byte); 161 int32_t write(const char *b, int32_t length); 162 virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); 163 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 164 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 165 virtual int32_t writeDeltaTo(int32_t jumpTarget); 166 167 CharString *strings; // Pointer not object so we need not #include internal charstr.h. 168 BytesTrieElement *elements; 169 int32_t elementsCapacity; 170 int32_t elementsLength; 171 172 // Byte serialization of the trie. 173 // Grows from the back: bytesLength measures from the end of the buffer! 174 char *bytes; 175 int32_t bytesCapacity; 176 int32_t bytesLength; 177 }; 178 179 U_NAMESPACE_END 180 181 #endif // __BYTESTRIEBUILDER_H__ 182